In [5]:
# Import dependencies
import pandas as pd
from pathlib import Path 
from datetime import datetime
import sqlite3

In [6]:
# Location of the crime CSV, relative to the notebook's working directory
cleaned_data = Path('Resources/Crime_Data_from_2020_to_Present.csv')

# Read the CSV into a DataFrame and preview its first five rows
cleaned_data_df = pd.read_csv(filepath_or_buffer=cleaned_data)
cleaned_data_df.head(5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,190326475,03/01/2020 12:00:00 AM,03/01/2020 12:00:00 AM,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,AA,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506
1,200106753,02/09/2020 12:00:00 AM,02/08/2020 12:00:00 AM,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,IC,Invest Cont,330.0,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628
2,200320258,11/11/2020 12:00:00 AM,11/04/2020 12:00:00 AM,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,IC,Invest Cont,480.0,,,,1400 W 37TH ST,,34.021,-118.3002
3,200907217,05/10/2023 12:00:00 AM,03/10/2020 12:00:00 AM,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,IC,Invest Cont,343.0,,,,14000 RIVERSIDE DR,,34.1576,-118.4387
4,220614831,08/18/2022 12:00:00 AM,08/17/2020 12:00:00 AM,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,IC,Invest Cont,354.0,,,,1900 TRANSIENT,,34.0944,-118.3277


In [7]:
# Columns that are not needed downstream (each label listed exactly once)
columns_to_drop = [
    'Date Rptd', 'TIME OCC', 'AREA', 'AREA NAME', 'Rpt Dist No', 'Part 1-2',
    'Crm Cd', 'Mocodes', 'Vict Age', 'Vict Sex', 'Premis Cd', 'Premis Desc',
    'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc',
    'Crm Cd 1', 'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street',
]

# Mapping dictionary to replace the single-letter codes in "Vict Descent"
descent_mapping = {
    'A': 'Other Asian',
    'B': 'Black',
    'C': 'Chinese',
    'D': 'Cambodian',
    'F': 'Filipino',
    'G': 'Guamanian',
    'H': 'Hispanic/Latin/Mexican',
    'I': 'American Indian/Alaskan Native',
    'J': 'Japanese',
    'K': 'Korean',
    'L': 'Laotian',
    'O': 'Other',
    'P': 'Pacific Islander',
    'S': 'Samoan',
    'U': 'Hawaiian',
    'V': 'Vietnamese',
    'W': 'White',
    'X': 'Unknown',
    'Z': 'Asian Indian'
}

# Build the cleaned frame in one chained pipeline. Every step returns a new
# DataFrame, so `cleaned_data_df` is only rebound after all steps succeed;
# a failure part-way through (e.g. a malformed date) no longer leaves a
# half-cleaned frame behind.
cleaned_data_df = (
    cleaned_data_df
    .drop(columns=columns_to_drop)
    .assign(**{
        # Codes absent from descent_mapping (and missing values) become NaN
        'Vict Descent': lambda df: df['Vict Descent'].map(descent_mapping),
        # "DATE OCC" looks like 'MM/DD/YYYY hh:mm:ss AM'; keep the date part
        # (text before the first space), parse it and extract the year
        'Year': lambda df: pd.to_datetime(
            df['DATE OCC'].str.split(' ', n=1).str[0],
            format='%m/%d/%Y',
        ).dt.year,
    })
    # The raw timestamp is no longer needed once "Year" exists
    .drop(columns=['DATE OCC'])
)

# Display the updated DataFrame
cleaned_data_df.head()

Unnamed: 0,DR_NO,Crm Cd Desc,Vict Descent,LAT,LON,Year
0,190326475,VEHICLE - STOLEN,Other,34.0375,-118.3506,2020
1,200106753,BURGLARY FROM VEHICLE,Other,34.0444,-118.2628,2020
2,200320258,BIKE - STOLEN,Unknown,34.021,-118.3002,2020
3,200907217,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),Other,34.1576,-118.4387,2020
4,220614831,THEFT OF IDENTITY,Hispanic/Latin/Mexican,34.0944,-118.3277,2020


In [8]:
# Persist the cleaned data to a SQLite database file
conn = sqlite3.connect('LA_crime.sqlite')
try:
    # Replace any existing 'LA_crime_data' table; the DataFrame index is not written
    cleaned_data_df.to_sql('LA_crime_data', conn, if_exists='replace', index=False)
finally:
    # Always release the connection, even when the write raises
    conn.close()