In [25]:
# Install the required packages that are not included in the standard library.
!pip install numpy
!pip install pypdf



In [26]:
# Import the required libraries.
import pandas as pd
import numpy as np
from pathlib import Path

# Used to read the contents of a PDF file.
from pypdf import PdfReader

# Used to extract the contents of a zip file.
from zipfile import ZipFile

In [27]:
# Dataset: Los Angeles Crime Dataset (2020 -- Present), used to analyze the crime data in Los Angeles.
# Source: https://www.kaggle.com/datasets/nathaniellybrand/los-angeles-crime-dataset-2020-present/data

# Locations of the zipped download and of the CSV it contains, both inside the resources folder.
zip_file_path = Path("resources/LA_CRIMES_2020to2024.zip")
csv_file_path = Path("resources/Crime_Data_from_2020_to_Present.csv")

# Only touch the archive when the CSV has not been extracted yet.
if not csv_file_path.is_file():
    with ZipFile(zip_file_path) as archive:
        archive.extract(member="Crime_Data_from_2020_to_Present.csv", path="resources")

In [28]:
# Load the extracted Los Angeles Crime Dataset (2020 -- Present) CSV into the
# DataFrame that the following cells work on.
df = pd.read_csv(filepath_or_buffer=csv_file_path)

In [29]:
# Flag every crime that occurred while the coronavirus pandemic stay-at-home
# order was in effect.
# Source: https://www.nbclosangeles.com/news/coronavirus/2020-2021-california-coronavirus-pandemic-timeline-key-events/2334100/

# The stay-at-home order in Los Angeles started on March 19, 2020, and ended on January 25, 2021.
start_date = pd.to_datetime("2020-03-19", format="%Y-%m-%d")
end_date = pd.to_datetime("2021-01-25", format="%Y-%m-%d")

# Parse the DATE OCC column into datetimes. format="mixed" makes pandas infer
# the format of each value individually, so no single date layout is assumed.
df["DATE OCC"] = pd.to_datetime(df["DATE OCC"], format="mixed")

# True when DATE OCC lies within the order (both boundary dates included),
# False otherwise. Series.between is inclusive on both ends by default and
# already returns booleans, so no np.where(..., True, False) wrapper is needed.
df["COVID SAH"] = df["DATE OCC"].between(start_date, end_date)

In [40]:
# Classify the crimes as Violent or Property crimes based on the Crime Code.
# Load the PDF file containing crime codes classifications of Violent and Property crimes.
pdf_file_path = Path("resources/UCR-COMPSTAT062618.pdf")
pdf_reader = PdfReader(pdf_file_path)

# Extract the text of every page. The pages are joined with a newline rather
# than an empty string so that the last token of one page can never be fused
# with the first token of the next page.
raw_pdf_content = "\n".join(page.extract_text() for page in pdf_reader.pages)

In [41]:
# Tokenize the PDF text: commas are treated as separators, then the text is
# split on any run of whitespace (spaces, tabs, new lines, ...), so no empty
# tokens are produced.
pdf_content_list = raw_pdf_content.replace(",", " ").split()


def classify_crime_codes(words):
    """Map every crime code found in ``words`` to its classification.

    The words are scanned in order. The words "Violent" and "Property" act as
    section headings and switch the current classification to "VIOLENT" or
    "PROPERTY". Every word that parses as an integer is stored as a crime code
    with the classification that is current at that point. If a code appears
    more than once, the last occurrence wins.

    Args:
        words: Iterable of string tokens taken from the PDF text.

    Returns:
        dict mapping int crime codes to "VIOLENT" or "PROPERTY".
    """
    heading_to_classification = {"Violent": "VIOLENT", "Property": "PROPERTY"}
    code_to_classification = {}
    current_classification = None

    for word in words:
        word = word.strip()
        if word in heading_to_classification:
            current_classification = heading_to_classification[word]
            continue

        # Anything that is not an integer is ordinary text, not a crime code.
        try:
            crime_code = int(word)
        except ValueError:
            continue

        # Numbers seen before the first heading belong to no section; skip
        # them instead of storing a None classification.
        if current_classification is None:
            continue

        code_to_classification[crime_code] = current_classification

    return code_to_classification


# Dictionary of crime codes and their classifications.
crime_code_classification = classify_crime_codes(pdf_content_list)

print(crime_code_classification)

# Assert known crime codes to their classifications at the known dividers.
expected_classifications = {
    310: "PROPERTY",
    491: "PROPERTY",
    110: "VIOLENT",
    930: "VIOLENT",
    926: "VIOLENT",
}
for crime_code, expected in expected_classifications.items():
    actual = crime_code_classification.get(crime_code)
    assert actual == expected, (
        f"Crime code {crime_code}: expected {expected}, got {actual}"
    )

{110: 'VIOLENT', 113: 'VIOLENT', 121: 'VIOLENT', 122: 'VIOLENT', 815: 'VIOLENT', 820: 'VIOLENT', 821: 'VIOLENT', 210: 'VIOLENT', 220: 'VIOLENT', 230: 'VIOLENT', 231: 'VIOLENT', 235: 'VIOLENT', 236: 'VIOLENT', 250: 'VIOLENT', 251: 'VIOLENT', 761: 'VIOLENT', 926: 'VIOLENT', 435: 'VIOLENT', 436: 'VIOLENT', 437: 'VIOLENT', 622: 'VIOLENT', 623: 'VIOLENT', 624: 'VIOLENT', 625: 'VIOLENT', 626: 'VIOLENT', 627: 'VIOLENT', 647: 'VIOLENT', 763: 'VIOLENT', 928: 'VIOLENT', 930: 'VIOLENT', 310: 'PROPERTY', 320: 'PROPERTY', 510: 'PROPERTY', 520: 'PROPERTY', 433: 'PROPERTY', 330: 'PROPERTY', 331: 'PROPERTY', 410: 'PROPERTY', 420: 'PROPERTY', 421: 'PROPERTY', 350: 'PROPERTY', 351: 'PROPERTY', 352: 'PROPERTY', 353: 'PROPERTY', 450: 'PROPERTY', 451: 'PROPERTY', 452: 'PROPERTY', 453: 'PROPERTY', 341: 'PROPERTY', 343: 'PROPERTY', 345: 'PROPERTY', 440: 'PROPERTY', 441: 'PROPERTY', 442: 'PROPERTY', 443: 'PROPERTY', 444: 'PROPERTY', 445: 'PROPERTY', 470: 'PROPERTY', 471: 'PROPERTY', 472: 'PROPERTY', 473: 'PRO

In [43]:
# Look up every crime code in the classification dictionary to label the crime
# as VIOLENT or PROPERTY; codes without an entry are labelled OTHER.
df["CRIME CLASSIFICATION"] = (
    df["Crm Cd"]
    .map(crime_code_classification)
    .fillna("OTHER")
)


In [44]:
# Print how many rows fall into each combination of the two new columns, then
# display the DataFrame itself as the cell output.
new_columns = ["COVID SAH", "CRIME CLASSIFICATION"]
print(df[new_columns].value_counts())
df

COVID SAH  CRIME CLASSIFICATION
False      PROPERTY                259368
           VIOLENT                 175514
           OTHER                   151032
True       PROPERTY                 72437
           VIOLENT                  53351
           OTHER                    41209
Name: count, dtype: int64


Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,COVID SAH,CRIME CLASSIFICATION
0,10304468,01/08/2020 12:00:00 AM,2020-01-08,2230,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,...,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978,False,VIOLENT
1,190101086,01/02/2020 12:00:00 AM,2020-01-01,330,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,...,624.0,,,,700 S HILL ST,,34.0459,-118.2545,False,VIOLENT
2,200110444,04/14/2020 12:00:00 AM,2020-02-13,1200,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,...,845.0,,,,200 E 6TH ST,,34.0448,-118.2474,False,OTHER
3,191501505,01/01/2020 12:00:00 AM,2020-01-01,1730,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,False,OTHER
4,191921269,01/01/2020 12:00:00 AM,2020-01-01,415,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,740.0,,,,14400 TITUS ST,,34.2198,-118.4468,False,OTHER
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
752906,231510379,05/29/2023 12:00:00 AM,2023-05-25,1100,15,N Hollywood,1548,2,662,"BUNCO, GRAND THEFT",...,662.0,,,,5300 DENNY AV,,34.1667,-118.3643,False,OTHER
752907,231604807,01/27/2023 12:00:00 AM,2023-01-26,1800,16,Foothill,1663,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,740.0,,,,12500 BRANFORD ST,,34.2466,-118.4054,False,OTHER
752908,231606525,03/22/2023 12:00:00 AM,2023-03-22,1000,16,Foothill,1602,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,230.0,,,,12800 FILMORE ST,,34.2790,-118.4116,False,VIOLENT
752909,231210064,04/12/2023 12:00:00 AM,2023-04-12,1630,12,77th Street,1239,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",...,230.0,,,,6100 S VERMONT AV,,33.9841,-118.2915,False,VIOLENT
