In [9]:
import pandas as pd
import plotly.express as px

# Compare how often each crime type was reported in 2020 with its average
# yearly count over 2021-2022, and plot the percentage change for the crimes
# that were most frequent in 2020.

# Load the data
data = pd.read_csv("Database/final/crime_data_modified_final.csv")

# Filter data for 2020, 2021, and 2022
data_2020 = data[data['year_rept'] == 2020]
data_2021_2022 = data[data['year_rept'].isin([2021, 2022])]

# Count occurrences of each crime description.
crime_counts_2020 = data_2020['cr_cd_desc'].value_counts()
# Halving the combined 2021+2022 counts gives a per-year average; this
# assumes both years are present in the data.
crime_counts_2021_2022 = data_2021_2022['cr_cd_desc'].value_counts() / 2

# Shorter display labels for long crime descriptions. The keys must match
# the values stored in the 'cr_cd_desc' column exactly; descriptions that
# are not listed here keep their original text.
short_names = {
    "THEFT OF IDENTITY": "ID Theft",
    "BATTERY - SIMPLE ASSAULT": "Simple Assault",
    "BURGLARY FROM VEHICLE": "Vehicle Burglary",
    "INTIMATE PARTNER - SIMPLE ASSAULT": "Domestic Assault",
    "VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)": "VANDALISM FELONY",
    "ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT": "DEADLY WEAPON ASSAULT",
    "THEFT PLAIN - PETTY ($950 & UNDER)": "THEFT PLAIN",
    "THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)": "MOTOR VEHICLE THEFT",
    "VANDALISM - MISDEAMEANOR ($399 OR UNDER)": "VANDALISM MISDEAMEANOR",
}

# Turn each count Series into a two-column DataFrame, applying the short
# labels to the index so both frames share the same merge keys.
df_2020 = (
    crime_counts_2020.rename(index=short_names)
    .rename_axis('Crime Description')
    .reset_index(name='2020 Count')
)
df_2021_2022 = (
    crime_counts_2021_2022.rename(index=short_names)
    .rename_axis('Crime Description')
    .reset_index(name='2021-2022 Average Count')
)

# Merge the DataFrames on Crime Description; a crime missing from one of the
# two periods gets a count of 0 for that period.
merged_data = pd.merge(
    df_2020, df_2021_2022, on='Crime Description', how='outer'
).fillna(0)

# A percentage change is undefined without a 2020 baseline: dividing by a
# zero count would produce inf, which cannot be drawn as a bar. Drop those
# rows before computing the change.
merged_data = merged_data[merged_data['2020 Count'] > 0].copy()

# Calculate the percentage change relative to the 2020 count
baseline_counts = merged_data['2020 Count']
merged_data['Percentage Change'] = (
    (merged_data['2021-2022 Average Count'] - baseline_counts) / baseline_counts
) * 100

# Filter to top 12 crimes in 2020 for better clarity in visualization
merged_data = merged_data.nlargest(12, '2020 Count')

# Create a bar chart
fig = px.bar(
    merged_data,
    x='Crime Description',
    y='Percentage Change',
    title="PERCENTAGE CHANGE IN CRIME RATES FROM 2020 TO (2021,2022)",
    labels={'Percentage Change': 'Percentage Change (%)'},
    text_auto='.2f',  # Show bar labels with two decimal places
)

# Color each bar by the sign of its change: red for a decrease, green
# otherwise (a change of exactly zero is drawn green).
bar_colors = [
    'red' if change < 0 else 'green'
    for change in merged_data['Percentage Change']
]

# Apply the bar colors and customize the bar label font
fig.update_traces(
    marker_color=bar_colors,
    textfont=dict(
        family="Arial, sans-serif",
        size=12,
        color="black",
    ),
)

# Update plot background color
fig.update_layout(
    plot_bgcolor="#fdc1c5",  # Light pink background
    paper_bgcolor="#e6e0e1",  # Light grey background
    template="plotly_dark",  # Apply the Plotly dark template
    font_color="black",
)

# Show the figure
fig.show()