In [1]:
import pandas as pd
import numpy as np

crime_df = pd.read_csv(
    r"C:\Users\nicol\OneDrive\DAT490\RQ2_Crime_Final_Pct.csv", 
    dtype={'tract_geoid': str}
)

crime_df.head()

Unnamed: 0,year_occurred,Time,Highest Offense Description,Crime_Category,Census Block Group,tract_geoid,white_alone_pct,white_non_hispanic_pct,black_african_american_pct,asian_alone_pct,...,occupied_housing_units,vacant_housing_units,vacant_housing_pct,avg_household_size_owner,avg_household_size_renter,avg_household_size,avg_family_size,total_households,median_home_value,median_gross_rent
0,2023,14:35:00,FORGERY - OTHER,Property,4530434002,48453043400,31.1,0.9,1.6,43.9,...,2196,201,8.4,2.36,2.26,2.27,3.06,2196,-,1383
1,2019,21:30:00,THEFT,Property,4530021131,48453002113,18.0,0.0,0.0,80.4,...,1381,28,2.0,2.74,2.33,2.66,3.05,1381,542800,1497
2,2016,18:56:00,POSSESSION OF MARIJUANA,Drug,4530011011,48453001101,4.8,4.2,0.0,76.7,...,873,94,9.7,-,1.36,1.28,2.59,873,487000,1710
3,2017,12:15:00,CRIMINAL MISCHIEF,Property,4530023043,48453002304,18.6,0.0,0.0,69.7,...,2794,185,6.2,1.41,1.33,1.33,2.22,2794,683600,1931
4,2021,18:48:00,FAMILY DISTURBANCE,Violent,4530024123,48453002412,70.0,1.1,0.6,24.2,...,1475,19,1.3,3.09,2.95,3.06,3.49,1475,306800,1669


In [None]:
# Annual crime counts by category
annual_counts = (
    crime_df
        .groupby(['year_occurred', 'Crime_Category'])
        .size()
        .reset_index(name='count')
)

year_totals = (
    crime_df
        .groupby('year_occurred')
        .size()
        .reset_index(name='total_crime')
)

annual = annual_counts.merge(year_totals, on='year_occurred')
annual['share'] = annual['count'] / annual['total_crime']

annual.head()

In [None]:
composition = (
    annual
        .pivot(index='year_occurred', 
               columns='Crime_Category', 
               values='share')
        .fillna(0)
        .reset_index()
)

composition.head()

In [None]:
import matplotlib.pyplot as plt


share_cols = ['Administrative', 'Drug', 'Other', 'Property', 'Public Order', 'Violent']

years = composition['year_occurred'].values
data = composition[share_cols].values  # shape: (n_years, n_categories)

fig, ax = plt.subplots(figsize=(8, 5))

im = ax.imshow(data.T, aspect='auto', origin='lower')

ax.set_xticks(np.arange(len(years)))
ax.set_xticklabels(years, rotation=45, ha="right")

ax.set_yticks(np.arange(len(share_cols)))
ax.set_yticklabels(share_cols)

# Labels and title
ax.set_xlabel("Year")
ax.set_ylabel("Crime category")
ax.set_title("Crime Composition by Category and Year (Share of Total Crime)")

cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Share of total crime")

plt.tight_layout()
plt.show()

In [None]:
cats = ['Administrative', 'Drug', 'Other', 'Property', 'Public Order', 'Violent']

years = composition['year_occurred'].values
x = np.arange(len(years)) 

fig, ax = plt.subplots(figsize=(10, 6))

offset = 0  
spacing = 0.18  

for cat in cats:
    y = composition[cat].values
    ax.plot(x, y + offset, linewidth=2)
    ax.fill_between(x, offset, y + offset, alpha=0.25)
    ax.text(-0.4, offset + np.mean(y), cat, va='center')
    offset += spacing

ax.set_xticks(x)
ax.set_xticklabels(years, rotation=45)

ax.set_xlabel("Year")
ax.set_ylabel("Relative stacked position (not comparable to share)")
ax.set_title("Ridgeline-Style Visualization of Crime Composition Over Time")

for spine in ['top', 'right', 'left']:
    ax.spines[spine].set_visible(False)
ax.yaxis.set_visible(False)

plt.tight_layout()
plt.show()