In [148]:
import pandas as pd
import altair as alt
import geopandas as gpd
from shapely import wkt
# Turn off Altair's max-rows check for chart datasets.
alt.data_transformers.disable_max_rows()

# --- City boundary ---------------------------------------------------------
# The CSV stores each shape as WKT text in `the_geom`; parse it into shapely
# geometries, wrap the table as a GeoDataFrame tagged EPSG:4326, and export.
df_csv = pd.read_csv('../data/chicago_boundary.csv')
df_csv['geometry'] = df_csv['the_geom'].map(wkt.loads)
gdf_chicago = gpd.GeoDataFrame(df_csv, crs="EPSG:4326", geometry='geometry')
gdf_chicago.to_file('../data/chicago_boundary.geojson', driver='GeoJSON')

# --- Community areas -------------------------------------------------------
# Same WKT -> GeoDataFrame -> GeoJSON conversion for the community polygons.
df_com = pd.read_csv('../data/chicago_communities.csv')
df_com['geometry'] = df_com['the_geom'].map(wkt.loads)
gdf = gpd.GeoDataFrame(df_com, crs='EPSG:4326', geometry='geometry')
gdf.to_file('../data/community_areas.geojson', driver='GeoJSON')


In [149]:
# Load the raw crime records. low_memory=False makes pandas parse the file in
# one pass instead of in chunks, so column dtypes are inferred consistently.
df = pd.read_csv("../data/crimes_chicago.csv", low_memory=False)

# Normalise headers: trim, lower-case, and replace spaces with underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Parse the 12-hour timestamp; values that do not match the format become NaT
# (errors='coerce') instead of raising.
df['date'] = pd.to_datetime(
    df['date'],
    format='%m/%d/%Y %I:%M:%S %p',
    errors='coerce',
)

# Calendar parts of the timestamp. Rows whose date is NaT get missing values.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['hour'] = df['date'].dt.hour
df['day'] = df['date'].dt.day

# Keep only records that have both coordinates, then cast the two flag columns
# to bool. A real `astype` is used here rather than `df.loc[:, col] = ...`:
# on pandas >= 2.0 the `.loc[:, col]` form writes into the existing column in
# place, so an object-dtype column would keep its object dtype and the cast
# would be silently lost. `astype` returns a new, independent frame.
df = (
    df.dropna(subset=['latitude', 'longitude'])
      .astype({'arrest': bool, 'domestic': bool})
)


In [150]:
def month_to_season(m):
    """Return the season name for a month number.

    12, 1, 2 -> 'Winter'; 3-5 -> 'Spring'; 6-8 -> 'Summer'.
    Every other value (9-11, but also anything unrecognised such as NaN)
    falls through to 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if m in months:
            return season
    # Default bucket: no validation is performed on the input.
    return 'Fall'

# Calendar features derived from the parsed timestamp.
df['MonthNumber'] = df['date'].dt.month
df['DayOfWeek'] = df['date'].dt.day_name()
df['Season'] = df['MonthNumber'].apply(month_to_season)
df['hour_label'] = df['hour']

# Display order for hours: start at 06:00 and wrap around past midnight.
hour_order = list(range(6, 24)) + list(range(0, 6))

# hour_order contains every hour 0-23, so this filter only removes rows whose
# hour is not one of those values (e.g. missing). The explicit .copy() makes
# the result an independent frame, so the column assignments below do not
# write into a filtered slice (which triggers SettingWithCopyWarning).
df = df[df['hour_label'].isin(hour_order)].copy()
df['hour_label'] = pd.Categorical(df['hour_label'], categories=hour_order, ordered=True)

# Weekdays as an ordered categorical, Monday first.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['DayOfWeek'] = pd.Categorical(df['DayOfWeek'], categories=day_order, ordered=True)



In [151]:

# Lookup from a raw `primary_type` label to its major crime category.
# Built one category at a time; key order matches the order written here.
map_to_major = {
    **dict.fromkeys(
        (
            "THEFT",
            "BURGLARY",
            "MOTOR VEHICLE THEFT",
            "DECEPTIVE PRACTICE",
        ),
        "Theft",
    ),
    **dict.fromkeys(
        (
            "ARSON",
            "CRIMINAL DAMAGE",
        ),
        "Criminal Damage",
    ),
    **dict.fromkeys(
        (
            "ASSAULT",
            "BATTERY",
            "HOMICIDE",
            "STALKING",
            "INTIMIDATION",
            "CRIMINAL SEXUAL ASSAULT",
            "SEX OFFENSE",
            "OFFENSE INVOLVING CHILDREN",
            "KIDNAPPING",
            "ROBBERY",
            "WEAPONS VIOLATION",
        ),
        "Violent Crime",
    ),
    **dict.fromkeys(
        (
            "NARCOTICS",
            "OTHER NARCOTIC VIOLATION",
            "PROSTITUTION",
            "LIQUOR LAW VIOLATION",
            "GAMBLING",
            "PUBLIC PEACE VIOLATION",
            "INTERFERENCE WITH PUBLIC OFFICER",
            "CONCEALED CARRY LICENSE VIOLATION",
        ),
        "Drug / Minor Offense",
    ),
    **dict.fromkeys(
        (
            "OBSCENITY",
            "PUBLIC INDECENCY",
            "HUMAN TRAFFICKING",
            "NON-CRIMINAL",
            "OTHER OFFENSE",
            "CRIMINAL TRESPASS",
        ),
        "Other Offense",
    ),
}

# Major category per record; a primary type with no entry in map_to_major
# keeps its raw `primary_type` label as the category.
df["Crime Category"] = (
    df["primary_type"]
    .map(map_to_major)
    .fillna(df["primary_type"])
)

# Two-way split: only the 'Violent Crime' category is labelled 'Violent'.
df['Crime Type'] = (
    df['Crime Category']
    .eq('Violent Crime')
    .map({True: 'Violent', False: 'Non-Violent'})
)



In [152]:
# Lookup from a raw `location_description` label to a coarse location category.
# Built one category at a time; key order matches the order written here.
location_mapping = {
    **dict.fromkeys(
        (
            'APARTMENT',
            'RESIDENCE',
            'RESIDENCE - GARAGE',
            'RESIDENCE - YARD (FRONT / BACK)',
            'RESIDENCE - PORCH / HALLWAY',
            'CHA APARTMENT',
            'CHA HALLWAY / STAIRWELL / ELEVATOR',
            'CHA HALLWAY',
            'CHA GROUNDS',
            'HOUSE',
            'YARD',
            'DRIVEWAY - RESIDENTIAL',
            'BASEMENT',
        ),
        'Residential',
    ),
    **dict.fromkeys(
        (
            'STREET',
            'SIDEWALK',
            'ALLEY',
            'HIGHWAY / EXPRESSWAY',
            'BRIDGE',
            'LAKEFRONT / WATERFRONT / RIVERBANK',
        ),
        'Public/Street',
    ),
    **dict.fromkeys(
        (
            'BAR OR TAVERN',
            'TAVERN / LIQUOR STORE',
            'CONVENIENCE STORE',
            'SMALL RETAIL STORE',
            'RETAIL STORE',
            'RESTAURANT',
            'GROCERY FOOD STORE',
            'DRUG STORE',
            'DEPARTMENT STORE',
            'AUTO / BOAT / RV DEALERSHIP',
            'APPLIANCE STORE',
            'PAWN SHOP',
            'CAR WASH',
            'CURRENCY EXCHANGE',
            'BANK',
            'SAVINGS AND LOAN',
        ),
        'Commercial',
    ),
    **dict.fromkeys(
        (
            'SCHOOL - PUBLIC BUILDING',
            'SCHOOL - PRIVATE BUILDING',
            'SCHOOL - PUBLIC GROUNDS',
            'SCHOOL - PRIVATE GROUNDS',
            'COLLEGE / UNIVERSITY - RESIDENCE HALL',
            'COLLEGE / UNIVERSITY - GROUNDS',
            'HOSPITAL BUILDING / GROUNDS',
            'HOSPITAL',
            'MEDICAL / DENTAL OFFICE',
            'FIRE STATION',
            'POLICE FACILITY / VEHICLE PARKING LOT',
            'GOVERNMENT BUILDING / PROPERTY',
            'FEDERAL BUILDING',
            'LIBRARY',
            'DAY CARE CENTER',
            'CHURCH / SYNAGOGUE / PLACE OF WORSHIP',
        ),
        'Public/Institutional',
    ),
    **dict.fromkeys(
        (
            'VEHICLE NON-COMMERCIAL',
            'VEHICLE - OTHER RIDE SHARE SERVICE (LYFT, UBER, ETC.)',
            'VEHICLE - COMMERCIAL',
            'VEHICLE - COMMERCIAL: TROLLEY BUS',
            'VEHICLE - COMMERCIAL: ENTERTAINMENT / PARTY BUS',
            'VEHICLE - DELIVERY TRUCK',
            'AUTO',
            'PARKING LOT',
            'PARKING LOT / GARAGE (NON RESIDENTIAL)',
            'CHA PARKING LOT / GROUNDS',
            'CTA PARKING LOT / GARAGE / OTHER PROPERTY',
            'AIRPORT PARKING LOT',
            'CTA BUS',
            'CTA BUS STOP',
            'CTA PLATFORM',
            'CTA TRAIN',
            'CTA TRACKS - RIGHT OF WAY',
            'CTA "L" TRAIN',
            'CTA PROPERTY',
        ),
        'Transport',
    ),
    **dict.fromkeys(
        (
            'OTHER (SPECIFY)',
            'NON-CRIMINAL',
            'CEMETARY',
            'KENNEL',
            'ANIMAL HOSPITAL',
            'GANGWAY',
            'NEWSSTAND',
        ),
        'Other',
    ),
}

# Coarse location category; descriptions with no entry in location_mapping
# (missing ones included) are labelled 'Other'.
df['Location Category'] = (
    df['location_description']
    .map(location_mapping)
    .fillna('Other')
)

# Text label for the boolean `domestic` flag.
df['Domestic'] = df['domestic'].map({True: 'Domestic', False: 'Non-Domestic'})


In [None]:
# Violent-crime subset, copied so the edits below do not touch `df`.
df_violent = df.loc[df['Crime Category'].eq('Violent Crime')].copy()

# Collapse the listed primary types into 'Other' (sexual assault is folded
# into 'Assault'); types not listed keep their own name. str.capitalize()
# then leaves only the first letter upper-case.
df_violent['Crime Group'] = (
    df_violent['primary_type']
    .replace({
        **dict.fromkeys(
            (
                'HOMICIDE',
                'INTIMIDATION',
                'KIDNAPPING',
                'STALKING',
                'SEX OFFENSE',
                'OFFENSE INVOLVING CHILDREN',
            ),
            'Other',
        ),
        'CRIMINAL SEXUAL ASSAULT': 'Assault',
    })
    .str.capitalize()
)
df_violent['primary_type'] = df_violent['primary_type'].str.capitalize()

# Write both the full cleaned table and the violent-crime subset to CSV,
# without the index column.
df.to_csv('../data/chicago_crimes_clean.csv', index=False)
df_violent.to_csv('../data/chicago_violent_crimes.csv', index=False)
