In [None]:
import pandas as pd
import numpy as np

# Load the raw crime export into a DataFrame.
df = pd.read_csv('/content/Crimes_-_2025_20250709.csv')


# Drop columns that are not used by the rest of this script.
columns_to_drop = ['Case Number', 'IUCR', 'X Coordinate', 'Y Coordinate', 'Location']
# errors='ignore' keeps this from raising if any listed column is absent.
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Convert 'Date' to datetime; values that cannot be parsed become NaT.
# NOTE(review): no explicit format= is passed, so pandas has to infer it.
# 'Updated On' in this file looks like '%m/%d/%Y %I:%M:%S %p'; if 'Date' uses
# the same layout, passing it explicitly would be faster and unambiguous -
# confirm against the raw CSV before changing.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Extract time-based features for hierarchy.
# Rows whose 'Date' is NaT get missing values in all four derived columns.
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['DayOfWeek'] = df['Date'].dt.day_name()
df['Hour'] = df['Date'].dt.hour

# Standardize text columns to Title Case (missing values stay missing).
df['Primary Type'] = df['Primary Type'].str.title()
df['Location Description'] = df['Location Description'].str.title()

# Replace missing values.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df[col]: that form works on an intermediate
# object, raises a pandas FutureWarning, and stops updating df in pandas 3.0.
df['Location Description'] = df['Location Description'].fillna('Unknown')

# Convert Arrest and Domestic to Boolean.
# NOTE(review): astype(bool) maps NaN and any non-empty string to True;
# presumably read_csv already parsed these columns as booleans - verify.
df['Arrest'] = df['Arrest'].astype(bool)
df['Domestic'] = df['Domestic'].astype(bool)

# Create custom column: Time of Day
def get_time_of_day(hour):
    """Return the part-of-day label for an hour of the day.

    Args:
        hour: Hour of the day as a number (0-23), or a missing value
            (None / NaN) when the source timestamp could not be parsed.

    Returns:
        'Morning' for 5 <= hour < 12, 'Afternoon' for 12 <= hour < 17,
        'Evening' for 17 <= hour < 21, 'Night' for every other hour, and
        'Unknown' when the hour is missing.
    """
    # A missing hour fails every range comparison below and would otherwise
    # be silently labelled 'Night'; report it explicitly instead.
    if pd.isna(hour):
        return 'Unknown'
    if 5 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 17:
        return 'Afternoon'
    elif 17 <= hour < 21:
        return 'Evening'
    else:
        return 'Night'

# Label each record with its part of the day, derived from the 'Hour' column.
df['Time of Day'] = df['Hour'].map(get_time_of_day)

# Mark records whose primary type is one of the violent categories.
violent_crimes = [
    'Homicide',
    'Assault',
    'Battery',
    'Robbery',
    'Criminal Sexual Assault',
]
df['Violent Crime'] = df['Primary Type'].isin(violent_crimes)

# Discard rows that are missing either coordinate.
df.dropna(subset=['Latitude', 'Longitude'], how='any', inplace=True)

# Preview the first rows as a final sanity check.
print(df.head(5))

# Write the cleaned data to disk without the index column.
df.to_csv('chicago_crime_cleaned.csv', index=False)


  df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Location Description'].fillna('Unknown', inplace=True)


         ID       Date                 Block Primary Type  \
0  13887564 2025-07-01       017XX W ERIE ST        Theft   
1  13886571 2025-07-01   048XX S ASHLAND AVE      Battery   
2  13885818 2025-07-01       022XX E 84TH ST        Theft   
3  13886478 2025-07-01     014XX W WILLOW ST        Theft   
4  13887522 2025-07-01  032XX S CARPENTER ST      Assault   

                      Description          Location Description  Arrest  \
0                       OVER $500                        Street   False   
1  AGGRAVATED OF A SENIOR CITIZEN  Commercial / Business Office    True   
2                  $500 AND UNDER                        Street   False   
3                       OVER $500                         Alley   False   
4                          SIMPLE                     Apartment   False   

   Domestic  Beat  District  ...  FBI Code  Year              Updated On  \
0     False  1215        12  ...        06  2025  07/08/2025 03:44:15 PM   
1     False   933         9  .