In [1]:
from tqdm import tqdm
import time
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os, glob
#import tick customization tools
import matplotlib.ticker as mticks
import matplotlib.dates as mdates
## Setting figures to timeseries-friendly
plt.rcParams['figure.figsize'] = (12,4)
plt.rcParams['figure.facecolor'] = 'white'
sns.set_context("talk", font_scale=0.9)
# set random seed
SEED = 321
np.random.seed(SEED)
#display more columns
pd.set_option('display.max_columns',50)



# Loading Data

In [2]:
folder = "Data/Chicago/"
crime_files = sorted(glob.glob(folder+"*.csv"))
chicago_df = pd.concat([pd.read_csv(f) for f in crime_files])
chicago_df

Unnamed: 0,ID,Date,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Latitude,Longitude
0,1358218,01/01/2001 01:00:00 PM,ASSAULT,SIMPLE,APARTMENT,False,False,312,3.0,,41.778744,-87.612135
1,1310288,01/01/2001 01:00:00 AM,CRIMINAL DAMAGE,TO PROPERTY,RESIDENCE,False,False,621,6.0,,41.756650,-87.641608
2,1310393,01/01/2001 01:00:00 AM,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1614,16.0,,41.974911,-87.846348
3,1316324,01/01/2001 01:00:00 PM,THEFT,OVER $500,STREET,False,False,1513,15.0,,41.869008,-87.773947
4,1311626,01/01/2001 01:00:00 AM,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,1033,10.0,,41.848786,-87.704087
...,...,...,...,...,...,...,...,...,...,...,...,...
123545,13124429,06/30/2023 12:50:00 AM,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,1925,19.0,46.0,41.949093,-87.643737
123546,13124821,06/30/2023 12:50:00 PM,THEFT,RETAIL THEFT,TAVERN / LIQUOR STORE,False,False,1924,19.0,44.0,41.939906,-87.654410
123547,13124281,06/30/2023 12:52:00 AM,SEX OFFENSE,PUBLIC INDECENCY,STREET,False,False,2534,25.0,36.0,41.909763,-87.735792
123548,13124301,06/30/2023 12:57:00 AM,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,832,8.0,18.0,41.761952,-87.684388


# Chicago Dataframe

In [3]:
chicago_df.isna().sum()

ID                           0
Date                         0
Primary Type                 0
Description                  0
Location Description     10687
Arrest                       0
Domestic                     0
Beat                         0
District                    47
Ward                    614849
Latitude                 88034
Longitude                88034
dtype: int64

In [4]:
chicago_df.dropna(inplace = True)
chicago_df

Unnamed: 0,ID,Date,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Latitude,Longitude
8,6154338,01/01/2001 01:00:00 PM,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,831,8.0,15.0,41.774819,-87.702896
10,3769790,01/01/2001 01:00:00 PM,THEFT,AGG: FINANCIAL ID THEFT,RESIDENCE,False,False,523,5.0,34.0,41.673171,-87.638000
13,3206463,01/01/2001 01:00:00 AM,CRIM SEXUAL ASSAULT,PREDATORY,RESIDENCE,False,True,835,8.0,18.0,41.735507,-87.690095
22,3212105,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,913,9.0,14.0,41.823730,-87.698440
44,5462733,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG CRIM SEX ABUSE FAM MEMBER,RESIDENCE,False,True,233,2.0,20.0,41.789084,-87.620849
...,...,...,...,...,...,...,...,...,...,...,...,...
123545,13124429,06/30/2023 12:50:00 AM,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,1925,19.0,46.0,41.949093,-87.643737
123546,13124821,06/30/2023 12:50:00 PM,THEFT,RETAIL THEFT,TAVERN / LIQUOR STORE,False,False,1924,19.0,44.0,41.939906,-87.654410
123547,13124281,06/30/2023 12:52:00 AM,SEX OFFENSE,PUBLIC INDECENCY,STREET,False,False,2534,25.0,36.0,41.909763,-87.735792
123548,13124301,06/30/2023 12:57:00 AM,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,832,8.0,18.0,41.761952,-87.684388


In [5]:
chicago_df['Datetime'] = pd.to_datetime(chicago_df['Date'], format="%m/%d/%Y %H:%M:%S %p")
chicago_df = chicago_df.sort_values('Datetime')
chicago_df = chicago_df.set_index('Datetime')
chicago_df.info()
chicago_df.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7135456 entries, 2001-01-01 01:00:00 to 2023-06-30 12:58:00
Data columns (total 12 columns):
 #   Column                Dtype  
---  ------                -----  
 0   ID                    int64  
 1   Date                  object 
 2   Primary Type          object 
 3   Description           object 
 4   Location Description  object 
 5   Arrest                bool   
 6   Domestic              bool   
 7   Beat                  int64  
 8   District              float64
 9   Ward                  float64
 10  Latitude              float64
 11  Longitude             float64
dtypes: bool(2), float64(4), int64(2), object(4)
memory usage: 612.4+ MB


Unnamed: 0_level_0,ID,Date,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Latitude,Longitude
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2001-01-01 01:00:00,6154338,01/01/2001 01:00:00 PM,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,831,8.0,15.0,41.774819,-87.702896
2001-01-01 01:00:00,3769790,01/01/2001 01:00:00 PM,THEFT,AGG: FINANCIAL ID THEFT,RESIDENCE,False,False,523,5.0,34.0,41.673171,-87.638
2001-01-01 01:00:00,3206463,01/01/2001 01:00:00 AM,CRIM SEXUAL ASSAULT,PREDATORY,RESIDENCE,False,True,835,8.0,18.0,41.735507,-87.690095
2001-01-01 01:00:00,3212105,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,913,9.0,14.0,41.82373,-87.69844
2001-01-01 01:00:00,5462733,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG CRIM SEX ABUSE FAM MEMBER,RESIDENCE,False,True,233,2.0,20.0,41.789084,-87.620849


## Holiday Dataframe

In [6]:
crime_files = sorted(glob.glob("Data/"+"*.csv"))
holiday_df = pd.concat([pd.read_csv(f) for f in crime_files])
holiday_df

Unnamed: 0,Date,US Holidays,State Holidays
0,2001-01-01,New Year's Day,New Year's Day
1,2001-01-02,,
2,2001-01-03,,
3,2001-01-04,,
4,2001-01-05,,
...,...,...,...
7942,2022-09-30,,
7943,2022-10-01,,
7944,2022-10-02,,
7945,2022-10-03,,


In [7]:
# nulls
holiday_df.isna().sum()

Date                 0
US Holidays       7703
State Holidays    7646
dtype: int64

In [8]:
holiday_df.dropna(inplace = True)
holiday_df.drop(columns = ["State Holidays"], inplace = True)
holiday_df

Unnamed: 0,Date,US Holidays
0,2001-01-01,New Year's Day
14,2001-01-15,Martin Luther King Jr. Day
49,2001-02-19,Washington's Birthday
147,2001-05-28,Memorial Day
184,2001-07-04,Independence Day
...,...,...
7819,2022-05-30,Memorial Day
7839,2022-06-19,Juneteenth National Independence Day
7840,2022-06-20,Juneteenth National Independence Day (Observed)
7854,2022-07-04,Independence Day


In [47]:
!pip install holidays

Collecting holidays
  Downloading holidays-0.28-py3-none-any.whl (642 kB)
     -------------------------------------- 642.9/642.9 kB 8.0 MB/s eta 0:00:00
Installing collected packages: holidays
Successfully installed holidays-0.28


In [48]:
import holidays
import datetime as dt
from holidays import country_holidays

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Int64Index'

In [50]:
## making a date range that covers full dataset
all_days = pd.date_range(chicago_df["Date"].min(), chicago_df["Date"].max())
all_days



DatetimeIndex(['2001-01-01 01:00:00', '2001-01-02 01:00:00',
               '2001-01-03 01:00:00', '2001-01-04 01:00:00',
               '2001-01-05 01:00:00', '2001-01-06 01:00:00',
               '2001-01-07 01:00:00', '2001-01-08 01:00:00',
               '2001-01-09 01:00:00', '2001-01-10 01:00:00',
               ...
               '2022-12-22 01:00:00', '2022-12-23 01:00:00',
               '2022-12-24 01:00:00', '2022-12-25 01:00:00',
               '2022-12-26 01:00:00', '2022-12-27 01:00:00',
               '2022-12-28 01:00:00', '2022-12-29 01:00:00',
               '2022-12-30 01:00:00', '2022-12-31 01:00:00'],
              dtype='datetime64[ns]', length=8035, freq='D')

In [51]:
## Create an instance of the US country holidays.
us_holidays = country_holidays('US')
us_holidays

holidays.country_holidays('US')

In [52]:
## Testing first date
print(all_days[0])
us_holidays.get(all_days[0])

2001-01-01 01:00:00


"New Year's Day"

In [53]:
## Getting us holidays for all dates
holiday_list = [us_holidays.get(day) for day in all_days]
holiday_list[:5]

["New Year's Day", None, None, None, None]

In [60]:
# For a specific subdivisions (e.g. state or province):
co_holidays = country_holidays('US', subdiv='CO')
co_holidays

holidays.country_holidays('US', subdiv='CO')

In [61]:
## Saving both holiday types as columns
chicago_df["US Holiday"] = [us_holidays.get(day) for day in chicago_df['Date']]
chicago_df['CO Holiday'] = [co_holidays.get(day) for day in chicago_df['Date']]
chicago_df.head()

Unnamed: 0_level_0,ID,Date,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Latitude,Longitude,Total # of Crimes,US Holiday,CO Holiday
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2001-01-01 01:00:00,6154338,01/01/2001 01:00:00 PM,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,831,8.0,15.0,41.774819,-87.702896,,New Year's Day,New Year's Day
2001-01-01 01:00:00,3769790,01/01/2001 01:00:00 PM,THEFT,AGG: FINANCIAL ID THEFT,RESIDENCE,False,False,523,5.0,34.0,41.673171,-87.638,,New Year's Day,New Year's Day
2001-01-01 01:00:00,3206463,01/01/2001 01:00:00 AM,CRIM SEXUAL ASSAULT,PREDATORY,RESIDENCE,False,True,835,8.0,18.0,41.735507,-87.690095,,New Year's Day,New Year's Day
2001-01-01 01:00:00,3212105,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,913,9.0,14.0,41.82373,-87.69844,,New Year's Day,New Year's Day
2001-01-01 01:00:00,5462733,01/01/2001 01:00:00 AM,OFFENSE INVOLVING CHILDREN,AGG CRIM SEX ABUSE FAM MEMBER,RESIDENCE,False,True,233,2.0,20.0,41.789084,-87.620849,,New Year's Day,New Year's Day


In [None]:
## US Holidays
chicago_df['US Holiday'].value_counts()

In [None]:
## MD Holidays
chicago_df['CO Holiday'].value_counts()

In [None]:
ax = sns.barplot(data= chicago_dfgo_df, x='US Holiday',y='Total_Incidents',estimator=np.sum)
ax.set_xticklabels(ax.get_xticklabels(),rotation=45, ha='right');

# Resampled Dataframe

In [15]:
## Creating a Total # of Crimes
# getting list of  unique crime descriptions
crime_list = chicago_df['Description'].unique()
crime_list

array(['FINANCIAL ID THEFT: OVER $300', 'AGG: FINANCIAL ID THEFT',
       'PREDATORY', 'AGG SEX ASSLT OF CHILD FAM MBR',
       'AGG CRIM SEX ABUSE FAM MEMBER', 'OVER $500',
       'AGG CRIMINAL SEXUAL ABUSE', 'FIRST DEGREE MURDER',
       'THEFT/RECOVERY: AUTOMOBILE', 'CRIM SEX ABUSE BY FAM MEMBER',
       'SEXUAL EXPLOITATION OF A CHILD', 'AUTOMOBILE',
       'OTHER VEHICLE OFFENSE', 'FRAUD OR CONFIDENCE GAME',
       'SEX ASSLT OF CHILD BY FAM MBR', 'CREDIT CARD FRAUD',
       '$500 AND UNDER', 'FINANCIAL ID THEFT:$300 &UNDER',
       'AGGRAVATED: OTHER', 'NON-AGGRAVATED', 'EMBEZZLEMENT', 'FORGERY',
       'HARASSMENT BY ELECTRONIC MEANS', 'BOGUS CHECK', 'CHILD ABUSE',
       'FROM BUILDING', 'CONTRIBUTE DELINQUENCY OF A CHILD',
       'HARASSMENT BY TELEPHONE', 'FINAN EXPLOIT-ELDERLY/DISABLED',
       'CRIMINAL SEXUAL ABUSE', 'OTHER CRIME INVOLVING PROPERTY',
       'TRUCK, BUS, MOTOR HOME', 'FORCIBLE ENTRY', 'POSS: CRACK',
       'SIMPLE', 'POSS: HEROIN(WHITE)', 'UNLAWFUL POSS OF 

In [18]:
# Creating an empty dictionary  
CRIMES = {}
# 3. Loop through the list of crimes
for crime in crime_list:
    # I. Save a temp df of just the rows that match the crime
   temp = chicago_df.loc[chicago_df['Description']==crime].copy()
    # II. Resample the temp DataFrame as Daily data (crime counts) 
    # and keep ONLY the .size() 
   temp_res = temp.resample("D").size() 
   #III. Save the temporary DataFrame in the dictionary,
   #using the crime description as the key.
   CRIMES[crime] = temp_res.copy() 
CRIMES.keys()

dict_keys(['FINANCIAL ID THEFT: OVER $300', 'AGG: FINANCIAL ID THEFT', 'PREDATORY', 'AGG SEX ASSLT OF CHILD FAM MBR', 'AGG CRIM SEX ABUSE FAM MEMBER', 'OVER $500', 'AGG CRIMINAL SEXUAL ABUSE', 'FIRST DEGREE MURDER', 'THEFT/RECOVERY: AUTOMOBILE', 'CRIM SEX ABUSE BY FAM MEMBER', 'SEXUAL EXPLOITATION OF A CHILD', 'AUTOMOBILE', 'OTHER VEHICLE OFFENSE', 'FRAUD OR CONFIDENCE GAME', 'SEX ASSLT OF CHILD BY FAM MBR', 'CREDIT CARD FRAUD', '$500 AND UNDER', 'FINANCIAL ID THEFT:$300 &UNDER', 'AGGRAVATED: OTHER', 'NON-AGGRAVATED', 'EMBEZZLEMENT', 'FORGERY', 'HARASSMENT BY ELECTRONIC MEANS', 'BOGUS CHECK', 'CHILD ABUSE', 'FROM BUILDING', 'CONTRIBUTE DELINQUENCY OF A CHILD', 'HARASSMENT BY TELEPHONE', 'FINAN EXPLOIT-ELDERLY/DISABLED', 'CRIMINAL SEXUAL ABUSE', 'OTHER CRIME INVOLVING PROPERTY', 'TRUCK, BUS, MOTOR HOME', 'FORCIBLE ENTRY', 'POSS: CRACK', 'SIMPLE', 'POSS: HEROIN(WHITE)', 'UNLAWFUL POSS OF HANDGUN', 'FALSE POLICE REPORT', 'AGGRAVATED', 'AGGRAVATED VEHICULAR HIJACKING', 'POSS: HEROIN(BLACK 

In [23]:
resampled_df = pd.DataFrame(CRIMES)
resampled_df

Unnamed: 0_level_0,FINANCIAL ID THEFT: OVER $300,AGG: FINANCIAL ID THEFT,PREDATORY,AGG SEX ASSLT OF CHILD FAM MBR,AGG CRIM SEX ABUSE FAM MEMBER,OVER $500,AGG CRIMINAL SEXUAL ABUSE,FIRST DEGREE MURDER,THEFT/RECOVERY: AUTOMOBILE,CRIM SEX ABUSE BY FAM MEMBER,SEXUAL EXPLOITATION OF A CHILD,AUTOMOBILE,OTHER VEHICLE OFFENSE,FRAUD OR CONFIDENCE GAME,SEX ASSLT OF CHILD BY FAM MBR,CREDIT CARD FRAUD,$500 AND UNDER,FINANCIAL ID THEFT:$300 &UNDER,AGGRAVATED: OTHER,NON-AGGRAVATED,EMBEZZLEMENT,FORGERY,HARASSMENT BY ELECTRONIC MEANS,BOGUS CHECK,CHILD ABUSE,...,AGGRAVATED DOMESTIC BATTERY - OTHER FIREARM,CONTRIBUTE TO THE DELINQUENCY OF CHILD,POSSESS - HEROIN (BLACK TAR),OTHER ARSON / EXPLOSIVE INCIDENT,AGGRAVATED OF AN UNBORN CHILD,"ATTEMPT - CYCLE, SCOOTER, BIKE WITH VIN",POSSESSION - EXPLOSIVE / INCENDIARY DEVICE,MANUFACTURE / DELIVER - AMPHETAMINES,INTERFERE WITH EMERGENCY EQUIPMENT,UNLAWFUL SALE - HANDGUN,OF AN UNBORN CHILD,GAME / AMUSEMENT DEVICE,MANUFACTURE / DELIVER - METHAMPHETAMINE,MANUFACTURE / DELIVER - SYNTHETIC MARIJUANA,"AGG. RITUAL MUTILATION - HANDS, FISTS, FEET, SERIOUS INJURY",SELL / ADVERTISE FIREWORKS,"ATTEMPT - CYCLE, SCOOTER, BIKE NO VIN",DISCLOSE DOMESTIC VIOLENCE VICTIM LOCATION,DELIVER CANNABIS TO PERSON UNDER 18,SECOND DEGREE MURDER,POSSESS - HYPODERMIC NEEDLE,"THEFT / RECOVERY - CYCLE, SCOOTER, BIKE NO VIN",DELIVER CONTROLLED SUBSTANCES TO PERSON UNDER 18,POSSESS KEYS OR DEVICE TO COIN MACHINE,POSSESS - LOOK-ALIKE DRUGS
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2001-01-01,96.0,7.0,17,28.0,15.0,5,13.0,2,1.0,11.0,2.0,2,1,4,8.0,3,2,27.0,8.0,1,3.0,4,1,1,3.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-02,5.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,2.0,0.0,0,1.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-03,1.0,0.0,0,0.0,0.0,0,1.0,0,0.0,0.0,0.0,1,0,1,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-04,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-05,1.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,2,0,0,0.0,0,0,0.0,0.0,0,0.0,1,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-26,,,1,,,77,,2,,,,50,2,6,,4,62,,,3,,2,7,0,1.0,...,,,0.0,,1.0,0.0,,,,,,,,,,,,,,,,,,,
2023-06-27,,,0,,,71,,3,,,,52,3,2,,3,50,,,5,,3,8,0,3.0,...,,,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2023-06-28,,,0,,,48,,2,,,,43,3,0,,2,32,,,1,,0,3,0,,...,,,,,,1.0,,,,,,,,,,,,,,,,,,,
2023-06-29,,,0,,,55,,4,,,,51,0,8,,4,41,,,3,,2,3,0,,...,,,,,,,,,,,,,,,,,,,,,,,,,


In [25]:
## saving to disk
resampled_df.to_csv("Data/Chicago/Chicago_crime_counts.csv")

In [10]:
chicago_df.groupby("Description").size().head()

Description
$300 AND UNDER                         10
$500 AND UNDER                     552169
ABUSE / NEGLECT - CARE FACILITY        38
ABUSE/NEGLECT: CARE FACILITY          133
ADULTRY                                 6
dtype: int64

In [11]:
# II. Resample the temp DataFrame as Daily data (crime counts)
# and keep ONLY the .size()
temp_res = temp.resample("D").size()
temp_res




NameError: name 'temp' is not defined

In [26]:
## testing our saved data
resampled_df = pd.read_csv("Data/Chicago/Chicago_crime_counts.csv", parse_dates=['Datetime'], index_col=0)
resampled_df

Unnamed: 0_level_0,FINANCIAL ID THEFT: OVER $300,AGG: FINANCIAL ID THEFT,PREDATORY,AGG SEX ASSLT OF CHILD FAM MBR,AGG CRIM SEX ABUSE FAM MEMBER,OVER $500,AGG CRIMINAL SEXUAL ABUSE,FIRST DEGREE MURDER,THEFT/RECOVERY: AUTOMOBILE,CRIM SEX ABUSE BY FAM MEMBER,SEXUAL EXPLOITATION OF A CHILD,AUTOMOBILE,OTHER VEHICLE OFFENSE,FRAUD OR CONFIDENCE GAME,SEX ASSLT OF CHILD BY FAM MBR,CREDIT CARD FRAUD,$500 AND UNDER,FINANCIAL ID THEFT:$300 &UNDER,AGGRAVATED: OTHER,NON-AGGRAVATED,EMBEZZLEMENT,FORGERY,HARASSMENT BY ELECTRONIC MEANS,BOGUS CHECK,CHILD ABUSE,...,AGGRAVATED DOMESTIC BATTERY - OTHER FIREARM,CONTRIBUTE TO THE DELINQUENCY OF CHILD,POSSESS - HEROIN (BLACK TAR),OTHER ARSON / EXPLOSIVE INCIDENT,AGGRAVATED OF AN UNBORN CHILD,"ATTEMPT - CYCLE, SCOOTER, BIKE WITH VIN",POSSESSION - EXPLOSIVE / INCENDIARY DEVICE,MANUFACTURE / DELIVER - AMPHETAMINES,INTERFERE WITH EMERGENCY EQUIPMENT,UNLAWFUL SALE - HANDGUN,OF AN UNBORN CHILD,GAME / AMUSEMENT DEVICE,MANUFACTURE / DELIVER - METHAMPHETAMINE,MANUFACTURE / DELIVER - SYNTHETIC MARIJUANA,"AGG. RITUAL MUTILATION - HANDS, FISTS, FEET, SERIOUS INJURY",SELL / ADVERTISE FIREWORKS,"ATTEMPT - CYCLE, SCOOTER, BIKE NO VIN",DISCLOSE DOMESTIC VIOLENCE VICTIM LOCATION,DELIVER CANNABIS TO PERSON UNDER 18,SECOND DEGREE MURDER,POSSESS - HYPODERMIC NEEDLE,"THEFT / RECOVERY - CYCLE, SCOOTER, BIKE NO VIN",DELIVER CONTROLLED SUBSTANCES TO PERSON UNDER 18,POSSESS KEYS OR DEVICE TO COIN MACHINE,POSSESS - LOOK-ALIKE DRUGS
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2001-01-01,96.0,7.0,17,28.0,15.0,5,13.0,2,1.0,11.0,2.0,2,1,4,8.0,3,2,27.0,8.0,1,3.0,4,1,1,3.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-02,5.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,2.0,0.0,0,1.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-03,1.0,0.0,0,0.0,0.0,0,1.0,0,0.0,0.0,0.0,1,0,1,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-04,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-05,1.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,2,0,0,0.0,0,0,0.0,0.0,0,0.0,1,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-26,,,1,,,77,,2,,,,50,2,6,,4,62,,,3,,2,7,0,1.0,...,,,0.0,,1.0,0.0,,,,,,,,,,,,,,,,,,,
2023-06-27,,,0,,,71,,3,,,,52,3,2,,3,50,,,5,,3,8,0,3.0,...,,,1.0,,,0.0,,,,,,,,,,,,,,,,,,,
2023-06-28,,,0,,,48,,2,,,,43,3,0,,2,32,,,1,,0,3,0,,...,,,,,,1.0,,,,,,,,,,,,,,,,,,,
2023-06-29,,,0,,,55,,4,,,,51,0,8,,4,41,,,3,,2,3,0,,...,,,,,,,,,,,,,,,,,,,,,,,,,


In [37]:
resampled_df = resampled_df.resample('D').asfreq()
resampled_df.head(3)


Unnamed: 0_level_0,FINANCIAL ID THEFT: OVER $300,AGG: FINANCIAL ID THEFT,PREDATORY,AGG SEX ASSLT OF CHILD FAM MBR,AGG CRIM SEX ABUSE FAM MEMBER,OVER $500,AGG CRIMINAL SEXUAL ABUSE,FIRST DEGREE MURDER,THEFT/RECOVERY: AUTOMOBILE,CRIM SEX ABUSE BY FAM MEMBER,SEXUAL EXPLOITATION OF A CHILD,AUTOMOBILE,OTHER VEHICLE OFFENSE,FRAUD OR CONFIDENCE GAME,SEX ASSLT OF CHILD BY FAM MBR,CREDIT CARD FRAUD,$500 AND UNDER,FINANCIAL ID THEFT:$300 &UNDER,AGGRAVATED: OTHER,NON-AGGRAVATED,EMBEZZLEMENT,FORGERY,HARASSMENT BY ELECTRONIC MEANS,BOGUS CHECK,CHILD ABUSE,...,AGGRAVATED DOMESTIC BATTERY - OTHER FIREARM,CONTRIBUTE TO THE DELINQUENCY OF CHILD,POSSESS - HEROIN (BLACK TAR),OTHER ARSON / EXPLOSIVE INCIDENT,AGGRAVATED OF AN UNBORN CHILD,"ATTEMPT - CYCLE, SCOOTER, BIKE WITH VIN",POSSESSION - EXPLOSIVE / INCENDIARY DEVICE,MANUFACTURE / DELIVER - AMPHETAMINES,INTERFERE WITH EMERGENCY EQUIPMENT,UNLAWFUL SALE - HANDGUN,OF AN UNBORN CHILD,GAME / AMUSEMENT DEVICE,MANUFACTURE / DELIVER - METHAMPHETAMINE,MANUFACTURE / DELIVER - SYNTHETIC MARIJUANA,"AGG. RITUAL MUTILATION - HANDS, FISTS, FEET, SERIOUS INJURY",SELL / ADVERTISE FIREWORKS,"ATTEMPT - CYCLE, SCOOTER, BIKE NO VIN",DISCLOSE DOMESTIC VIOLENCE VICTIM LOCATION,DELIVER CANNABIS TO PERSON UNDER 18,SECOND DEGREE MURDER,POSSESS - HYPODERMIC NEEDLE,"THEFT / RECOVERY - CYCLE, SCOOTER, BIKE NO VIN",DELIVER CONTROLLED SUBSTANCES TO PERSON UNDER 18,POSSESS KEYS OR DEVICE TO COIN MACHINE,POSSESS - LOOK-ALIKE DRUGS
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2001-01-01,96.0,7.0,17,28.0,15.0,5,13.0,2,1.0,11.0,2.0,2,1,4,8.0,3,2,27.0,8.0,1,3.0,4,1,1,3.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-02,5.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,2.0,0.0,0,1.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2001-01-03,1.0,0.0,0,0.0,0.0,0,1.0,0,0.0,0.0,0.0,1,0,1,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,


In [38]:
ts0 = resampled_df.index[0]
ts0

Timestamp('2001-01-01 00:00:00', freq='D')

In [39]:
# checking the documentation for astimezone
ts0.astimezone?

# 

In [None]:
## remove time zone from the dt index
df = df.tz_convert(None)
df.head(3)

In [45]:
resampled_df.isna().sum()

FINANCIAL ID THEFT: OVER $300                       3402
AGG: FINANCIAL ID THEFT                             3405
PREDATORY                                              0
AGG SEX ASSLT OF CHILD FAM MBR                      1220
AGG CRIM SEX ABUSE FAM MEMBER                       1235
                                                    ... 
POSSESS - HYPODERMIC NEEDLE                         8215
THEFT / RECOVERY - CYCLE, SCOOTER, BIKE NO VIN      8109
DELIVER CONTROLLED SUBSTANCES TO PERSON UNDER 18    8215
POSSESS KEYS OR DEVICE TO COIN MACHINE              8215
POSSESS - LOOK-ALIKE DRUGS                          8215
Length: 542, dtype: int64

In [46]:
## filling the null values with 0
resampled_df = resampled_df.fillna(0)
resampled_df



Unnamed: 0_level_0,FINANCIAL ID THEFT: OVER $300,AGG: FINANCIAL ID THEFT,PREDATORY,AGG SEX ASSLT OF CHILD FAM MBR,AGG CRIM SEX ABUSE FAM MEMBER,OVER $500,AGG CRIMINAL SEXUAL ABUSE,FIRST DEGREE MURDER,THEFT/RECOVERY: AUTOMOBILE,CRIM SEX ABUSE BY FAM MEMBER,SEXUAL EXPLOITATION OF A CHILD,AUTOMOBILE,OTHER VEHICLE OFFENSE,FRAUD OR CONFIDENCE GAME,SEX ASSLT OF CHILD BY FAM MBR,CREDIT CARD FRAUD,$500 AND UNDER,FINANCIAL ID THEFT:$300 &UNDER,AGGRAVATED: OTHER,NON-AGGRAVATED,EMBEZZLEMENT,FORGERY,HARASSMENT BY ELECTRONIC MEANS,BOGUS CHECK,CHILD ABUSE,...,AGGRAVATED DOMESTIC BATTERY - OTHER FIREARM,CONTRIBUTE TO THE DELINQUENCY OF CHILD,POSSESS - HEROIN (BLACK TAR),OTHER ARSON / EXPLOSIVE INCIDENT,AGGRAVATED OF AN UNBORN CHILD,"ATTEMPT - CYCLE, SCOOTER, BIKE WITH VIN",POSSESSION - EXPLOSIVE / INCENDIARY DEVICE,MANUFACTURE / DELIVER - AMPHETAMINES,INTERFERE WITH EMERGENCY EQUIPMENT,UNLAWFUL SALE - HANDGUN,OF AN UNBORN CHILD,GAME / AMUSEMENT DEVICE,MANUFACTURE / DELIVER - METHAMPHETAMINE,MANUFACTURE / DELIVER - SYNTHETIC MARIJUANA,"AGG. RITUAL MUTILATION - HANDS, FISTS, FEET, SERIOUS INJURY",SELL / ADVERTISE FIREWORKS,"ATTEMPT - CYCLE, SCOOTER, BIKE NO VIN",DISCLOSE DOMESTIC VIOLENCE VICTIM LOCATION,DELIVER CANNABIS TO PERSON UNDER 18,SECOND DEGREE MURDER,POSSESS - HYPODERMIC NEEDLE,"THEFT / RECOVERY - CYCLE, SCOOTER, BIKE NO VIN",DELIVER CONTROLLED SUBSTANCES TO PERSON UNDER 18,POSSESS KEYS OR DEVICE TO COIN MACHINE,POSSESS - LOOK-ALIKE DRUGS
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2001-01-01,96.0,7.0,17,28.0,15.0,5,13.0,2,1.0,11.0,2.0,2,1,4,8.0,3,2,27.0,8.0,1,3.0,4,1,1,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001-01-02,5.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,2.0,0.0,0,1.0,0,0,0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001-01-03,1.0,0.0,0,0.0,0.0,0,1.0,0,0.0,0.0,0.0,1,0,1,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001-01-04,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0.0,0,0,0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001-01-05,1.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,2,0,0,0.0,0,0,0.0,0.0,0,0.0,1,0,0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-26,0.0,0.0,1,0.0,0.0,77,0.0,2,0.0,0.0,0.0,50,2,6,0.0,4,62,0.0,0.0,3,0.0,2,7,0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-27,0.0,0.0,0,0.0,0.0,71,0.0,3,0.0,0.0,0.0,52,3,2,0.0,3,50,0.0,0.0,5,0.0,3,8,0,3.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-28,0.0,0.0,0,0.0,0.0,48,0.0,2,0.0,0.0,0.0,43,3,0,0.0,2,32,0.0,0.0,1,0.0,0,3,0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-06-29,0.0,0.0,0,0.0,0.0,55,0.0,4,0.0,0.0,0.0,51,0,8,0.0,4,41,0.0,0.0,3,0.0,2,3,0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Q1: Which district has the most crimes? Which has the least?


In [63]:
df_ts = chicago_df.groupby('District').sum()
df_ts

  df_ts = chicago_df.groupby('District').sum()


Unnamed: 0_level_0,ID,Arrest,Domestic,Beat,Ward,Latitude,Longitude,Total # of Crimes
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,2322496986567,78756,14318,68576416,5917976.0,12267630.0,-25673470.0,0.0
2.0,2379563209111,79518,44740,204145780,1722587.0,13554840.0,-28404100.0,0.0
3.0,2707354769568,91866,75571,117360884,3952735.0,15176540.0,-31826050.0,0.0
4.0,3085858693218,91209,65595,172476767,3324443.0,17079780.0,-35835720.0,0.0
5.0,2388726565355,79773,62122,165692687,5944355.0,13258010.0,-27866910.0,0.0
6.0,3233376140778,108357,80919,262710636,5827025.0,17625250.0,-36998990.0,0.0
7.0,3031563364512,115833,83742,299658491,6064684.0,17331860.0,-36365650.0,0.0
8.0,3566584588148,108098,70243,397398116,8089554.0,20148240.0,-42302360.0,0.0
9.0,2547125176088,94933,45144,330526061,4511038.0,14560860.0,-30526440.0,0.0
10.0,2364011945941,97085,49650,316276789,7002450.0,12958550.0,-27157270.0,0.0
