In [101]:
# import libraries and env variables
import requests
import pandas as pd
from dotenv import load_dotenv
import os

load_dotenv()
APP_TOKEN = os.environ.get("X-App-Token")


In [102]:
# initialize parameters
start_date = '2023-11-06T00:00:00.000'
end_date = '2023-11-19T23:59:59.999'
soql_date = f"$where=date_of_occurrence between '{start_date}' and '{end_date}'"
limit = 1000
offset = 0

response = requests.get(f"https://data.cityofchicago.org/resource/x2n5-8w5q.json?"
                        f"$$app_token={APP_TOKEN}&"
                        f"$order=:id"  
                        f"&{soql_date}"
                        f"&$limit={limit}"
                        f"&$offset={offset}")

if not response.status_code==200:
    raise Exception


response_data = response.json()
crime_df = pd.json_normalize(data=response_data)

In [103]:
print(f"Distinct entries: {len(crime_df.groupby(['case_']))}")

Distinct entries: 1000


In [104]:
crime_df

Unnamed: 0,case_,date_of_occurrence,block,_iucr,_primary_decsription,_secondary_description,_location_description,arrest,domestic,beat,ward,fbi_cd,x_coordinate,y_coordinate,latitude,longitude,location.latitude,location.longitude,location.human_address
0,JG495930,2023-11-06T00:00:00.000,023XX W LELAND AVE,0820,THEFT,$500 AND UNDER,STREET,N,N,1911,47,06,1159788,1931121,41.96671217,-87.687866208,41.96671217,-87.687866208,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
1,JG494364,2023-11-06T00:00:00.000,105XX S WALDEN PKWY,0560,ASSAULT,SIMPLE,APARTMENT,N,N,2212,19,08A,1165481,1834708,41.702025415,-87.669672767,41.702025415,-87.669672767,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
2,JG495115,2023-11-06T00:00:00.000,074XX W BELMONT AVE,0930,MOTOR VEHICLE THEFT,THEFT / RECOVERY - AUTOMOBILE,COMMERCIAL / BUSINESS OFFICE,N,N,1631,29,07,1125766,1920416,41.937971562,-87.813201099,41.937971562,-87.813201099,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
3,JG494379,2023-11-06T00:00:00.000,069XX S HARPER AVE,2820,OTHER OFFENSE,TELEPHONE THREAT,RESIDENCE,N,Y,332,5,08A,1187589,1859066,41.768370253,-87.587947949,41.768370253,-87.587947949,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
4,JG493406,2023-11-06T00:00:00.000,098XX S HOXIE AVE,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,N,N,431,7,07,1195083,1840238,41.71652318,-87.561098665,41.71652318,-87.561098665,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,JG495727,2023-11-07T19:00:00.000,082XX S MARYLAND AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,N,Y,631,8,08B,1183338,1850541,41.745076822,-87.603794713,41.745076822,-87.603794713,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
996,JG495495,2023-11-07T16:52:00.000,066XX S WENTWORTH AVE,143B,WEAPONS VIOLATION,UNLAWFUL POSSESSION - OTHER FIREARM,GAS STATION,Y,N,722,6,15,1176103,1860852,41.773536673,-87.62999581,41.773536673,-87.62999581,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
997,JG501827,2023-11-06T09:40:00.000,009XX W CULLERTON ST,0820,THEFT,$500 AND UNDER,STREET,N,N,1235,25,06,1170653,1890607,41.855307984,-87.649105748,41.855307984,-87.649105748,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."
998,JG495634,2023-11-07T18:43:00.000,007XX N DRAKE AVE,0560,ASSAULT,SIMPLE,STREET,N,N,1121,27,08A,1152581,1904742,41.894471978,-87.715064809,41.894471978,-87.715064809,"{""address"": """", ""city"": """", ""state"": """", ""zip""..."


In [105]:
# create a dataframe for specific dates with holiday column 

begin_date = "2023-01-01"
end_date = "2024-12-31" 
# ^ parameters to be set by yaml config later

date_df = pd.DataFrame({'Date':pd.date_range(start=begin_date, end=end_date)})

date_df['Day'] = date_df['Date'].dt.day
date_df['Month'] = date_df['Date'].dt.month
date_df['MonthName'] = date_df['Date'].dt.month_name()
date_df['Year'] = date_df['Date'].dt.year
date_df['DayOfWeek'] = date_df['Date'].dt.dayofweek
date_df['DayOfWeekName'] = date_df['Date'].dt.day_name()

In [106]:
holidays_df = pd.concat(map(pd.read_csv, ['raw_data/holidays/2023.csv', 'raw_data/holidays/2024.csv']))
# ^ change code above to run as for loop for each file inside the holidays folder

holidays_df = holidays_df.rename(columns={'Name': 'HolidayName'})

In [107]:
holidays_df['Date'] = pd.to_datetime(holidays_df['Date'])

In [108]:
date_merge_df = pd.merge(left=date_df, right=holidays_df, on=["Date"], how="left")

In [109]:
holidays_df

Unnamed: 0,HolidayName,Date
0,New Year's Day,2023-01-02
1,Dr. Martin Luther King Jr.'s Birthday,2023-01-16
2,Lincoln's Birthday,2023-02-13
3,Washington's Birthday,2023-02-20
4,Pulaski Day,2023-03-06
5,Memorial Day,2023-05-29
6,Juneteenth Day,2023-06-19
7,Independence Day,2023-07-04
8,Labor Day,2023-09-04
9,Columbus Day,2023-10-09
