In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import os

In [55]:
# Output: per-day weather summary written at the end of this section.
processed = 'data/weather_gov_daily_summaries.csv'
# Input: raw weather.gov observations.
# NOTE(review): these paths start with 'data/' while later cells use '../data/' - confirm the working directory.
toprocess = 'data/weather_gov.csv'

In [120]:
# Load the raw weather observations from the `toprocess` path into `df`.
df = pd.read_csv(toprocess)

In [59]:
df.columns

Index(['date', 'hr', 'time', 'temp', 'wind_speed', 'wind_gust', 'max_wind',
       'visibility_miles', 'prcp_1_hr', 'wind_direction', 'weather', 'clouds',
       'id'],
      dtype='object')

In [None]:
# Target columns for the daily summary: date, max_wind (daily max), high (max temp), low (min temp), prcp (sum, rounded), storm_day (bool), storm_hrs (count), rain_hrs (count)

In [82]:
# Per-date maximum of wind and temperature; `date` is restored as a regular column.
daily_max = df.groupby(['date'])[['max_wind', 'temp']].agg('max')
pt1 = daily_max.reset_index()

In [86]:
# The per-date maximum temperature becomes the daily `high`.
pt1 = pt1.rename(columns={'temp': 'high'})

In [88]:
# Per-date minimum temperature, exposed as the daily `low`.
pt2 = (
    df.groupby(['date'])[['temp']]
    .min()
    .reset_index()
    .rename(columns={'temp': 'low'})
)

In [90]:
# Attach the daily low to the running summary, matching rows on `date`.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low
0,1/1/2018,14,22,7
1,1/1/2019,10,55,46
2,1/1/2020,13,52,34
3,1/1/2021,38,74,45
4,1/1/2022,41,76,57
...,...,...,...,...
2654,9/9/2020,6,89,66
2655,9/9/2021,24,82,60
2656,9/9/2022,18,87,64
2657,9/9/2023,23,85,66


In [92]:
# Per-date total of the `prcp_1_hr` readings.
prcp_by_date = df.groupby(['date'])[['prcp_1_hr']].agg('sum')
pt2 = prcp_by_date.reset_index()

In [96]:
# Rename the summed column to `prcp` and round it to two decimals.
pt2 = pt2.rename(columns={'prcp_1_hr': 'prcp'})
pt2['prcp'] = pt2['prcp'].round(2)

In [100]:
# Attach the daily precipitation total to the running summary, matching rows on `date`.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp
0,1/1/2018,14,22,7,0.00
1,1/1/2019,10,55,46,0.00
2,1/1/2020,13,52,34,0.00
3,1/1/2021,38,74,45,0.37
4,1/1/2022,41,76,57,1.83
...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00
2655,9/9/2021,24,82,60,0.00
2656,9/9/2022,18,87,64,0.00
2657,9/9/2023,23,85,66,0.00


In [150]:
# Flag each observation row of `df` whose `weather` text mentions rain or thunder.
# The flags must live on `df`, not `pt2`: at this point `pt2` only holds `date`
# and `prcp` (no `weather` column), and the next cell sums `rain_hrs` /
# `storm_hrs` from `df` grouped by date.
weather_text = df['weather'].fillna('').str.lower()  # missing descriptions count as "no match"

# 1 when the description contains the keyword, 0 otherwise, so a per-date sum
# gives the number of matching observations.
df['rain_hrs'] = weather_text.str.contains('rain', regex=False).astype(int)
df['storm_hrs'] = weather_text.str.contains('thunder', regex=False).astype(int)

In [152]:
# Per-date totals of the rain / storm flags.
flag_totals = df.groupby(['date'])[['rain_hrs', 'storm_hrs']].agg('sum')
pt2 = flag_totals.reset_index()

In [157]:
# Attach the rain / storm totals to the running summary, matching rows on `date`.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs
0,1/1/2018,14,22,7,0.00,0,0
1,1/1/2019,10,55,46,0.00,0,0
2,1/1/2020,13,52,34,0.00,0,0
3,1/1/2021,38,74,45,0.37,6,0
4,1/1/2022,41,76,57,1.83,9,2
...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0
2655,9/9/2021,24,82,60,0.00,0,0
2656,9/9/2022,18,87,64,0.00,0,0
2657,9/9/2023,23,85,66,0.00,0,0


In [159]:
# A date is a storm day when its `storm_hrs` total is greater than zero.
pt1['storm_day'] = pt1['storm_hrs'].gt(0)

In [161]:
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs,storm_day
0,1/1/2018,14,22,7,0.00,0,0,False
1,1/1/2019,10,55,46,0.00,0,0,False
2,1/1/2020,13,52,34,0.00,0,0,False
3,1/1/2021,38,74,45,0.37,6,0,False
4,1/1/2022,41,76,57,1.83,9,2,True
...,...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0,False
2655,9/9/2021,24,82,60,0.00,0,0,False
2656,9/9/2022,18,87,64,0.00,0,0,False
2657,9/9/2023,23,85,66,0.00,0,0,False


In [163]:
# Write the merged per-date weather summary to the `processed` path, without the index column.
pt1.to_csv(processed, index=False)

In [180]:
# group and merge the active dispatch data with the codes

In [259]:
# Inputs: the code lookup table and the raw active-dispatch rows.
codes = '../data/codes.csv'
startfile = '../data/active_dispatch.csv'

# Output: active-dispatch summary joined with the code lookup.
ad_w_codes = '../data/active_dispatch_processed_w_codes.csv'

In [261]:
# Load the raw active-dispatch rows (this rebinds `df`, replacing the weather frame).
df = pd.read_csv(startfile)

# Load the code lookup and make sure its key column is called `code`, because the
# merge that follows joins on 'code'. The lookup's key is shown elsewhere in this
# notebook as `incident_type_code`; rename() leaves the frame unchanged when that
# column is absent, so this is safe if the file already uses `code`.
codes_df = pd.read_csv(codes).rename(columns={'incident_type_code': 'code'})

In [263]:
df.columns

Index(['code', 'date', 'hr', 'time', 'address', 'ert_mins', 'id'], dtype='object')

In [265]:
# Merge the dispatch rows with the code lookup table.
# Then group by date and code: count ids (number of calls); take the mean, min and max of ert_mins; sum the tree, wires and flooding flags.

In [267]:
# Keep only dispatch rows whose `code` appears in the lookup table.
merged_df = df.merge(codes_df, how='inner', on='code')

In [269]:
# Number of non-null ids per (date, code, incident_type_name).
call_counts = merged_df.groupby(['date', 'code', 'incident_type_name'])[['id']].agg('count')
pt1 = call_counts.reset_index()

In [271]:
# The id count is the number of calls.
pt1 = pt1.rename(columns={'id': 'how_many'})

In [273]:
# Mean `ert_mins` per (date, code).
ert_mean = merged_df.groupby(['date', 'code'])[['ert_mins']].agg('mean')
pt2 = ert_mean.reset_index()

In [275]:
# Label the aggregated column as the average.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_avg'})

In [277]:
# Attach the average to the call counts, matching on date and code.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [279]:
# Smallest `ert_mins` per (date, code).
ert_min = merged_df.groupby(['date', 'code'])[['ert_mins']].agg('min')
pt2 = ert_min.reset_index()

In [281]:
# Label the aggregated column as the minimum.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_min'})

In [283]:
# Attach the minimum to the running summary, matching on date and code.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [285]:
# Largest `ert_mins` per (date, code).
ert_max = merged_df.groupby(['date', 'code'])[['ert_mins']].agg('max')
pt2 = ert_max.reset_index()

In [287]:
# Label the aggregated column as the maximum.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_max'})

In [289]:
# Attach the maximum to the running summary, matching on date and code.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [291]:
# Per (date, code) totals of the tree / wires / flooding columns from the lookup.
flag_sums = merged_df.groupby(['date', 'code'])[['tree', 'wires', 'flooding']].agg('sum')
pt2 = flag_sums.reset_index()

In [293]:
# Attach the flag totals to the running summary, matching on date and code.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [295]:
pt1

Unnamed: 0,date,code,incident_type_name,how_many,ert_mins_avg,ert_mins_min,ert_mins_max,tree,wires,flooding
0,3/27/2025,1000,Bank Robbery/Hold Up,1,280.0,280,280,0,0,0
1,3/27/2025,53A,Robbery/Hold Up Alarm,4,42.0,16,95,0,0,0
2,3/27/2025,53P,Hold Up/Robbery In Progress,1,258.0,258,258,0,0,0
3,3/27/2025,57P,Fight / Assault,4,135.5,57,230,0,0,0
4,3/27/2025,57PJ,Fight / Assault,1,171.0,171,171,0,0,0
...,...,...,...,...,...,...,...,...,...,...
275,4/9/2025,87B,Safety Hazard,1,9.0,9,9,1,1,0
276,4/9/2025,87PR,Safety Hazard,1,51.0,51,51,0,0,0
277,4/9/2025,87T,Safety Hazard,3,81.0,9,207,3,0,0
278,4/9/2025,87W,Safety Hazard,1,27.0,27,27,0,1,0


In [297]:
# Write the per (date, code) dispatch summary to the `ad_w_codes` path, without the index column.
pt1.to_csv(ad_w_codes, index=False)

In [362]:
# Inputs: code lookup table and the folder of call files to process.
codes = '../data/codes.csv'
input_folder = '../data/mnpd_calls_for_service_processed/'

# Outputs: daily counts broken down by code, and daily totals without the code breakdown.
mnpd_no_codes = '../data/mnpd_calls_daily_counts_no_codes.csv'
mnpd_w_codes = '../data/mnpd_calls_daily_counts_w_codes.csv'

In [364]:
# Empty accumulator with the column layout produced for each processed file.
summary_columns = [
    'date',
    'code',
    'incident_type_name',
    'how_many',
    'tree',
    'wires',
    'flooding',
]
final_df = pd.DataFrame(columns=summary_columns)

In [366]:
# Summarise every call file in `input_folder` into per-day counts and write the
# combined table to `mnpd_w_codes`.
# Reads `input_folder`, `codes_df`, `final_df` and `mnpd_w_codes` from earlier cells.

# Only CSV files, in sorted order: os.listdir() returns entries in arbitrary order
# and the folder may also hold non-data entries (e.g. a .ipynb_checkpoints
# directory) that read_csv cannot parse.
files = sorted(
    f for f in os.listdir(input_folder)
    if f.lower().endswith('.csv') and os.path.isfile(os.path.join(input_folder, f))
)

print('starting...')

# processing file X of Y: filename.csv
y = len(files)
daily_frames = []

for x, file in enumerate(files, start=1):
    print(f"\rprocessing file {x} of {y}: {file}                  ", end="")
    df = pd.read_csv(os.path.join(input_folder, file))

    merged_df = pd.merge(df, codes_df, on='code', how='inner')
    # use the category column as new code column
    merged_df = merged_df.drop(columns='code').rename(columns={'category': 'code'})

    # Count calls and sum the flags over the SAME grouping. Summing the flags per
    # (date, code) and merging them onto (date, code, incident_type_name) rows
    # repeats the sum on every name of a category, which double-counts once the
    # rows are later totalled per date.
    daily_frames.append(
        merged_df
        .groupby(['date', 'code', 'incident_type_name'])
        .agg(
            how_many=('hr', 'count'),
            tree=('tree', 'sum'),
            wires=('wires', 'sum'),
            flooding=('flooding', 'sum'),
        )
        .reset_index()
    )

# Concatenate once instead of growing the frame inside the loop; this also makes
# the cell safe to re-run because `final_df` is rebuilt rather than appended to.
# With no input files the empty `final_df` from the previous cell is written as-is.
if daily_frames:
    final_df = pd.concat(daily_frames, ignore_index=True)

final_df.to_csv(mnpd_w_codes, index=False)
print()
print('done!')

starting...
processing file 8 of 8: 2025_ytd.csv                  
done!


In [367]:
# Collapse the per-code rows into one row per date by summing the count and flag columns.
totals_by_date = final_df.groupby(['date'])[['how_many', 'tree', 'wires', 'flooding']].agg('sum')
final_df = totals_by_date.reset_index()

In [368]:
# Write the per-date totals to the `mnpd_no_codes` path, without the index column.
final_df.to_csv(mnpd_no_codes, index=False)

In [78]:
# Inputs: daily call counts and the active-dispatch summary.
ad = '../data/active_dispatch_processed.csv'
mnpd = '../data/mnpd_calls_daily_counts.csv'

# Output: active-dispatch summary joined with the code lookup.
ad_w_codes = '../data/active_dispatch_processed_w_codes.csv'

In [80]:
# Load the daily call counts, the code lookup table and the active-dispatch summary.
# `codes` is not assigned in this section; it is the path set in an earlier cell.
mnpd_df = pd.read_csv(mnpd)
codes_df = pd.read_csv(codes)
ad_df = pd.read_csv(ad)

In [82]:
mnpd_df.columns

Index(['date', 'code', 'how_many'], dtype='object')

In [84]:
codes_df.columns

Index(['incident_type_code', 'category', 'incident_type_name', 'tree', 'wires',
       'flooding'],
      dtype='object')

In [86]:
# Give the lookup's key column the same name (`code`) as the frames it is merged with.
codes_df = codes_df.rename(columns={'incident_type_code': 'code'})

In [88]:
mnpd_df

Unnamed: 0,date,code,how_many
0,1/1/2018,15P,65
1,1/1/2018,16P,10
2,1/1/2018,16PJ,1
3,1/1/2018,3,17
4,1/1/2018,35P,3
...,...,...,...
193566,2025-04-02,88PW,5
193567,2025-04-02,93,47
193568,2025-04-02,93TS,2
193569,2025-04-02,95,2


In [90]:
# Keep only daily-count rows whose `code` appears in the lookup table.
merged_df = mnpd_df.merge(codes_df, how='inner', on='code')

In [92]:
merged_df

Unnamed: 0,date,code,how_many,category,incident_type_name,tree,wires,flooding
0,1/1/2018,15P,65,15,Community Policing Activity,0,0,0
1,1/1/2018,16P,10,16,Prisoner Transport,0,0,0
2,1/1/2018,16PJ,1,16,Prisoner Transport,0,0,0
3,1/1/2018,3,17,3,Special Assignment,0,0,0
4,1/1/2018,35P,3,35,Mentally Ill Person,0,0,0
...,...,...,...,...,...,...,...,...
193566,2025-04-02,88PW,5,88,Investigate 911 Hang-Up Call,0,0,0
193567,2025-04-02,93,47,93,Traffic Violation,0,0,0
193568,2025-04-02,93TS,2,93,err,0,0,0
193569,2025-04-02,95,2,94,Personal Relief,0,0,0


In [94]:
# Write the daily counts joined with the code lookup to the `mnpd_w_codes` path.
# NOTE(review): `mnpd_w_codes` is also written by the per-file processing loop earlier
# in this notebook with a different column layout; running top-to-bottom leaves this
# version on disk - confirm which one is intended.
merged_df.to_csv(mnpd_w_codes, index=False)

In [95]:
ad_df.columns

Index(['date', 'code', 'incident_type_name', 'how_many', 'ert_mins_avg',
       'ert_mins_min', 'ert_mins_max'],
      dtype='object')

In [96]:
# Join the dispatch summary with the lookup. Both frames carry `incident_type_name`,
# so the result holds `incident_type_name_x` (dispatch) and `incident_type_name_y` (lookup).
merged_df = ad_df.merge(codes_df, how='inner', on='code')

In [100]:
ad_df

Unnamed: 0,date,code,incident_type_name,how_many,ert_mins_avg,ert_mins_min,ert_mins_max
0,2025-03-27,1000,BANK ROBBERY/HOLD UP,1,280.000000,280,280
1,2025-03-27,53A,ROBERRY/HOLD UP ALARM,4,42.000000,16,95
2,2025-03-27,53P,HOLD UP ROBBERY IN PROGRESS,1,258.000000,258,258
3,2025-03-27,57P,FIGHT/ASSAULT,4,135.500000,57,230
4,2025-03-27,57PJ,FIGHT/ASSAULT IN PROGRESS,1,171.000000,171,171
...,...,...,...,...,...,...,...
275,2025-04-14,70P,BURGLARY-RESIDENCE BREAK-IN,2,20.500000,15,26
276,2025-04-14,71A,NON-RESIDENCE-BURGLARY ALARM,29,64.413793,10,200
277,2025-04-14,71P,BURGLARY-NON-RESIDENCE BREAK-IN,1,64.000000,64,64
278,2025-04-14,83P,SHOTS FIRED,4,115.250000,47,188


In [102]:
# Use the lookup table's spelling as the canonical `incident_type_name`.
merged_df = merged_df.rename(columns={'incident_type_name_y': 'incident_type_name'})

In [104]:
# Discard the name column that came from the dispatch summary.
merged_df = merged_df.drop(columns='incident_type_name_x')

In [106]:
merged_df

Unnamed: 0,date,code,how_many,ert_mins_avg,ert_mins_min,ert_mins_max,category,incident_type_name,tree,wires,flooding
0,2025-03-27,1000,1,280.000000,280,280,1000,Bank Robbery/Hold Up,0,0,0
1,2025-03-27,53A,4,42.000000,16,95,53A,Robbery/Hold Up Alarm,0,0,0
2,2025-03-27,53P,1,258.000000,258,258,53P,Hold Up/Robbery In Progress,0,0,0
3,2025-03-27,57P,4,135.500000,57,230,57,Fight / Assault,0,0,0
4,2025-03-27,57PJ,1,171.000000,171,171,57,Fight / Assault,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
275,2025-04-14,70P,2,20.500000,15,26,70P,Burglary-Residence Break-In,0,0,0
276,2025-04-14,71A,29,64.413793,10,200,71A,Non-Residence Burglary Alarm,0,0,0
277,2025-04-14,71P,1,64.000000,64,64,71P,Burglary-Non-Residence Break-In,0,0,0
278,2025-04-14,83P,4,115.250000,47,188,83,Shots Fired,0,0,0


In [108]:
# Write the dispatch summary joined with the code lookup to the `ad_w_codes` path.
# NOTE(review): the same path is written earlier in this notebook from the grouped
# active-dispatch data, with a different column layout - confirm which file should win.
merged_df.to_csv(ad_w_codes, index=False)

In [None]:
# summarize the daily counts

In [122]:
# Output: one total per date, without the code breakdown.
daily_mnpd = '../data/mnpd_calls_daily_counts_no_codes.csv'
# Input: daily call counts per code.
mnpd = '../data/mnpd_calls_daily_counts.csv'

In [124]:
# Load the per-code daily call counts from the `mnpd` path.
daily_mnpd_df = pd.read_csv(mnpd)

In [126]:
daily_mnpd_df.columns

Index(['date', 'code', 'how_many'], dtype='object')

In [128]:
daily_mnpd_df

Unnamed: 0,date,code,how_many
0,1/1/2018,15P,65
1,1/1/2018,16P,10
2,1/1/2018,16PJ,1
3,1/1/2018,3,17
4,1/1/2018,35P,3
...,...,...,...
193566,2025-04-02,88PW,5
193567,2025-04-02,93,47
193568,2025-04-02,93TS,2
193569,2025-04-02,95,2


In [130]:
# Total calls per date across all codes.
# NOTE(review): `date` is grouped as stored; the displayed rows show both '1/1/2018'
# and '2025-04-02' styles, so the output keeps mixed date formats - confirm this is acceptable.
calls_by_date = daily_mnpd_df.groupby(['date'])[['how_many']].agg('sum')
daily_mnpd_df = calls_by_date.reset_index()

In [138]:
# Write the per-date call totals to the `daily_mnpd` path, without the index column.
# NOTE(review): this is the same file path as `mnpd_no_codes`, which an earlier cell
# writes with additional tree / wires / flooding columns; running top-to-bottom leaves
# this narrower version on disk - confirm which one is intended.
daily_mnpd_df.to_csv(daily_mnpd, index=False)