In [181]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import os

In [55]:
# Weather stage paths: `processed` is the daily-summary CSV written by this
# notebook, `toprocess` is the raw weather.gov observation CSV it is built from.
processed = 'data/weather_gov_daily_summaries.csv'
toprocess = 'data/weather_gov.csv'

In [120]:
# Load the raw weather observations that the daily summary is built from.
df = pd.read_csv(filepath_or_buffer=toprocess)

In [59]:
# Inspect which columns the loaded weather frame provides.
df.columns

Index(['date', 'hr', 'time', 'temp', 'wind_speed', 'wind_gust', 'max_wind',
       'visibility_miles', 'prcp_1_hr', 'wind_direction', 'weather', 'clouds',
       'id'],
      dtype='object')

In [None]:
# Target columns, one row per date: date, max_wind (max), high (max temp), low (min temp), prcp (sum, rounded to 2 places), storm_day (bool: storm_hrs > 0), storm_hrs (count), rain_hrs (count)

In [82]:
# Per-date maximum of wind and temperature; `date` comes back as a column.
pt1 = df.groupby(['date'])[['max_wind', 'temp']].agg('max').reset_index()

In [86]:
# The per-date maximum temperature is the daily high.
pt1 = pt1.rename(columns={'temp': 'high'})

In [88]:
# Per-date minimum temperature, labelled as the daily low.
pt2 = (
    df.groupby(['date'])[['temp']]
    .agg('min')
    .reset_index()
    .rename(columns={'temp': 'low'})
)

In [90]:
# Attach the daily low to the max_wind/high table, matching rows on date.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low
0,1/1/2018,14,22,7
1,1/1/2019,10,55,46
2,1/1/2020,13,52,34
3,1/1/2021,38,74,45
4,1/1/2022,41,76,57
...,...,...,...,...
2654,9/9/2020,6,89,66
2655,9/9/2021,24,82,60
2656,9/9/2022,18,87,64
2657,9/9/2023,23,85,66


In [92]:
# Per-date total of the `prcp_1_hr` readings.
pt2 = df.groupby(['date'])[['prcp_1_hr']].agg('sum').reset_index()

In [96]:
# Name the daily total `prcp` and round it to two decimal places.
pt2 = pt2.rename(columns={'prcp_1_hr': 'prcp'})
pt2['prcp'] = pt2['prcp'].round(2)

In [100]:
# Attach the daily precipitation total to the summary, matching rows on date.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp
0,1/1/2018,14,22,7,0.00
1,1/1/2019,10,55,46,0.00
2,1/1/2020,13,52,34,0.00
3,1/1/2021,38,74,45,0.37
4,1/1/2022,41,76,57,1.83
...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00
2655,9/9/2021,24,82,60,0.00
2656,9/9/2022,18,87,64,0.00
2657,9/9/2023,23,85,66,0.00


In [150]:
# Flag each observation row whose `weather` text mentions rain / thunder.
# The flags are written to `df` (the per-observation frame): that is the frame
# that has a `weather` column and the one the following cell groups by date to
# sum `rain_hrs` / `storm_hrs`. `pt2` at this point is the per-date `prcp`
# table and has no `weather` column, so flagging it raises a KeyError on a
# fresh Restart & Run All.
weather_text = df['weather'].fillna('').str.lower()  # missing text never matches

df['rain_hrs'] = 0
df.loc[weather_text.str.contains('rain'), 'rain_hrs'] = 1

df['storm_hrs'] = 0
df.loc[weather_text.str.contains('thunder'), 'storm_hrs'] = 1

In [152]:
# Per-date totals of the 0/1 rain and storm flags.
pt2 = df.groupby(['date'])[['rain_hrs', 'storm_hrs']].agg('sum').reset_index()

In [157]:
# Attach the rain/storm counts to the daily summary, matching rows on date.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs
0,1/1/2018,14,22,7,0.00,0,0
1,1/1/2019,10,55,46,0.00,0,0
2,1/1/2020,13,52,34,0.00,0,0
3,1/1/2021,38,74,45,0.37,6,0
4,1/1/2022,41,76,57,1.83,9,2
...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0
2655,9/9/2021,24,82,60,0.00,0,0
2656,9/9/2022,18,87,64,0.00,0,0
2657,9/9/2023,23,85,66,0.00,0,0


In [159]:
# A date is a storm day when it has at least one flagged storm row.
pt1['storm_day'] = pt1['storm_hrs'].gt(0)

In [161]:
# Display the finished daily weather summary before it is written out.
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs,storm_day
0,1/1/2018,14,22,7,0.00,0,0,False
1,1/1/2019,10,55,46,0.00,0,0,False
2,1/1/2020,13,52,34,0.00,0,0,False
3,1/1/2021,38,74,45,0.37,6,0,False
4,1/1/2022,41,76,57,1.83,9,2,True
...,...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0,False
2655,9/9/2021,24,82,60,0.00,0,0,False
2656,9/9/2022,18,87,64,0.00,0,0,False
2657,9/9/2023,23,85,66,0.00,0,0,False


In [163]:
# Persist the daily weather summary; the positional index is not written.
pt1.to_csv(path_or_buf=processed, index=False)

In [165]:
# Load the active-dispatch workbook; this rebinds `df`, replacing the weather frame.
df = pd.read_excel(io='data/active_dispatch.xlsx')

In [167]:
# Inspect which columns the active-dispatch frame provides.
df.columns

Index(['code', 'date', 'hr', 'time', 'address', 'ert_mins', 'id',
       'incident_type_name', 'tree', 'wires', 'flooding'],
      dtype='object')

In [None]:
# group by date, code, incident_type_name.  count of code

In [173]:
# Count non-null `id` values for every (date, code, incident_type_name) group.
pt1 = (
    df.groupby(['date', 'code', 'incident_type_name'])[['id']]
    .agg('count')
    .reset_index()
)

In [177]:
# The counted `id` column holds the number of incidents per group.
pt1 = pt1.rename(columns={'id': 'how_many'})

In [179]:
# Persist the per-date dispatch counts; the positional index is not written.
pt1.to_csv(path_or_buf='data/active_dispatch_processed.csv', index=False)

In [199]:
# MNPD stage paths: `output_file` is the combined daily-count CSV,
# `input_folder` is the directory of per-file call exports it is built from.
output_file = 'data/mnpd_calls_daily_counts.csv'
input_folder = 'data/mnpd_calls_for_service_processed/'

In [203]:
# Start from an empty frame that already has the output columns.
pt1 = pd.DataFrame(data=None, columns=['date', 'code', 'how_many'])

In [205]:
# Build one (date, code) -> how_many table from every CSV in `input_folder`
# and write it to `output_file`.
#
# Only *.csv entries are read: os.listdir also returns things like
# `.ipynb_checkpoints` or `.DS_Store`, which pd.read_csv cannot parse.
# Sorting makes the processing order, and so the row order of the output,
# reproducible.
files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith('.csv'))

print('starting...')

# processing file X of Y: filename.csv

# I want to end up with: group by date, code.  count of code

y = len(files)

# Per-file summaries are collected and concatenated once after the loop.
# This avoids the quadratic cost of growing a frame with concat inside the
# loop, and makes the cell idempotent: re-running it rebuilds `pt1` instead of
# appending a second copy of every row.
per_file_counts = []

for x, file in enumerate(files, start=1):
    print(f"\rprocessing file {x} of {y}: {file}                  ", end="")
    df = pd.read_csv(os.path.join(input_folder, file))
    # Counts non-null `hr` values per (date, code).
    # NOTE(review): rows with a missing `hr` are not counted — confirm `hr` is
    # always populated, otherwise this undercounts "count of code".
    pt2 = df.groupby(['date', 'code'])[['hr']].count().reset_index()
    pt2 = pt2.rename(columns={'hr': 'how_many'})
    per_file_counts.append(pt2)

# NOTE(review): `date` is kept as the text found in each file; the combined
# output shows more than one date format, so normalise before comparing dates.
if per_file_counts:
    pt1 = pd.concat(per_file_counts, ignore_index=True)
else:
    # No input files: still write a header-only CSV with the expected columns.
    pt1 = pd.DataFrame(columns=['date', 'code', 'how_many'])

pt1.to_csv(output_file, index=False)
print('done!')

starting...
processing file 8 of 8: 2025_ytd.csv                  done!


In [279]:
# merge codes with mnpd and processed active_dispatch

In [314]:
# Inputs for the code-lookup merge.
codes = 'data/codes.csv'
mnpd = 'data/mnpd_calls_daily_counts.csv'
ad = 'data/active_dispatch_processed.csv'

# Outputs: the same two count tables with the code-lookup columns attached.
ad_w_codes = 'data/active_dispatch_processed_w_codes.csv'
mnpd_w_codes = 'data/mnpd_calls_daily_counts_w_codes.csv'

In [316]:
# Load the two count tables and the code lookup table from disk.
mnpd_df = pd.read_csv(filepath_or_buffer=mnpd)
codes_df = pd.read_csv(filepath_or_buffer=codes)
ad_df = pd.read_csv(filepath_or_buffer=ad)

In [318]:
# Inspect the columns of the MNPD daily counts.
mnpd_df.columns

Index(['date', 'code', 'how_many'], dtype='object')

In [320]:
# Inspect the columns of the code lookup table.
codes_df.columns

Index(['incident_type_code', 'category', 'incident_type_name', 'tree', 'wires',
       'flooding'],
      dtype='object')

In [322]:
# Give the lookup key the same name (`code`) that the count tables use.
codes_df = codes_df.rename(columns={'incident_type_code': 'code'})

In [324]:
# Display the MNPD daily counts before merging.
mnpd_df

Unnamed: 0,date,code,how_many
0,1/1/2018,15P,65
1,1/1/2018,16P,10
2,1/1/2018,16PJ,1
3,1/1/2018,3,17
4,1/1/2018,35P,3
...,...,...,...
193566,2025-04-02,88PW,5
193567,2025-04-02,93,47
193568,2025-04-02,93TS,2
193569,2025-04-02,95,2


In [326]:
# Inner join on `code`: only counts whose code exists in the lookup are kept.
merged_df = mnpd_df.merge(codes_df, how='inner', on='code')

In [328]:
# Display the MNPD counts with the lookup columns attached.
merged_df

Unnamed: 0,date,code,how_many,category,incident_type_name,tree,wires,flooding
0,1/1/2018,15P,65,15,Community Policing Activity,0,0,0
1,1/1/2018,16P,10,16,Prisoner Transport,0,0,0
2,1/1/2018,16PJ,1,16,Prisoner Transport,0,0,0
3,1/1/2018,3,17,3,Special Assignment,0,0,0
4,1/1/2018,35P,3,35,Mentally Ill Person,0,0,0
...,...,...,...,...,...,...,...,...
193566,2025-04-02,88PW,5,88,Investigate 911 Hang-Up Call,0,0,0
193567,2025-04-02,93,47,93,Traffic Violation,0,0,0
193568,2025-04-02,93TS,2,93,err,0,0,0
193569,2025-04-02,95,2,94,Personal Relief,0,0,0


In [330]:
# Persist the MNPD counts with lookup columns; the positional index is not written.
merged_df.to_csv(path_or_buf=mnpd_w_codes, index=False)

In [331]:
# Inspect the columns of the processed active-dispatch counts.
ad_df.columns

Index(['date', 'code', 'incident_type_name', 'how_many'], dtype='object')

In [334]:
# Inner join on `code`. Both frames carry `incident_type_name`, so the result
# holds `incident_type_name_x` (dispatch) and `incident_type_name_y` (lookup).
merged_df = ad_df.merge(codes_df, how='inner', on='code')

In [336]:
# Display the active-dispatch counts as loaded (before the lookup columns).
ad_df

Unnamed: 0,date,code,incident_type_name,how_many
0,2025-03-27,1000,BANK ROBBERY/HOLD UP,1
1,2025-03-27,53A,ROBERRY/HOLD UP ALARM,4
2,2025-03-27,53P,HOLD UP ROBBERY IN PROGRESS,1
3,2025-03-27,57P,FIGHT/ASSAULT,4
4,2025-03-27,57PJ,FIGHT/ASSAULT IN PROGRESS,1
...,...,...,...,...
275,2025-04-14,70P,BURGLARY-RESIDENCE BREAK-IN,2
276,2025-04-14,71A,NON-RESIDENCE-BURGLARY ALARM,29
277,2025-04-14,71P,BURGLARY-NON-RESIDENCE BREAK-IN,1
278,2025-04-14,83P,SHOTS FIRED,4


In [338]:
# Keep the lookup table's incident name under the plain column name.
merged_df = merged_df.rename(columns={'incident_type_name_y': 'incident_type_name'})

In [340]:
# Discard the dispatch feed's own incident name in favour of the lookup's.
merged_df = merged_df.drop(columns='incident_type_name_x')

In [342]:
# Display the active-dispatch counts with the lookup columns attached.
merged_df

Unnamed: 0,date,code,how_many,category,incident_type_name,tree,wires,flooding
0,2025-03-27,1000,1,1000,Bank Robbery/Hold Up,0,0,0
1,2025-03-27,53A,4,53A,Robbery/Hold Up Alarm,0,0,0
2,2025-03-27,53P,1,53P,Hold Up/Robbery In Progress,0,0,0
3,2025-03-27,57P,4,57,Fight / Assault,0,0,0
4,2025-03-27,57PJ,1,57,Fight / Assault,0,0,0
...,...,...,...,...,...,...,...,...
275,2025-04-14,70P,2,70P,Burglary-Residence Break-In,0,0,0
276,2025-04-14,71A,29,71A,Non-Residence Burglary Alarm,0,0,0
277,2025-04-14,71P,1,71P,Burglary-Non-Residence Break-In,0,0,0
278,2025-04-14,83P,4,83,Shots Fired,0,0,0


In [344]:
# Persist the dispatch counts with lookup columns; the positional index is not written.
merged_df.to_csv(path_or_buf=ad_w_codes, index=False)