In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import os

In [55]:
# File locations for the weather step: the daily summary is written to
# `processed`, built from the raw observations read from `toprocess`.
# NOTE(review): the dispatch/MNPD cells in this notebook use '../data/...'
# paths - confirm 'data/' is correct for the working directory used here.
processed = 'data/weather_gov_daily_summaries.csv'
toprocess = 'data/weather_gov.csv'

In [120]:
# Load the raw weather observations into `df`.
df = pd.read_csv(filepath_or_buffer=toprocess)

In [59]:
# Inspect the available column names of the raw weather frame.
df.columns

Index(['date', 'hr', 'time', 'temp', 'wind_speed', 'wind_gust', 'max_wind',
       'visibility_miles', 'prcp_1_hr', 'wind_direction', 'weather', 'clouds',
       'id'],
      dtype='object')

In [None]:
# Target daily summary columns: date, max_wind (max), high (max temp), low (min temp), prcp (sum, rounded to 2 decimals), storm_day (bool), storm_hrs (count), rain_hrs (count)

In [82]:
# Per-date maxima of wind and temperature; `date` stays a regular column.
pt1 = df.groupby('date', as_index=False)[['max_wind', 'temp']].max()

In [86]:
# The per-date maximum temperature is the daily high.
pt1 = pt1.rename(columns={'temp': 'high'})

In [88]:
# Per-date minimum temperature, exposed as the daily low.
pt2 = (
    df.groupby('date', as_index=False)[['temp']]
    .min()
    .rename(columns={'temp': 'low'})
)

In [90]:
# Attach the daily low to the running summary (one row per date).
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low
0,1/1/2018,14,22,7
1,1/1/2019,10,55,46
2,1/1/2020,13,52,34
3,1/1/2021,38,74,45
4,1/1/2022,41,76,57
...,...,...,...,...
2654,9/9/2020,6,89,66
2655,9/9/2021,24,82,60
2656,9/9/2022,18,87,64
2657,9/9/2023,23,85,66


In [92]:
# Total the `prcp_1_hr` readings for each date.
pt2 = df.groupby('date', as_index=False)[['prcp_1_hr']].sum()

In [96]:
# Name the daily total `prcp` and keep two decimal places.
pt2 = pt2.rename(columns={'prcp_1_hr': 'prcp'})
pt2['prcp'] = pt2['prcp'].round(2)

In [100]:
# Attach daily precipitation to the running summary.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp
0,1/1/2018,14,22,7,0.00
1,1/1/2019,10,55,46,0.00
2,1/1/2020,13,52,34,0.00
3,1/1/2021,38,74,45,0.37
4,1/1/2022,41,76,57,1.83
...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00
2655,9/9/2021,24,82,60,0.00
2656,9/9/2022,18,87,64,0.00
2657,9/9/2023,23,85,66,0.00


In [150]:
# Flag every observation row of `df` whose free-text `weather` description
# mentions rain or thunder (case-insensitive substring match; missing
# descriptions count as no match).
#
# The flags must be written to `df`, not `pt2`: the following cell sums
# `rain_hrs` / `storm_hrs` per date from `df`, and `pt2` at this point is the
# daily precipitation frame, which has no `weather` column (KeyError).
weather_text = df['weather'].fillna('').astype(str).str.lower()

# 1 when the keyword appears in the description, 0 otherwise, so a per-date
# sum yields the number of matching observations.
df['rain_hrs'] = weather_text.str.contains('rain', regex=False).astype(int)
df['storm_hrs'] = weather_text.str.contains('thunder', regex=False).astype(int)

In [152]:
# Per-date totals of the rain / storm flag columns.
pt2 = df.groupby('date', as_index=False)[['rain_hrs', 'storm_hrs']].sum()

In [157]:
# Attach the rain / storm counts to the running summary.
pt1 = pt1.merge(pt2, how='inner', on='date')
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs
0,1/1/2018,14,22,7,0.00,0,0
1,1/1/2019,10,55,46,0.00,0,0
2,1/1/2020,13,52,34,0.00,0,0
3,1/1/2021,38,74,45,0.37,6,0
4,1/1/2022,41,76,57,1.83,9,2
...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0
2655,9/9/2021,24,82,60,0.00,0,0
2656,9/9/2022,18,87,64,0.00,0,0
2657,9/9/2023,23,85,66,0.00,0,0


In [159]:
# A date is a storm day when its `storm_hrs` count is positive.
pt1['storm_day'] = pt1['storm_hrs'].gt(0)

In [161]:
# Display the daily summary, now including the boolean `storm_day` column.
pt1

Unnamed: 0,date,max_wind,high,low,prcp,rain_hrs,storm_hrs,storm_day
0,1/1/2018,14,22,7,0.00,0,0,False
1,1/1/2019,10,55,46,0.00,0,0,False
2,1/1/2020,13,52,34,0.00,0,0,False
3,1/1/2021,38,74,45,0.37,6,0,False
4,1/1/2022,41,76,57,1.83,9,2,True
...,...,...,...,...,...,...,...,...
2654,9/9/2020,6,89,66,0.00,0,0,False
2655,9/9/2021,24,82,60,0.00,0,0,False
2656,9/9/2022,18,87,64,0.00,0,0,False
2657,9/9/2023,23,85,66,0.00,0,0,False


In [163]:
# Persist the daily weather summary without the positional index.
pt1.to_csv(path_or_buf=processed, index=False)

In [3]:
# Merge the active dispatch data with the incident codes, then aggregate per date and code.

In [19]:
# Inputs: the active dispatch export and the incident code lookup table.
codes = '../data/codes.csv'
startfile = '../data/active_dispatch.csv'

# Output: per-date, per-code dispatch summary.
ad_w_codes = '../data/active_dispatch_processed_w_codes.csv'

In [21]:
# Load the code lookup and the dispatch records (rebinds `df`).
codes_df = pd.read_csv(filepath_or_buffer=codes)
df = pd.read_csv(filepath_or_buffer=startfile)

In [23]:
# Inspect the column names of the dispatch frame.
df.columns

Index(['code', 'date', 'hr', 'time', 'address', 'ert_mins'], dtype='object')

In [25]:
# Merge with the codes table.
# Group by date and code: count the calls (non-null `hr` values); take the mean, min and max of ert_mins; sum the tree, wires and flooding columns.

In [27]:
# Keep only dispatch rows whose `code` exists in the lookup table.
merged_df = df.merge(codes_df, how='inner', on='code')

In [29]:
# Number of non-null `hr` values per date / code / incident type.
pt1 = merged_df.groupby(
    ['date', 'code', 'incident_type_name'], as_index=False
)[['hr']].count()

In [31]:
# The `hr` count is the number of calls in the group.
pt1 = pt1.rename(columns={'hr': 'how_many'})

In [33]:
# Mean `ert_mins` per date and code.
pt2 = merged_df.groupby(['date', 'code'], as_index=False)[['ert_mins']].mean()

In [35]:
# Label the aggregated column as the average.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_avg'})

In [37]:
# Attach the average to the per-date / per-code summary.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [39]:
# Smallest `ert_mins` per date and code.
pt2 = merged_df.groupby(['date', 'code'], as_index=False)[['ert_mins']].min()

In [41]:
# Label the aggregated column as the minimum.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_min'})

In [43]:
# Attach the minimum to the per-date / per-code summary.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [45]:
# Largest `ert_mins` per date and code.
pt2 = merged_df.groupby(['date', 'code'], as_index=False)[['ert_mins']].max()

In [47]:
# Label the aggregated column as the maximum.
pt2 = pt2.rename(columns={'ert_mins': 'ert_mins_max'})

In [49]:
# Attach the maximum to the per-date / per-code summary.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [51]:
# Per-date / per-code totals of the tree, wires and flooding columns.
pt2 = merged_df.groupby(
    ['date', 'code'], as_index=False
)[['tree', 'wires', 'flooding']].sum()

In [53]:
# Attach the tree / wires / flooding totals to the summary.
pt1 = pt1.merge(pt2, how='inner', on=['date', 'code'])

In [55]:
# Display the finished per-date / per-code dispatch summary.
pt1

Unnamed: 0,date,code,incident_type_name,how_many,ert_mins_avg,ert_mins_min,ert_mins_max,tree,wires,flooding
0,2025-03-27,1000,Bank Robbery/Hold Up,1,280.00000,280,280,0,0,0
1,2025-03-27,53A,Robbery/Hold Up Alarm,4,42.00000,16,95,0,0,0
2,2025-03-27,53P,Hold Up/Robbery In Progress,1,258.00000,258,258,0,0,0
3,2025-03-27,57P,Fight / Assault,4,135.50000,57,230,0,0,0
4,2025-03-27,57PJ,Fight / Assault,1,171.00000,171,171,0,0,0
...,...,...,...,...,...,...,...,...,...,...
391,2025-04-22,71A,Non-Residence Burglary Alarm,32,51.28125,7,287,0,0,0
392,2025-04-22,71P,Burglary-Non-Residence Break-In,4,164.00000,61,360,0,0,0
393,2025-04-22,83P,Shots Fired,5,60.00000,8,116,0,0,0
394,2025-04-22,87W,Safety Hazard,1,5.00000,5,5,0,1,0


In [57]:
# Persist the dispatch summary without the positional index.
pt1.to_csv(path_or_buf=ad_w_codes, index=False)

In [59]:
# Inputs: folder of processed MNPD calls-for-service files and the code lookup.
codes = '../data/codes.csv'
input_folder = '../data/mnpd_calls_for_service_processed/'

# Outputs: daily counts broken down by code, and daily counts per date only.
mnpd_no_codes = '../data/mnpd_calls_daily_counts_no_codes.csv'
mnpd_w_codes = '../data/mnpd_calls_daily_counts_w_codes.csv'

In [61]:
# Empty accumulator carrying the column layout of the combined daily counts.
final_df = pd.DataFrame(
    columns=[
        'date',
        'code',
        'incident_type_name',
        'how_many',
        'tree',
        'wires',
        'flooding',
    ]
)

In [63]:
# Build daily call counts for every MNPD calls-for-service file in
# `input_folder` and write the combined table to `mnpd_w_codes`.
#
# Only '.csv' entries are read (stray entries such as checkpoint folders would
# make read_csv fail), in sorted order so the run is deterministic.
files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith('.csv'))

print('starting...')

# processing file X of Y: filename.csv

y = len(files)

# Per-file results are collected and concatenated once after the loop;
# concatenating onto an empty object-dtype frame inside the loop is quadratic
# and degrades the numeric columns' dtypes.
daily_frames = []

for x, file in enumerate(files, start=1):
    print(f"\rprocessing file {x} of {y}: {file}                  ", end="")
    df = pd.read_csv(os.path.join(input_folder, file))

    # `codes_df` is the lookup table loaded earlier from the codes CSV.
    # NOTE(review): the inner join drops calls whose code is not in the lookup,
    # so they are also absent from the totals - confirm that is intended.
    merged_df = pd.merge(df, codes_df, on='code', how='inner')
    # use the category column as new code column
    merged_df = merged_df.drop(columns='code').rename(columns={'category': 'code'})

    # Count calls and sum the tree/wires/flooding columns at the SAME grain.
    # Summing them per (date, code) and merging onto rows that are per
    # (date, code, incident_type_name) repeats the sum on every incident-type
    # row of a category, which inflates any later per-date total.
    merged_df = (
        merged_df
        .groupby(['date', 'code', 'incident_type_name'])
        .agg(
            how_many=('hr', 'count'),
            tree=('tree', 'sum'),
            wires=('wires', 'sum'),
            flooding=('flooding', 'sum'),
        )
        .reset_index()
    )

    daily_frames.append(merged_df)

if daily_frames:
    final_df = pd.concat(daily_frames, ignore_index=True)
else:
    # No input files: keep an empty table with the expected column layout.
    final_df = pd.DataFrame(
        columns=['date', 'code', 'incident_type_name', 'how_many', 'tree', 'wires', 'flooding']
    )

final_df.to_csv(mnpd_w_codes, index=False)
print()
print('done!')

starting...
processing file 8 of 8: 2025_ytd.csv                  
done!


In [64]:
# Collapse the per-code rows into one total row per date.
final_df = final_df.groupby('date', as_index=False)[
    ['how_many', 'tree', 'wires', 'flooding']
].sum()

In [65]:
# Persist the per-date totals without the positional index.
final_df.to_csv(path_or_buf=mnpd_no_codes, index=False)