In [233]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats

## Data Cleaning

In [234]:
raw_data = pd.read_csv("../data/CAD.csv")
raw_data.head(3)

Unnamed: 0.1,Unnamed: 0,IncidentNumber,Call_Created_Time,Call_First_Dispatched_Time,Call_First_On_Scene,Call_Cleared,Call_Zipcode,Beat,Call_Source,Call_Priority,InitialIncidentTypeDescription,IsPrimary,PrimaryUnitCallSign,RespondingUnitCallSign,Unit_Dispatched_Time,Unit_OnScene_Time,Unit_Cleared_Time,Disposition
0,1,OR-2016-01-01-16000001,1/1/16 0:00,1/1/16 0:04,1/1/16 0:09,1/1/16 0:54,97402.0,EP05,E911,3,ASSAULT,1,5.0000000000000004e+57,5E57,1/1/16 0:04,1/1/16 0:09,1/1/16 0:54,ADVISED
1,2,OR-2016-01-01-16000001,1/1/16 0:00,1/1/16 0:04,1/1/16 0:09,1/1/16 0:54,97402.0,EP05,E911,3,ASSAULT,0,5.0000000000000004e+57,4X40,1/1/16 0:09,1/1/16 0:09,1/1/16 0:46,ADVISED
2,3,OR-2016-01-01-16000001,1/1/16 0:00,1/1/16 0:04,1/1/16 0:09,1/1/16 0:54,97402.0,EP05,E911,3,ASSAULT,0,5.0000000000000004e+57,4E53,1/1/16 0:04,1/1/16 0:12,1/1/16 0:51,ADVISED


## Column Selection & Responder Scubbing

In [235]:
# Grabbing needed columns
data = raw_data[['Call_Priority', 'InitialIncidentTypeDescription',
            'RespondingUnitCallSign', 'Unit_Dispatched_Time', 'Unit_OnScene_Time']].copy()

# Dropping incomplete rows
print(f"Initial rows: {len(data)}")
data.dropna(inplace=True)
print(f"After dropping NA: {len(data)}")

data['RespondingUnitCallSign'] = data['RespondingUnitCallSign'].str.upper()

replace_pattern = r'(.J..|CAHOOT|CAHOT|CAHO.*)'
data['RespondingUnitCallSign'] = data['RespondingUnitCallSign'].str.replace(
    replace_pattern, 'CAHOOTS', regex=True
)


data['RespondingUnitCallSign'] = data['RespondingUnitCallSign'].apply(
    lambda x: 'CAHOOTS' if 'CAHOOT' in x or 'CAHOT' in x else x
)
def replace_other(x): 
    if x == 'CAHOOTS':
        return 'CAHOOTS'
    return 'EPD'

data['RespondingUnitCallSign'] = data['RespondingUnitCallSign'].apply(replace_other)
epd = data[data['RespondingUnitCallSign'] == 'EPD']
cahoots = data[data['RespondingUnitCallSign'].str.contains('CAHOOTS', case=False, na=False)]
epd_calls = epd['InitialIncidentTypeDescription'].unique()
cah_calls = cahoots['InitialIncidentTypeDescription'].unique()
same_call = np.intersect1d(cah_calls, epd_calls)
data = data[data['InitialIncidentTypeDescription'].isin(same_call)] if len(same_call) > 0 else data

Initial rows: 1048575
After dropping NA: 708301


In [236]:
data['Unit_Dispatched_Time'] = pd.to_datetime(data['Unit_Dispatched_Time'], errors='coerce')
data['Unit_OnScene_Time'] = pd.to_datetime(data['Unit_OnScene_Time'], format="%m/%d/%y %H:%M", errors='coerce')


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



## Varible Creation

In [251]:
def get_time_seg(data):
    """
    """
    hour = data.hour
    if 5 <= hour < 12:
        return "Morning"
    elif 12 <= hour < 17:
        return "Afternoon"
    elif 17 <= hour < 20:
        return "Evening"
    else: 
        return "Night"
data['Response_Time'] = (data['Unit_OnScene_Time'] - data['Unit_Dispatched_Time']).dt.total_seconds()/60
data['Day_Of_Week'] = data['Unit_Dispatched_Time'].dt.day_of_week
data['Time_Of_Day'] = data['Unit_Dispatched_Time'].apply(get_time_seg)
name_map = {0:"Sunday", 1:"Monday", 2:"Tuseday", 3:"Wednesday", 4:"Thursday", 5:"Friday", 6:"Saturday"}
data['Day_of_Week'] = data['Day_Of_Week'].map(name_map)

In [238]:
def categorize_incident(desc):
    desc = desc.upper()
    violent_keywords = ['ASSAULT', 'ROBBERY', 'RAPE', 'STAB', 'GUNSHOT', 'KIDNAP', 'ARMED', 'MENACING', 'WEAPON', 'ATTEMPT TO LOCATE DRUNK DRIVER', 'SHOTS FIRED', 'PURSUIT']
    traffic_keywords = ['TRAFFIC', 'DUII', 'RECKLESS DRIVING', 'HIT AND RUN', 'ACCIDENT', 'VEHICLE', 'MOTOR VEH', 'VEHICLE/PEDESTRIAN CRASH', 'TRAIN VS PED/BIKE CRASH', 'WRONG WAY DRIVER', 'RECKLESS ENDANGERING', 'RECKLESS BURNING', 'BLOCKED ALLEY', 'TRAFFIC HAZARD', 'PURSUIT']
    mental_medical_keywords = ['MENTAL','SUBJECT SCREAMING', 'SUICIDE', 'OVERDOSE','SUBJECT DOWN', 'CARDIAC ARREST', 'INJURED', 'SUICIDAL', 'DETOXIFICATION', 'ILL SUBJECT', 'DISORIENTED', 'DISORDERLY MEDICAL TRANSPORT', 'DECEASED SUBJECT', 'DEATH MESSAGE', 'MENTAL TRANSPORT', 'POISONING', 'INJURED SUBJECT', 'STAB WOUND', 'GUNSHOT WOUND', 'IN POSSESSION OF NARCOTICS','CHECK WELFARE']
    theft_keywords = ['THEFT', 'BURGLARY', 'CRIMINAL TRESPASS', 'SHOPLIFT', 'FRAUD', 'UNLAWFUL VEHICLE ENTRY', 'LOCATION STOLEN VEHICLE', 'RECOVERED STOLEN VEHICLE', 'LOCATION STOLEN PROPERTY', 'THEFT OF SERVICES', 'THEFT OF BICYCLE', 'THEFT OF IDENTITY', 'FOUND PROPERTY', 'FOUND CONTRABAND']
    disorder_keywords = ['DISORDERLY', 'DISPUTE', 'NOISE', 'ILLEGAL CAMPING', 'ILLEGAL DUMPING', 'BLOCKED SIDEWALK', 'VIOLATION', 'CIVIL STANDBY', 'PUBLIC INDECENCY', 'VIOLATION OF RESTRAINING ORDER', 'VIOLATION OF CITY ORDINANCE', 'MINOR IN POSSESSION', 'ALARM PANIC', 'ALARM DURESS']
    animal_keywords = ['ANIMAL', 'DOG', 'CAT', 'LIVESTOCK', 'DOG BITE', 'ANIMAL CRUELTY', 'ANIMAL ABUSE', 'ANIMAL ATTACK', 'FOUND ANIMAL', 'INJURED ANIMAL', 'ABANDONED ANIMAL', 'DOG VICIOUS']
    assistance_keywords = ['ASSIST', 'REQUEST', 'INFORMATION', 'CITIZEN CONTACT', 'FOLLOW UP', 'PATROL CHECK', 'BEAT INFORMATION', 'REQUEST COVER', 'REQUEST DRE', 'REQUEST ASSISTANCE', 'ASSIST FIRE DEPARTMENT', 'ASSIST OUTSIDE AGENCY', 'ASSIST SHERIFFS OFFICE', 'ASSIST MOTORIST', 'ASSIST PUBLIC WORKS', 'OFFICER SAFETY INFO', 'POLICE OFFICER HOLD']
    suspicious_keywords = ['SUSPICIOUS', 'LOCATION', 'IMPERSONATING', 'LOCATE', 'FOLLOW UP', 'ATTEMPT TO LOCATE', 'LOCATION WANTED SUBJECT', 'LOCATION RUNAWAY']
    abuse_keywords = ['CHILD ABUSE', 'ELDERLY ABUSE', 'CUSTODIAL INTERFERENCE', 'HARASSMENT', 'STALKING COMPLAINT', 'SEX ABUSE', 'VIOLATION OF RESTRAINING ORDER']
    misc_keywords = ['UNKNOWN PROBLEM', 'INCOMPLETE CALL', 'CITIZEN CONTACT', 'FOOT PATROL', 'DEAD ANIMAL', 'TRASH BIN FIRE', 'OPEN DOOR', 'OPEN CONTAINER', 'FLAGGED DOWN', 'EMERGENCY MESSAGE', 'BLOCKED ALLEY', 'LOST SUBJECT', 'WATER RESCUE', 'NUDE SUBJECT', 'INFORMATION- BOTH PD AND FD', 'POLICE OFFICER HOLD', 'TRAIN VS PED/BIKE CRASH', 'WARRANT SERVICE', 'SHOTS FIRED']
    
    if any(word in desc for word in violent_keywords): return 'Violent Crime'
    if any(word in desc for word in traffic_keywords): return 'Traffic & Vehicle'
    if any(word in desc for word in mental_medical_keywords): return 'Mental Health & Medical'
    if any(word in desc for word in theft_keywords): return 'Theft & Property'
    if any(word in desc for word in disorder_keywords): return 'Disorder & Public Safety'
    if any(word in desc for word in animal_keywords): return 'Animal-related'
    if any(word in desc for word in assistance_keywords): return 'Public Assistance'
    if any(word in desc for word in suspicious_keywords): return 'Suspicious / Investigative'
    if any(word in desc for word in abuse_keywords): return 'Abuse & Welfare'
    if any(word in desc for word in misc_keywords): return 'Miscellaneous'
    return 'Other / Unclassified'

data['IncidentCategory'] = data['InitialIncidentTypeDescription'].apply(categorize_incident)

In [239]:
len(data['IncidentCategory'].unique())

11

# Analysis

In [240]:
# Call Type
call = data[['IncidentCategory', 'Response_Time', 'RespondingUnitCallSign']]
call_grouped = call.groupby(['IncidentCategory', 'RespondingUnitCallSign']).agg(
    Mean_Response=('Response_Time', 'mean'),
    Std_Response=('Response_Time', 'std'),  
    Count=('Response_Time', 'count')
).reset_index()

call_grouped['Std_Error'] = call_grouped['Std_Response'] / np.sqrt(call_grouped['Count'])

# Time of Day 
tod = data[['Time_Of_Day', 'Response_Time', 'RespondingUnitCallSign']]
tod_grouped = tod.groupby(['Time_Of_Day', 'RespondingUnitCallSign']).agg(
    Mean_Response=('Response_Time', 'mean'),
    Std_Response=('Response_Time', 'std'),  
    Count=('Response_Time', 'count')
).reset_index()

tod_grouped['Std_Error'] = tod_grouped['Std_Response'] / np.sqrt(tod_grouped['Count'])

# Day of Week
dow = data[['Day_Of_Week', 'Response_Time', 'RespondingUnitCallSign']]
name_map = {0:"Sunday", 1:"Monday", 2:"Tuseday", 3:"Wednesday", 4:"Thursday", 5:"Friday", 6:"Saturday"}
dow['Day_Of_Week'] = dow['Day_Of_Week'].map(name_map)
dow_grouped = dow.groupby(['Day_Of_Week', 'RespondingUnitCallSign']).agg(
    Mean_Response=('Response_Time', 'mean'),
    Std_Response=('Response_Time', 'std'),  
    Count=('Response_Time', 'count')
).reset_index()

dow_grouped['Std_Error'] = dow_grouped['Std_Response'] / np.sqrt(dow_grouped['Count'])


# Call Priority
priority = data[['Call_Priority', 'Response_Time', 'RespondingUnitCallSign']]
priority_grouped = priority.groupby(['Call_Priority', 'RespondingUnitCallSign']).agg(
    Mean_Response=('Response_Time', 'mean'),
    Std_Response=('Response_Time', 'std'),  
    Count=('Response_Time', 'count')
).reset_index()

priority_grouped['Std_Error'] = priority_grouped['Std_Response'] / np.sqrt(priority_grouped['Count'])




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [269]:
priority_grouped

Unnamed: 0,Call_Priority,RespondingUnitCallSign,Mean_Response,Std_Response,Count,Std_Error
0,1,CAHOOTS,12.176271,12.701743,3540,0.213482
1,1,EPD,8.91186,112.659228,42455,0.546767
2,2,CAHOOTS,9.758621,5.927368,58,0.778302
3,2,EPD,7.364493,11.309273,5627,0.150764
4,3,CAHOOTS,11.295024,12.416025,10772,0.119628
5,3,EPD,8.69501,17.983701,267110,0.034796
6,4,CAHOOTS,13.406504,13.198539,246,0.841508
7,4,EPD,15.704565,31.624719,33212,0.173532
8,5,CAHOOTS,11.477089,11.140325,9646,0.113429
9,5,EPD,10.532405,39.041461,33405,0.213609


In [285]:
tables = (priority_grouped, tod_grouped, dow_grouped, call_grouped)

id = ['Priority', 'Time of Day', 'Day of Week', 'Call Type']
diff_table = {'Factor': [], 'var': [], 'Diff': []}

for df, key in zip(tables, id):
    for subset in df.iloc[:,0].unique():
        sub = df[df.iloc[:,0] == subset]
        diff = sub['Mean_Response'].iloc[0] - sub['Mean_Response'].iloc[1] 
        var = subset
        table = key

        diff_table['Factor'].append(table)
        diff_table['var'].append(var)
        diff_table['Diff'].append(diff)
diff_table = pd.DataFrame(diff_table)

diff_table

Unnamed: 0,Factor,var,Diff
0,Priority,1,3.264412
1,Priority,2,2.394128
2,Priority,3,2.600015
3,Priority,4,-2.298061
4,Priority,5,0.944684
5,Priority,6,4.45535
6,Priority,7,5.900541
7,Priority,8,2.611052
8,Priority,9,-0.96711
9,Priority,P,-1.037538


In [241]:
varaibles = [priority, call, tod, dow]
id = ['Priority', 'Call Type', 'Time of Day', 'Day of Week']
results = {'id': [], 'var': [], 't': [], 'p': []}

for df, key in zip(varaibles, id):
    for subset in df.iloc[:,0].unique():
        sub = df[df.iloc[:,0] == subset]
        cahoots = sub[sub['RespondingUnitCallSign'] == 'CAHOOTS']
        epd = sub[sub['RespondingUnitCallSign'] == 'EPD']

        t, p = stats.ttest_ind(cahoots['Response_Time'], epd['Response_Time'])
        results['id'].append(key)
        results['var'].append(subset)
        results['t'].append(t)
        results['p'].append(p)

results = pd.DataFrame.from_dict(results)
results['Sig'] = results['p'] <= 0.05

grey = results[results['Sig']== False]['var'].to_numpy()
results


Unnamed: 0,id,var,t,p,Sig
0,Priority,3,14.86312,5.978799e-50,True
1,Priority,6,9.393613,5.856312e-21,True
2,Priority,1,1.72308,0.08488076,False
3,Priority,P,-2.368587,0.01786028,True
4,Priority,7,11.591473,5.101464e-31,True
5,Priority,5,2.349007,0.01882803,True
6,Priority,4,-1.138983,0.2547186,False
7,Priority,2,1.60985,0.1074862,False
8,Priority,8,1.819236,0.06902421,False
9,Priority,9,-0.246382,0.805405,False


In [246]:
results['GRAPH'] = results.shape[0] * [1]
fig = px.sunburst(
    results, 
    path = ['id', 'var'],
    values = 'GRAPH',
    color = 'Sig',

    title= "Statistical Significance of Diffrerences in Response Times"
)
fig.write_image("../plots/Sunburst.png")

# General

In [265]:
color_map = {'EPD': 'lightblue', 'CAHOOTS': 'salmon'}
general = data[['Call_Priority', 'InitialIncidentTypeDescription', 'Day_Of_Week', 'RespondingUnitCallSign', 'Response_Time', 'Time_Of_Day']].melt(
    id_vars =('RespondingUnitCallSign', 'Response_Time'),).drop(columns =  {'value'}).groupby(['RespondingUnitCallSign', 'variable']).mean().reset_index()
fig = px.bar(general,
              x='variable',
                y='Response_Time',
                  color_discrete_map=color_map,
                    barmode='group',
                      title = 'Average Response Time by Category',

                       color = 'RespondingUnitCallSign',)
fig.write_image('../plots/General.png')
fig

### Priority

In [286]:
color_map = {'EPD': 'lightblue', 'CAHOOTS': 'salmon'}

fig = px.bar(
    priority_grouped, 
    x='Call_Priority', 
    y='Mean_Response', 
    color='RespondingUnitCallSign',
    error_y='Std_Error',  
    barmode='group',
    color_discrete_map=color_map,
    opacity=0.96,
    labels={'Mean_Response': 'Average Response Time (Minutes)'}
)
fig.update_traces(error_y_color='white')
# Customize layout
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)', 
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'type': 'category'},
    font=dict(color='white'),
    yaxis_title='Average Response Time (Minutes)',
    xaxis_title='Priority',  
    legend_title='Responding Unit',
    title="Average Response Time by Priority(±1 SD)",
    title_x=0.45,
    yaxis_range = [0,17],
    showlegend = True,
)
num_bars = len(fig.data[0].x)  


for trace in fig.data:
    trace.marker.color = [
        'grey' if day in grey else color_map[trace.name]
        for day in trace.x]


fig.add_trace(go.Bar(
     x=[None], y=[None],
     name='CAHOOTS',
     marker=dict(color='salmon'),
     showlegend=True,
     hoverinfo='skip'
))
fig.add_trace(go.Bar(
     x=[None], y=[None],
     name='EPD',
     marker=dict(color='lightblue'),
     showlegend=True,
     hoverinfo='skip'
))
fig.write_image("../plots/Priority.png")

fig.show()

priority_grouped

Unnamed: 0,Call_Priority,RespondingUnitCallSign,Mean_Response,Std_Response,Count,Std_Error
0,1,CAHOOTS,12.176271,12.701743,3540,0.213482
1,1,EPD,8.91186,112.659228,42455,0.546767
2,2,CAHOOTS,9.758621,5.927368,58,0.778302
3,2,EPD,7.364493,11.309273,5627,0.150764
4,3,CAHOOTS,11.295024,12.416025,10772,0.119628
5,3,EPD,8.69501,17.983701,267110,0.034796
6,4,CAHOOTS,13.406504,13.198539,246,0.841508
7,4,EPD,15.704565,31.624719,33212,0.173532
8,5,CAHOOTS,11.477089,11.140325,9646,0.113429
9,5,EPD,10.532405,39.041461,33405,0.213609


## Day of Week

In [289]:
color_map = {'EPD': 'lightblue', 'CAHOOTS': 'salmon'}

fig = px.bar(
    dow_grouped, 
    x='Day_Of_Week', 
    y='Mean_Response', 
    color='RespondingUnitCallSign',
    error_y='Std_Error',  
    barmode='group',
    category_orders = {
        "Day_Of_Week": ["Sunday", "Monday", "Tuseday", "Wednesday", "Thursday", "Friday", "Saturday"],  
        "RespondingUnitCallSign": ["CAHOOTS", "EPD"]   
        }, 
    color_discrete_map=color_map,
    opacity=0.96,
    labels={'Mean_Response': 'Average Response Time (Minutes)'}
)
fig.update_traces(error_y_color='white')

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)', 
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'type': 'category'},
    font=dict(color='white'),
    yaxis_title='Average Response Time (Minutes)',
    xaxis_title='Day of Week',  
    legend_title='Responding Unit',
    title="Average Response Time by Day of Week (±1 SD)",
    title_x=0.45,
)
fig.for_each_trace(
    lambda trace: trace.update(marker_color='grey') 
    if trace.x[0] in grey  
    else ()
)
for trace in fig.data:
    trace.marker.color = [
        'grey' if day in grey else color_map[trace.name]
        for day in trace.x]

fig.write_image("../plots/DOW.png")

fig.show()

## Time of Day

In [None]:
color_map = {'EPD': 'lightblue', 'CAHOOTS': 'salmon'}

fig = px.bar(
    tod_grouped, 
    x='Time_Of_Day', 
    y='Mean_Response', 
    color='RespondingUnitCallSign',
    error_y='Std_Error',  
    barmode='group',
    color_discrete_map=color_map,
    opacity=0.96,
    labels={'Mean_Response': 'Average Response Time (Minutes)'}
)
fig.update_traces(error_y_color='white')
# Customize layout
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)', 
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'type': 'category'},
    font=dict(color='white'),
    yaxis_title='Average Response Time (Minutes)',
    xaxis_title='Time of Day',  

    title="Average Response Time by Time of Day (±1 SD)",
    title_x=0.45
)
fig.update_xaxes(categoryorder="array", 
                categoryarray=["Morning", "Afternoon", "Evening", "Night"])
for trace in fig.data:
    trace.marker.color = [
        'grey' if day in grey else color_map[trace.name]
        for day in trace.x]

fig.write_image("../plots/TOD.png")

fig.show()

## Call Type

In [266]:
color_map = {'EPD': 'lightblue', 'CAHOOTS': 'salmon'}

fig = px.bar(
    call_grouped, 
    x='IncidentCategory', 
    y='Mean_Response', 
    color='RespondingUnitCallSign',
    error_y='Std_Error',  
    barmode='group',
    color_discrete_map=color_map,
    opacity=0.96,
    labels={'Mean_Response': 'Average Response Time (Minutes)'}
)
fig.update_traces(error_y_color='white')
# Customize layout
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)', 
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'type': 'category'},
    font=dict(color='white'),
    yaxis_title='Average Response Time (Minutes)',
    xaxis_title='Incident Category',  
    legend_title='Responding Unit',
    title="Average Response Time by Call Type (±1 SD)",
    title_x=0.45
)
for trace in fig.data:
    trace.marker.color = [
        'grey' if day in grey else color_map[trace.name]
        for day in trace.x]
fig.add_trace(go.Bar(
     x=[None], y=[None],
     name='CAHOOTS',
     marker=dict(color='salmon'),
     showlegend=True,
     hoverinfo='skip'
))
fig.add_trace(go.Bar(
     x=[None], y=[None],
     name='EPD',
     marker=dict(color='lightblue'),
     showlegend=True,
     hoverinfo='skip'
))
fig.write_image("../plots/Call_Type.png")
fig.show()