# Updated Diversion Metrics Based on Natural Experiment 

In [214]:
import pandas as pd 
# Hard-coded absolute Windows path to the CAD call export; edit it when running on another machine.
file_path = r'D:\dsci\CAHOOTS\Data\call_data_from_CAD.csv'
# Load the full CSV into memory as the raw call table used by every later cell.
CAD_data = pd.read_csv(file_path)

In [215]:
# Parse the call-creation timestamp; values that cannot be parsed become NaT (errors='coerce').
CAD_data["Call_Created_Time"] = pd.to_datetime(CAD_data['Call_Created_Time'], errors='coerce')
CAD_data["year"] = CAD_data["Call_Created_Time"].dt.year

# Standardize Cahoots identifiers
# Every alternative also consumes trailing whitespace, so padded call signs collapse to
# the same label. The replacement is applied once per column; running it a second time
# would leave the values unchanged.
# NOTE(review): this is a substring replacement, so a value such as "CAHOOTS" keeps its
# trailing "S" and will not equal 'CAHOOT' below - confirm no such values exist in the data.
cahoots_identifiers = r"1J77\s*|3J79\s*|3J78\s*|3J77\s*|4J79\s*|3J81\s*|3J76\s*|2J28\s*|2J29\s*|CAHOOT\s*|CAHOT\s*|CAHO\s*"

CAD_data["PrimaryUnitCallSign"] = CAD_data["PrimaryUnitCallSign"].replace(cahoots_identifiers, 'CAHOOT', regex=True)
CAD_data["RespondingUnitCallSign"] = CAD_data["RespondingUnitCallSign"].replace(cahoots_identifiers, 'CAHOOT', regex=True)

# Create an identifier for Cahoots involvement
# 1 when either the primary or the responding unit is CAHOOT, otherwise 0.
CAD_data['Cahoots_related'] = ((CAD_data['PrimaryUnitCallSign'] == 'CAHOOT') | (CAD_data['RespondingUnitCallSign'] == 'CAHOOT')).astype(int)

# Rename public Assist for clarity
CAD_data['InitialIncidentTypeDescription'] = CAD_data['InitialIncidentTypeDescription'].replace("ASSIST PUBLIC- POLICE", "ASSIST PUBLIC")


In [216]:
# Drop the leftover CSV index column, then order the calls chronologically.
data_cleaned = (
    CAD_data
    .drop(columns=['Unnamed: 0'])
    .sort_values(by="Call_Created_Time")
)

In [217]:
# Exclude rows whose call source is 'SELF'.
data_filtered = data_cleaned.loc[data_cleaned['Call_Source'].ne('SELF')]
# Keep one row per incident: the first occurrence in the current (chronological) order.
data_unique_incidents = data_filtered.drop_duplicates(subset=['IncidentNumber'])

In [218]:
# TODO: factor these helper functions out into an importable module

def filter_df_by_time(df, start_time=None, end_time=None, time_column='Call_Created_Time'):
    """
    Return the rows of ``df`` whose ``time_column`` lies inside a time range.

    Both bounds are inclusive. The input frame is never modified; the result is
    a copy whose ``time_column`` holds the parsed datetimes.

    Parameters:
    df (pandas.DataFrame): frame to filter
    start_time (str, int or datetime-like): lower bound. A four-digit year
        (e.g. '2016' or 2016) means the first instant of that year; any other
        value is parsed with ``pd.to_datetime``. When omitted or falsy, the
        earliest timestamp in the column is used.
    end_time (str, int or datetime-like): upper bound. A four-digit year means
        the last instant of that year; any other value is parsed with
        ``pd.to_datetime``. When omitted or falsy, the latest timestamp in the
        column is used.
    time_column (str): The name of the column containing datetime values

    Returns:
    pandas.DataFrame: the rows inside the range, in their original order
    """
    # Parse into a separate Series rather than assigning back through df.loc:
    # writing into df would mutate the caller's frame (often itself a slice),
    # and an in-place column assignment is not guaranteed to change the dtype.
    timestamps = pd.to_datetime(df[time_column])

    if start_time:
        start_text = str(start_time)
        if len(start_text) == 4 and start_text.isdigit():  # if only a year is given
            lower_bound = pd.Timestamp(f"{start_text}-01-01")
        else:
            lower_bound = pd.to_datetime(start_time)
    else:
        lower_bound = timestamps.min()

    if end_time:
        end_text = str(end_time)
        if len(end_text) == 4 and end_text.isdigit():
            # Extend to the final nanosecond so stamps with fractional seconds
            # in the last second of the year are not dropped.
            upper_bound = pd.Timestamp(f"{end_text}-12-31 23:59:59.999999999")
        else:
            upper_bound = pd.to_datetime(end_time)
    else:
        upper_bound = timestamps.max()

    in_range = (timestamps >= lower_bound) & (timestamps <= upper_bound)

    filtered_df = df.loc[in_range].copy()
    # Hand back parsed datetimes so callers see a datetime column.
    filtered_df[time_column] = timestamps.loc[in_range]

    return filtered_df

def dataset_builder(data, dispatched=False, arrived=False, solo_cahoots=False, time=None):
    """
    Narrow a call table down to the subset needed for an analysis.

    Filters are applied in this order: time range, dispatched/arrived, solo CAHOOTS.

    Parameters:
    data (pandas.DataFrame): call records
    dispatched (bool): keep only rows with a non-null "Unit_Dispatched_Time"
    arrived (bool): keep only rows where both "Unit_OnScene_Time" and
        "Unit_Dispatched_Time" are non-null
    solo_cahoots (bool): drop rows whose primary unit is CAHOOT while
        "IsPrimary" is 0
    time (list of strings) [start time, end time, time_col]; time_col may be
        omitted, in which case filter_df_by_time's default column is used

    Returns:
    pandas.DataFrame: the filtered rows
    """
    # Apply time range if specified
    if time:
        data = filter_df_by_time(data, *time[:3])

    # Both the dispatched and the arrived filter require "Unit_Dispatched_Time"
    if dispatched or arrived:
        data = data[data["Unit_Dispatched_Time"].notna()]

    # Arrived filter additionally requires "Unit_OnScene_Time"
    if arrived:
        data = data[data["Unit_OnScene_Time"].notna()]

    # Only Include CAHOOTS calls where no other agencies are involved
    if solo_cahoots:
        # Call signs are normalized to 'CAHOOT' earlier in this file, so that is
        # the label to match here.
        # NOTE(review): assumes IsPrimary == 0 marks a row that is not the
        # primary responder's own record - confirm against the CAD schema.
        cahoots_primary_shared = (data['PrimaryUnitCallSign'] == 'CAHOOT') & (data['IsPrimary'] == 0)
        data = data[~cahoots_primary_shared]

    return data

In [219]:
def calculate_diversions(data, by_year=True, incident_adjust=None):
    """
    Compute the CAHOOTS diversion rate in percent, optionally discounted per incident type.

    The rate is adjusted CAHOOTS calls divided by (adjusted CAHOOTS calls +
    police-handled calls). For every incident type listed in
    ``incident_adjust``, that fraction of the CAHOOTS calls of the type is
    subtracted from the CAHOOTS count before the rate is taken.

    Parameters:
    data (pandas.DataFrame): needs the columns "Cahoots_related" (1 or 0),
        "year" and, when adjustments are given, "InitialIncidentTypeDescription"
    by_year (bool): True returns one rate per year, False a single overall rate
    incident_adjust (dict): incident type -> fraction of its CAHOOTS calls to
        remove; None or an empty dict means no adjustment

    Returns:
    pandas.DataFrame with the columns "year" and "Diversion Rate" when
    ``by_year`` is True, otherwise a single number (NaN when there are no calls).
    """
    # None replaces the former mutable {} default; both mean "no adjustment".
    incident_adjust = incident_adjust or {}

    # Split the data into Cahoots-related and police-handled subsets
    cahoots_related = data[data["Cahoots_related"] == 1]
    police_handled = data[data["Cahoots_related"] == 0]

    cahoots_year = cahoots_related.groupby("year").size()
    epd_year = police_handled.groupby("year").size()

    # Put both counts on the same set of years. Without this, a year that only
    # appears on one side turns into NaN when the two Series are combined.
    years = cahoots_year.index.union(epd_year.index).rename("year")
    cahoots_year = cahoots_year.reindex(years, fill_value=0)
    epd_year = epd_year.reindex(years, fill_value=0)

    yearly_adjustment = pd.Series(0.0, index=years)
    if incident_adjust:
        # Group the data once outside the loop
        incident_df = cahoots_related.groupby(["year", "InitialIncidentTypeDescription"]).size().reset_index(name='count')

        for incident_type, fraction in incident_adjust.items():
            of_type = incident_df[incident_df["InitialIncidentTypeDescription"] == incident_type]
            # Calls of this type to remove, per year
            removed = of_type.groupby("year")["count"].sum() * fraction
            yearly_adjustment = yearly_adjustment.add(removed, fill_value=0)

        # Series.add may rebuild the index; restore the shared one.
        yearly_adjustment = yearly_adjustment.reindex(years, fill_value=0.0)

    if by_year:
        # Calculate diversions by year
        adjusted_cahoots_year = cahoots_year - yearly_adjustment
        total_year = adjusted_cahoots_year + epd_year
        diversions_by_year = (adjusted_cahoots_year / total_year) * 100
        return diversions_by_year.reset_index(name='Diversion Rate')

    # Calculate overall diversion rate
    total_adjustment = yearly_adjustment.sum()
    cahoots_related_count = len(cahoots_related) - total_adjustment
    total_count = len(police_handled) + cahoots_related_count
    if total_count == 0:
        # No calls at all: the rate is undefined.
        return float("nan")
    return (cahoots_related_count / total_count) * 100

In [235]:
# Fraction of CAHOOTS calls of each incident type that is removed from the
# CAHOOTS count before the diversion rate is computed.
incidents = {
    "CHECK WELFARE": 0.091,
    "DISPUTE": 0.085,
    "TRAFFIC HAZARD": 0.508,
    "SUBJECT SCREAMING": 0.536,
    "FOUND SYRINGE": 0.87,
    "ASSIST PUBLIC": 0.921,
    "TRANSPORT": 0.963,
}

# Yearly adjusted diversion rate for dispatched calls created 2016 through 2021.
calculate_diversions(
    dataset_builder(
        data_unique_incidents,
        dispatched=True,
        arrived=False,
        time=['2016', '2021', 'Call_Created_Time'],
    ),
    by_year=True,
    incident_adjust=incidents,
)

Unnamed: 0,year,Diversion Rate
0,2016,9.556961
1,2017,12.918096
2,2018,14.147789
3,2019,14.157591
4,2020,14.862008
5,2021,14.854264


### Using dispatched EPD tables

In [226]:
# Dispatched calls created during 2021.
dispatched_2021 = dataset_builder(
    data_unique_incidents,
    dispatched=True,
    arrived=False,
    time=['2021', '2021', 'Call_Created_Time'],
)

# Of those, the CAHOOTS-related calls, and their 30 most frequent incident types.
dispatched_cahoots_2021 = dispatched_2021.loc[dispatched_2021["Cahoots_related"] == 1]
dispatched_cahoots_2021["InitialIncidentTypeDescription"].value_counts().head(30)

InitialIncidentTypeDescription
CHECK WELFARE             5840
ASSIST PUBLIC             5773
TRANSPORT                 1807
SUICIDAL SUBJECT          1394
TRAFFIC HAZARD             346
DISORDERLY SUBJECT         295
INTOXICATED SUBJECT        212
FOUND SYRINGE              192
ASSIST FIRE DEPARTMENT     182
CRIMINAL TRESPASS          135
DISORIENTED SUBJECT        135
DISPUTE                    109
SUSPICIOUS CONDITIONS       69
NUDE SUBJECT                54
DISORDERLY JUVENILES        50
UNKNOWN PROBLEM             44
OVERDOSE                    40
DEATH MESSAGE               19
SUSPICIOUS SUBJECT          17
ATTEMPT TO LOCATE           16
INDECENT EXPOSURE           15
CARDIAC ARREST              14
SUBJECT SCREAMING           12
MISSING PERSON              12
BEAT INFORMATION            11
ASSIST OUTSIDE AGENCY        9
ASSAULT                      9
INJURED SUBJECT              9
HARASSMENT                   9
BURGLARY                     7
Name: count, dtype: int64

In [3]:
# Fraction of each incident type that is removed from the CAHOOTS count.
incidents = {
    "CHECK WELFARE": 0.091,
    "DISPUTE": 0.085,
    "TRAFFIC HAZARD": 0.508,
    "SUBJECT SCREAMING": 0.536,
    "FOUND SYRINGE": 0.87,
    "ASSIST PUBLIC": 0.921,
    "TRANSPORT": 0.963,
}

# Incident types with an adjustment: raw count times the share that is kept.
# The screaming-subject count (12) was found from CAD DATA.
(
    welfare_check,
    public_assist,
    transport,
    traffic_hazard,
    dispute,
    found_syringe,
    screaming_subject,
) = (
    raw_count * (1 - incidents[incident_type])
    for raw_count, incident_type in (
        (6003, "CHECK WELFARE"),
        (5788, "ASSIST PUBLIC"),
        (1803, "TRANSPORT"),
        (372, "TRAFFIC HAZARD"),
        (255, "DISPUTE"),
        (192, "FOUND SYRINGE"),
        (12, "SUBJECT SCREAMING"),
    )
)

# Incident types without an adjustment keep their raw counts.
suicidal_subject, disorderly_subject = 1571, 457
criminal_trespass, intoxicated_subject = 230, 219

# Adjusted for screaming subject
other = 1216 - 12

# Dispatched total police and cahoots
dispatched_total_cfs = 68427

adjusted_dispatched_cahoots = [
    welfare_check,
    public_assist,
    transport,
    suicidal_subject,
    disorderly_subject,
    traffic_hazard,
    dispute,
    criminal_trespass,
    intoxicated_subject,
    found_syringe,
    screaming_subject,
    other,
]

In [11]:
# Raw (unadjusted) CAHOOTS dispatched count: the per-type literals plus the "other" bucket.
pre_adjust_sum = sum((6003, 5788, 1803, 1571, 457, 372, 255, 230, 219, 192, 12)) + other
# Number of calls the adjustment removes from the CAHOOTS count.
pre_adjust_sum - sum(adjusted_dispatched_cahoots)


7997.433000000001

In [13]:
# Corrected rate = adjusted CAHOOTS calls / (all dispatched calls minus the calls
# removed from the CAHOOTS count by the adjustment).
corrected_diversion_rate = sum(adjusted_dispatched_cahoots) / (dispatched_total_cfs - (pre_adjust_sum - sum(adjusted_dispatched_cahoots)))
corrected_diversion_rate

0.16727849464815792

In [14]:
# Sanity check: the removed amount (literal pasted from an earlier cell's output) plus
# the adjusted total should reproduce pre_adjust_sum.
7997.433000000001 + sum(adjusted_dispatched_cahoots)

18106.0

In [15]:
# Display the raw (unadjusted) CAHOOTS dispatched total.
pre_adjust_sum

18106

In [17]:
# Denominator used by corrected_diversion_rate: dispatched total minus the calls removed by the adjustment.
dispatched_total_cfs - (pre_adjust_sum - sum(adjusted_dispatched_cahoots))

60429.566999999995

In [7]:
# Adjusted CAHOOTS calls as a share of 109854.
# NOTE(review): 109854 is a bare literal with no definition in the visible notebook -
# confirm which call total it represents.
sum(adjusted_dispatched_cahoots) / 109854

0.0920181968794946

In [9]:
# Total adjusted CAHOOTS dispatched calls.
sum(adjusted_dispatched_cahoots)

10108.567

In [234]:
# Display the corrected diversion rate currently held in memory.
corrected_diversion_rate

0.17333399544531655

In [230]:
# Total adjusted CAHOOTS dispatched calls.
sum(adjusted_dispatched_cahoots)

10108.567

In [231]:
# Print every adjusted count on its own line, in the order of the list.
for i in adjusted_dispatched_cahoots:
    print(i)

5456.727
457.2519999999998
66.71100000000006
1571
457
183.024
233.32500000000002
230
219
24.96
5.568
1204


# REMEMBER TO UPDATE 3A CALCULATIONS BY SUBTRACTING THE NEW NUMERATOR

# Denominator calculations for diversion rates

In [32]:
# Inputs for the denominator calculation.
total_dispatched_calls = 68_427
dispatched_cahoots_calls = 18_106
dispatched_welfare_checks = 5_546

# Every CAHOOTS call that is not a welfare check.
other_types = dispatched_cahoots_calls - dispatched_welfare_checks

# Calls taken out of the denominator: all non-welfare CAHOOTS calls plus 26% of the welfare checks.
total_removed = (dispatched_welfare_checks * 0.26) + other_types
total_removed

14001.96

In [34]:
# Cross-check of total_removed: all CAHOOTS calls minus the 74% of welfare checks that are not removed.
dispatched_cahoots_calls - (dispatched_welfare_checks * 0.74)

14001.96

In [35]:
# Same check with literal figures.
# NOTE(review): 4104 is presumably dispatched_welfare_checks * 0.74 (about 4104.04) rounded down - confirm.
18106 - 4104

14002

In [37]:
# Denominator: dispatched calls that remain after subtracting total_removed.
denom = total_dispatched_calls - total_removed
denom

54425.04

In [28]:
# 74% of the dispatched welfare checks as a share of denom.
(dispatched_welfare_checks * 0.74)/denom

0.07540720227307136

In [24]:
# Sum of three dispatched counts; 5546 matches the welfare-check count used above.
top_3_dispatched = sum((5791, 5546, 1781))
top_3_dispatched

13118

In [25]:
# previously divertible table 2
# 18106 CAHOOTS dispatched calls minus 6003, 5788 and 1803 - the same literals used
# earlier for the welfare-check, public-assist and transport counts.
18106 - 6003 - 5788 - 1803

4512

In [29]:
# Dispatched total minus 5788 and 1803 (the literals used earlier for the public-assist
# and transport counts) and minus 26% of the dispatched welfare checks.
total_dispatched_calls - 5788 - 1803 - (dispatched_welfare_checks * 0.26)

59394.04