## **Natural Experiment**

In [16]:
# Import data
import os

import pandas as pd

# Location of the CAD export. Set the CAHOOTS_CAD_CSV environment variable to
# point at the file on another machine; when it is unset, the original local
# path is used, so existing behaviour is unchanged.
file_path = os.environ.get("CAHOOTS_CAD_CSV", r'D:\dsci\CAHOOTS\Data\call_data_from_CAD.csv')
CAD_data = pd.read_csv(file_path)

In [17]:
# Standardize Cahoots identifiers
# Series.replace(..., regex=True) substitutes the matched *substring*, so every
# alternative has to consume the whole spelling it is meant to normalise.
# "CAHOOTS?" swallows an optional trailing "S"; without it "CAHOOTS" would be
# rewritten to "CAHOOT" + "S" and never compare equal to 'CAHOOT' below.
cahoots_identifiers = r"1J77\s*|3J79\s*|3J78\s*|3J77\s*|4J79\s*|3J81\s*|3J76\s*|2J28\s*|2J29\s*|CAHOOTS?\s*|CAHOT\s*|CAHO\s*"
for call_sign_col in ["PrimaryUnitCallSign", "RespondingUnitCallSign"]:
    CAD_data[call_sign_col] = CAD_data[call_sign_col].replace(cahoots_identifiers, 'CAHOOT', regex=True)

# Create an identifier for Cahoots involvement (1 if either call sign is the
# standardized 'CAHOOT' label, else 0)
CAD_data['Cahoots_related'] = (
    (CAD_data['PrimaryUnitCallSign'] == 'CAHOOT') | (CAD_data['RespondingUnitCallSign'] == 'CAHOOT')
).astype(int)

# Convert dt; values that cannot be parsed become NaT (errors='coerce')
for time_col in ["Call_Created_Time", "Call_First_Dispatched_Time", "Unit_OnScene_Time"]:
    CAD_data[time_col] = pd.to_datetime(CAD_data[time_col], errors='coerce')

CAD_data["year"] = CAD_data["Call_Created_Time"].dt.year

# Drop unneeded cols; errors='ignore' keeps this cell re-runnable after the
# columns have already been removed
CAD_data = CAD_data.drop(columns=['Unnamed: 0', 'Beat', 'Unit_Cleared_Time'], errors='ignore')

In [18]:
# Drop non-public CFS (call sources "SELF" and "RPTO")
non_public_sources = ["SELF", "RPTO"]
CAD_data = CAD_data.loc[~CAD_data["Call_Source"].isin(non_public_sources)]

# Remove exact duplicate rows, then rows where both call-sign columns are missing
CAD_data = CAD_data.drop_duplicates().dropna(
    subset=["PrimaryUnitCallSign", "RespondingUnitCallSign"], how='all'
)

# Only include calls in Eugene, and leave out 2022 and 2023 until data errors
# can be fixed
zip_codes = [97402.0, 97401.0, 97405.0, 97404.0, 97403.0, 97408.0]
in_eugene = CAD_data['Call_Zipcode'].isin(zip_codes)
in_excluded_year = CAD_data["year"].isin([2022, 2023])
CAD_data = CAD_data.loc[in_eugene & ~in_excluded_year]

In [19]:
def _resolve_time_bound(value, fallback, year_suffix):
    """Convert a user-supplied bound to a Timestamp, or return `fallback` when none is given."""
    # None, "" and 0 all mean "no bound supplied" (same truthiness rule as before).
    if value is None or (isinstance(value, (str, int)) and not value):
        return fallback
    if isinstance(value, (str, int)):
        text = str(value)
        if len(text) == 4 and text.isdigit():  # bare year such as "2016" or 2016
            return pd.to_datetime(f"{text}{year_suffix}")
    return pd.to_datetime(value)


def filter_df_by_time(df, start_time=None, end_time=None, time_column='Call_Created_Time'):
    """
    Return the rows of `df` whose `time_column` lies in [start_time, end_time].

    The input frame is not modified. The returned frame is a copy whose
    `time_column` holds the parsed datetime values.

    Parameters:
    df (pd.DataFrame): frame to filter
    start_time (str | int | datetime-like | None): inclusive lower bound. A
        4-digit year (e.g. "2016" or 2016) means the start of that year. When
        omitted, the earliest value in `time_column` is used.
    end_time (str | int | datetime-like | None): inclusive upper bound. A
        4-digit year means the very end of that year. Any other value is
        parsed as-is, so a date-only string such as "2016-05-31" means
        midnight at the start of that day. When omitted, the latest value in
        `time_column` is used.
    time_column (str): The name of the column containing datetime values

    Returns:
    pd.DataFrame: the matching rows (rows with a missing time never match)
    """
    # Parse into a separate Series instead of writing back into the caller's frame.
    timestamps = pd.to_datetime(df[time_column])

    lower = _resolve_time_bound(start_time, timestamps.min(), "-01-01")
    # Nanosecond-precision end of year so that timestamps in the final second
    # of Dec 31 (e.g. 23:59:59.500) are still included.
    upper = _resolve_time_bound(end_time, timestamps.max(), "-12-31 23:59:59.999999999")

    in_range = (timestamps >= lower) & (timestamps <= upper)

    filtered_df = df.loc[in_range].copy()
    filtered_df[time_column] = timestamps.loc[in_range]

    return filtered_df

In [20]:
def dataset_builder(data, dispatched=False, arrived=False, solo_cahoots=False, time=None):
    """
    Build an analysis subset of the CAD data by applying optional row filters.

    Parameters:
    data (pd.DataFrame): CAD call records
    dispatched (bool): keep only rows with a non-missing "Unit_Dispatched_Time"
    arrived (bool): keep only rows with a non-missing "Unit_OnScene_Time" and a
        non-missing "Unit_Dispatched_Time"
    solo_cahoots (bool): drop rows whose "PrimaryUnitCallSign" is the CAHOOTS
        label and whose "IsPrimary" is 0
        (presumably rows for additional, non-primary units on a CAHOOTS-led
        call; confirm against the CAD schema)
    time (list of strings) [start time, end time, time_col]; time_col may be
        omitted, in which case filter_df_by_time's default column is used

    Returns:
    pd.DataFrame: the filtered rows
    """
    # Apply time range if specified (extra list entries beyond three are ignored)
    if time:
        data = filter_df_by_time(data, *time[:3])

    # Dispatched filter is based on "Unit_Dispatched_Time"
    if dispatched:
        data = data[data["Unit_Dispatched_Time"].notna()]

    # Arrived filter is based on "Unit_OnScene_Time" (and requires a dispatch time too)
    if arrived:
        data = data[data["Unit_OnScene_Time"].notna()]
        data = data[data["Unit_Dispatched_Time"].notna()]

    # Only Include CAHOOTS calls where no other agencies are involved
    if solo_cahoots:
        # The standardization step labels CAHOOTS units 'CAHOOT'; the old
        # comparison against 'CAHOOTS' alone never matched that label.
        # 'CAHOOTS' is still accepted for any rows carrying that spelling.
        cahoots_labels = ('CAHOOT', 'CAHOOTS')
        cahoots_is_primary = data['PrimaryUnitCallSign'].isin(cahoots_labels)
        data = data[~(cahoots_is_primary & (data['IsPrimary'] == 0))]

    return data


In [21]:
# One frame per calendar year, selected on the call-creation timestamp
# (dispatched / arrived filters stay at their default of False)
CAD_2016, CAD_2017 = (
    dataset_builder(CAD_data, time=[year_label, year_label, 'Call_Created_Time'])
    for year_label in ('2016', '2017')
)

In [26]:
# Keep calls whose on-scene hour is 5 through 10 inclusive (05:00-10:59);
# rows without an on-scene time are excluded
onscene_hour_2016 = CAD_2016["Unit_OnScene_Time"].dt.hour
CAD_2016_nat = CAD_2016[onscene_hour_2016.between(5, 10)]

onscene_hour_2017 = CAD_2017["Unit_OnScene_Time"].dt.hour
CAD_2017_nat = CAD_2017[onscene_hour_2017.between(5, 10)]

In [28]:
# 40 most frequent initial incident types in the 2016 subset
incident_counts_2016 = CAD_2016_nat["InitialIncidentTypeDescription"].value_counts()
incident_counts_2016.head(40)

InitialIncidentTypeDescription
CRIMINAL TRESPASS                 1448
DISPUTE                           1287
CHECK WELFARE                     1270
DISORDERLY SUBJECT                 929
BURGLARY                           534
SUSPICIOUS CONDITIONS              529
ILLEGAL CAMPING                    382
SUSPICIOUS SUBJECT                 371
MOTOR VEH ACC UNKNOWN INJ          311
SUICIDAL SUBJECT                   289
UNAUTHORIZED USE OF VEHICLE        265
TRAFFIC HAZARD                     213
THEFT                              212
ASSAULT                            200
HARASSMENT                         129
ASSIST PUBLIC- POLICE              124
DOG AT LARGE                       105
WARRANT SERVICE                    104
LOCATION WANTED SUBJECT             97
INCOMPLETE CALL                     93
ALARM PANIC                         89
CARDIAC ARREST                      88
UNKNOWN PROBLEM                     85
MOTOR VEH ACC NO INJURY             85
TRANSPORT                        

In [29]:
# 20 most frequent initial incident types in the 2017 subset
incident_counts_2017 = CAD_2017_nat["InitialIncidentTypeDescription"].value_counts()
incident_counts_2017.head(20)

InitialIncidentTypeDescription
DISPUTE                        1528
CHECK WELFARE                  1508
CRIMINAL TRESPASS              1334
DISORDERLY SUBJECT              871
ASSIST PUBLIC- POLICE           710
TRANSPORT                       655
BURGLARY                        596
SUSPICIOUS CONDITIONS           511
ILLEGAL CAMPING                 435
SUSPICIOUS SUBJECT              397
SUICIDAL SUBJECT                374
MOTOR VEH ACC UNKNOWN INJ       318
TRAFFIC HAZARD                  303
UNAUTHORIZED USE OF VEHICLE     243
THEFT                           217
ASSAULT                         155
ROBBERY                         143
HARASSMENT                      135
LOCATION WANTED SUBJECT         123
CARDIAC ARREST                  109
Name: count, dtype: int64