# (1) Clean The CAD Data

In [42]:
#imports
import pandas as pd
from pathlib import Path

In [43]:
# Load the raw CAD export and keep only the columns listed below
cad_columns = [
    "IncidentNumber", "Call_Created_Time", "Call_Source", "InitialIncidentTypeDescription", "Disposition",
    "PrimaryUnitCallSign", "RespondingUnitCallSign", "IsPrimary", "Call_First_Dispatched_Time", "Call_First_On_Scene",
]
CAD_data = pd.read_csv("data/call_data_from_CAD.csv").loc[:, cad_columns].copy()

# Parse the three timestamp columns; values that cannot be parsed become NaT
for timestamp_column in ("Call_Created_Time", "Call_First_Dispatched_Time", "Call_First_On_Scene"):
    CAD_data[timestamp_column] = pd.to_datetime(CAD_data[timestamp_column], errors='coerce')


In [44]:
# Drop calls created in 2016, 2022 or 2023.
# Rows whose creation time is NaT have no year, never match the list, and are kept.
excluded_years = [2016, 2022, 2023]
CAD_data = CAD_data[~CAD_data['Call_Created_Time'].dt.year.isin(excluded_years)]
CAD_data

Unnamed: 0,IncidentNumber,Call_Created_Time,Call_Source,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,IsPrimary,Call_First_Dispatched_Time,Call_First_On_Scene
641,OR-2017-01-01-17000001,2017-01-01 00:00:16,E911,SHOTS FIRED,REFERRED TO OTHER AGENCY,,,0,NaT,NaT
642,OR-2017-01-01-17000002,2017-01-01 00:01:26,PHONE,ILLEGAL FIREWORKS,QUIET ON ARRIVAL,4F71,4F71,1,2017-01-01 03:20:20,2017-01-01 03:20:20
643,OR-2017-01-01-17000003,2017-01-01 00:01:43,SELF,TRAFFIC STOP,ADVISED,4E34,4E34,1,2017-01-01 00:01:44,2017-01-01 00:01:44
644,OR-2017-01-01-17000005,2017-01-01 00:02:17,E911,ILLEGAL FIREWORKS,INFORMATION ONLY,5E88,5E88,1,2017-01-01 02:18:57,NaT
645,OR-2017-01-01-17000008,2017-01-01 00:02:45,W911,DISPUTE,ARREST,4E42,4E42,1,2017-01-01 00:04:01,2017-01-01 00:06:20
...,...,...,...,...,...,...,...,...,...,...
1616031,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4E23,0,2021-12-31 23:51:41,2021-12-31 23:51:41
1616032,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4F64,1,2021-12-31 23:51:41,2021-12-31 23:51:41
1616033,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4F65,0,2021-12-31 23:51:41,2021-12-31 23:51:41
1616034,OR-2021-12-31-21336961,2021-12-31 23:59:50,E911,POISONING,DISREGARD,,,0,NaT,NaT


### Standardize Cahoots identifiers 

In [45]:
# Rewrite every call-sign fragment matched by the pattern below to the single label 'CAHOOT'
cahoots_identifiers = r"1J77\s*|3J79\s*|3J78\s*|3J77\s*|4J79\s*|3J81\s*|3J76\s*|2J28\s*|2J29\s*|CAHOOT\s*|CAHOT\s*|CAHO\s*"

for call_sign_column in ("PrimaryUnitCallSign", "RespondingUnitCallSign"):
    CAD_data[call_sign_column] = CAD_data[call_sign_column].replace(cahoots_identifiers, 'CAHOOT', regex=True)

# Flag (1/0) rows where the primary or the responding call sign is exactly 'CAHOOT'
primary_is_cahoots = CAD_data['PrimaryUnitCallSign'].eq('CAHOOT')
responder_is_cahoots = CAD_data['RespondingUnitCallSign'].eq('CAHOOT')
CAD_data['Cahoots_related'] = (primary_is_cahoots | responder_is_cahoots).astype(int)

In [51]:
from scripts import police_diversion_methodology as pdm

# Call pdm.create_datasets on the calls created in 2020, with filter_type='none'.
# NOTE(review): `test` is reassigned by a later cell before this result is read
# anywhere in the visible notebook.
calls_2020 = CAD_data[CAD_data["Call_Created_Time"].dt.year == 2020]
test = pdm.create_datasets(calls_2020, filter_type='none')

In [59]:
# Subset of calls whose creation year is 2021
created_in_2021 = CAD_data["Call_Created_Time"].dt.year.eq(2021)
data_2021 = CAD_data.loc[created_in_2021]
data_2021

Unnamed: 0,IncidentNumber,Call_Created_Time,Call_Source,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,IsPrimary,Call_First_Dispatched_Time,Call_First_On_Scene,Cahoots_related
2847,OR-2021-01-01-21000001,2021-01-01 00:00:58,PHONE,BEAT INFORMATION,INFORMATION ONLY,6E31,6E31,1,2021-01-01 00:22:41,NaT,0
2848,OR-2021-01-01-21000002,2021-01-01 00:01:03,E911,ILLEGAL FIREWORKS,RELAYED TO LANE COUNTY SHERIFFS OFFICE,,,0,NaT,NaT,0
2849,OR-2021-01-01-21000004,2021-01-01 00:01:48,W911,TRAFFIC HAZARD,PATROL CHECK,4E53,4E53,1,2021-01-01 00:02:53,2021-01-01 00:06:38,0
2850,OR-2021-01-01-21000004,2021-01-01 00:01:48,W911,TRAFFIC HAZARD,PATROL CHECK,4E53,5E47,0,2021-01-01 00:02:53,2021-01-01 00:06:38,0
2851,OR-2021-01-01-21000004,2021-01-01 00:01:48,W911,TRAFFIC HAZARD,PATROL CHECK,4E53,CMD16,0,2021-01-01 00:02:53,2021-01-01 00:06:38,0
...,...,...,...,...,...,...,...,...,...,...,...
1616031,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4E23,0,2021-12-31 23:51:41,2021-12-31 23:51:41,0
1616032,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4F64,1,2021-12-31 23:51:41,2021-12-31 23:51:41,0
1616033,OR-2021-12-31-21336952,2021-12-31 23:51:41,SELF,FIGHT,CITED IN LIEU OF CUSTODY,4F64,4F65,0,2021-12-31 23:51:41,2021-12-31 23:51:41,0
1616034,OR-2021-12-31-21336961,2021-12-31 23:59:50,E911,POISONING,DISREGARD,,,0,NaT,NaT,0


In [74]:
# Call pdm.create_datasets on the 2021 subset. The meaning of filter_type and
# min_responses is defined in scripts.police_diversion_methodology (not shown here).
test = pdm.create_datasets(data_2021, filter_type='cahoots_responded', min_responses=10000)

In [75]:
# Show the 'results' entry of the object returned by pdm.create_datasets
test['results']

{'Gross Divert Rate 1 (All Cahoots Associations)': 20.724245055071496,
 'Gross Divert Rate 2 (All Cahoots Dispatched CFS)': 17.74316705542713,
 'Gross Divert Rate 3 (All Cahoots Arrived CFS)': 14.90016003681893,
 'Adjusted Divert Rate (Excluding Top 3 Natures)': 2.7352698137087748,
 'Adjusted Divert Rate (Excluding Top 3 Natures dispatch only)': 5.469979935175181,
 'Adjusted Divert Rate with Check Welfare Adjustment': 7.544067736058017}

### Remove Disregards, duplicates, referrals and cancellations

In [6]:
# Dispositions whose rows are removed, grouped by the reason for removal
removed_dispositions = [
    # Disregards
    'DISREGARD',
    'DISREGARDED BY DISPATCH',
    'DISREGARDED BY PATROL SUPERVISOR',
    # Duplicate or no Dispatch
    'ACCIDENTALLY CHOSE NEW EVENT',
    'MOTOR VEHICLE ACCIDENT - NO DISPATCH',
    'QUALITY OF LIFE - NO DISPATCH',
    'UNABLE TO DISPATCH',
    'WILL CALL BACK',
    # Cancellations
    'NO ACTION TAKEN',
    'CANCEL WHILE ENROUTE',
    'RESOLVED',
    'CANCELED REPORT NUMBER',
    'CANCEL FIRE UNIT FROM CALL',
    # Referrals and relays
    'REFERRED TO OTHER AGENCY',
    'RELAYED TO UNIVERSITY OF OREGON POLICE',
    'RELAYED TO OREGON STATE POLICE',
    'RELAYED TO LANE COUNTY SHERIFFS OFFICE',
    'RELAYED TO PARKING CONTROL',
]

# A row survives only if it passes all three checks below; rows with a missing
# Disposition are not in the list and therefore pass the first check.
disposition_kept = ~CAD_data['Disposition'].isin(removed_dispositions)
has_both_call_signs = CAD_data["PrimaryUnitCallSign"].notna() & CAD_data["RespondingUnitCallSign"].notna()
not_created_in_2023 = CAD_data['Call_Created_Time'].dt.year != 2023

CAD_data = CAD_data[disposition_kept & has_both_call_signs & not_created_in_2023]


CAD_data

Unnamed: 0,Call_Created_Time,Call_Zipcode,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,Total_Time_To_Dispatch,Total_Time_Dispatch_To_On_Scene,Total_Time_On_Scene_To_Clear,Handled_by_Cahoots
0,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,5E57,294.0,298.0,2663.0,0
1,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4X40,577.0,0.0,2238.0,0
2,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4E53,294.0,448.0,2372.0,0
3,2016-01-01 00:00:24,97401.0,TRAFFIC STOP,ADVISED,5T81,5T81,1.0,0.0,136.0,0
5,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,4F72,124.0,0.0,198.0,0
...,...,...,...,...,...,...,...,...,...,...
1616466,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4F72,1.0,0.0,259.0,0
1616467,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4E11,11.0,36.0,346.0,0
1616470,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,3E27,189.0,277.0,95.0,0
1616473,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,4E36,189.0,185.0,362.0,0


In [7]:
# Order the calls by creation time before writing them out
CAD_data = CAD_data.sort_values("Call_Created_Time")

# Make sure the destination folder exists (no error if it is already there)
output_dir = Path("data/cleaned_data")
output_dir.mkdir(parents=True, exist_ok=True)

# Write the cleaned frame to CSV without the index column
output_path = output_dir.joinpath("cleaned_CAD_data.csv")
CAD_data.to_csv(output_path, index=False)

print(f"Cleaned data saved to {output_path}")

Cleaned data saved to data\cleaned_data\cleaned_CAD_data.csv


# (2) Clean Cahoots Data

In [8]:
# Load the CAHOOTS call export (Excel workbook) into a DataFrame
cahoots_dataset = pd.read_excel("data/call_data_from_CAHOOTS_2021_2022.xlsx")

In [9]:
# Build the cleaned CAHOOTS frame in one chain:
#   1. discard rows with no "Reason for Dispatch"
#   2. join the Date and TimeOfCall text with a space and parse it into DateTime
#      (values that cannot be parsed become NaT)
#   3. keep only rows whose City is "Eugene"
#   4. keep only the DateTime and "Reason for Dispatch" columns
cahoots_dataset = (
    cahoots_dataset
    .dropna(subset=["Reason for Dispatch"])
    .assign(
        DateTime=lambda frame: pd.to_datetime(
            frame["Date"].astype(str) + " " + frame["TimeOfCall"].astype(str),
            errors='coerce',
        )
    )
    .loc[lambda frame: frame["City"] == "Eugene", ["DateTime", "Reason for Dispatch"]]
)

In [10]:
# Display the CAHOOTS frame
cahoots_dataset

Unnamed: 0,DateTime,Reason for Dispatch
0,2021-01-04 20:10:31,Public Assist
5,2021-01-14 17:10:00,Check Welfare
6,2021-01-14 17:34:00,Check Welfare
7,2021-01-14 18:01:00,Public Assist
8,2021-01-14 19:20:00,Check Welfare
...,...,...
46887,2022-12-31 20:45:01,Public Assist
46888,2022-12-31 21:09:05,Check Welfare
46889,2022-12-31 21:26:08,Public Assist
46890,2022-12-31 21:37:01,Public Assist


In [11]:
# Write the CAHOOTS frame to CSV (no index column).
# Relies on `output_dir` having been defined by an earlier cell.
output_path = output_dir.joinpath("cleaned_cahoots_data.csv")
cahoots_dataset.to_csv(output_path, index=False)

print(f"Cleaned data saved to {output_path}")

Cleaned data saved to data\cleaned_data\cleaned_cahoots_data.csv


# (3) Create CAD Diversions Dataset

In [12]:
# Reload the cleaned CAD file from data/cleaned_data.
# Forward slashes are used on purpose: backslash separators only resolve on Windows,
# and "\c" is not a valid string escape (newer Python versions warn about it).
CAD_data = pd.read_csv("data/cleaned_data/cleaned_CAD_data.csv")
CAD_data

Unnamed: 0,Call_Created_Time,Call_Zipcode,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,Total_Time_To_Dispatch,Total_Time_Dispatch_To_On_Scene,Total_Time_On_Scene_To_Clear,Handled_by_Cahoots
0,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,5E57,294.0,298.0,2663.0,0
1,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4X40,577.0,0.0,2238.0,0
2,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4E53,294.0,448.0,2372.0,0
3,2016-01-01 00:00:24,97401.0,TRAFFIC STOP,ADVISED,5T81,5T81,1.0,0.0,136.0,0
4,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,4F72,124.0,0.0,198.0,0
...,...,...,...,...,...,...,...,...,...,...
930223,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4F72,1.0,0.0,259.0,0
930224,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4E11,11.0,36.0,346.0,0
930225,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,4E36,189.0,185.0,362.0,0
930226,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,3E27,189.0,277.0,95.0,0


### Remove all call types with 0 overlap between Police and Cahoots

In [13]:
# Resolve the name of the 0/1 column that marks Cahoots rows. The cleaned file shown in
# this notebook's outputs calls it 'Handled_by_Cahoots', while the CAD cleaning section
# creates 'Cahoots_related'; accepting either avoids a KeyError on a fresh run.
# NOTE(review): assumes both names carry the same flag — confirm.
cahoots_flag = next(
    (name for name in ('Handled_by_Cahoots', 'Cahoots_related') if name in CAD_data.columns),
    None,
)
if cahoots_flag is None:
    raise KeyError("CAD_data has neither a 'Handled_by_Cahoots' nor a 'Cahoots_related' column")

# Split the rows by the Cahoots flag
cahoots_related = CAD_data[CAD_data[cahoots_flag] == 1]
police_handled = CAD_data[CAD_data[cahoots_flag] == 0]

# Unique incident types
cahoots_types = cahoots_related['InitialIncidentTypeDescription'].unique()
police_types = police_handled['InitialIncidentTypeDescription'].unique()

# Incident types never responded to by Cahoots
never_cahoots_type = list(set(police_types) - set(cahoots_types))

# Incident types never responded to by police
never_police_type = list(set(cahoots_types) - set(police_types))

# Unique dispositions
cahoots_disp = cahoots_related['Disposition'].unique()
police_disp = police_handled['Disposition'].unique()

# Dispositions never recorded on a Cahoots row
never_cahoots_disp = list(set(police_disp) - set(cahoots_disp))

# Dispositions never recorded on a police row
never_police_disp = list(set(cahoots_disp) - set(police_disp))

# First drop incident types seen on only one side ...
CAD_data_diversions = CAD_data[
    ~CAD_data['InitialIncidentTypeDescription'].isin(never_cahoots_type) &
    ~CAD_data['InitialIncidentTypeDescription'].isin(never_police_type)
]
# ... then drop dispositions seen on only one side (the disposition sets were computed
# from the full CAD_data, before the incident-type filter)
CAD_data_diversions = CAD_data_diversions[
    ~CAD_data_diversions['Disposition'].isin(never_cahoots_disp) &
    ~CAD_data_diversions['Disposition'].isin(never_police_disp)
]
CAD_data_diversions

Unnamed: 0,Call_Created_Time,Call_Zipcode,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,Total_Time_To_Dispatch,Total_Time_Dispatch_To_On_Scene,Total_Time_On_Scene_To_Clear,Handled_by_Cahoots
0,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,5E57,294.0,298.0,2663.0,0
1,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4X40,577.0,0.0,2238.0,0
2,2016-01-01 00:00:04,97402.0,ASSAULT,ADVISED,5E57,4E53,294.0,448.0,2372.0,0
3,2016-01-01 00:00:24,97401.0,TRAFFIC STOP,ADVISED,5T81,5T81,1.0,0.0,136.0,0
4,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,4F72,124.0,0.0,198.0,0
...,...,...,...,...,...,...,...,...,...,...
930223,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4F72,1.0,0.0,259.0,0
930224,2022-12-31 23:51:55,,PATROL CHECK,ADVISED,4F72,4E11,11.0,36.0,346.0,0
930225,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,4E36,189.0,185.0,362.0,0
930226,2022-12-31 23:54:01,,SHOTS FIRED,PATROL CHECK,4E36,3E27,189.0,277.0,95.0,0


### Remove call types below composite score threshold

In [14]:
from scipy.stats import zscore
import numpy as np

# Resolve the name of the 0/1 column that marks Cahoots rows ('Handled_by_Cahoots' in the
# cleaned file shown in this notebook's outputs, 'Cahoots_related' in the CAD cleaning section).
# NOTE(review): assumes both names carry the same flag — confirm.
cahoots_flag = next(
    (name for name in ('Handled_by_Cahoots', 'Cahoots_related') if name in CAD_data_diversions.columns),
    None,
)
if cahoots_flag is None:
    raise KeyError("CAD_data_diversions has neither a 'Handled_by_Cahoots' nor a 'Cahoots_related' column")

incident_type = CAD_data_diversions['InitialIncidentTypeDescription']

# Rows per incident type: overall, Cahoots-flagged, and not Cahoots-flagged.
# The per-side counts are reindexed onto the full list of types with 0 for absent types.
# Without this, a type missing on one side yields NaN, and zscore (which propagates NaN
# by default) would turn every score into NaN and silently filter out all rows.
total_calls = incident_type.value_counts()
cahoots_calls = (
    incident_type[CAD_data_diversions[cahoots_flag] == 1]
    .value_counts()
    .reindex(total_calls.index, fill_value=0)
)
police_calls = (
    incident_type[CAD_data_diversions[cahoots_flag] == 0]
    .value_counts()
    .reindex(total_calls.index, fill_value=0)
)

# Share of each type's rows on either side
cahoots_proportion = cahoots_calls / total_calls
police_proportion = police_calls / total_calls

# Square-root scaling of the raw counts
scaled_cahoots_calls = np.sqrt(cahoots_calls)
scaled_police_calls = np.sqrt(police_calls)

# Harmonic mean of the two proportions (the small constant avoids division by zero)
harmonic_mean_proportion = 2 * (cahoots_proportion * police_proportion) / (cahoots_proportion + police_proportion + 1e-10)

# Composite score: harmonic mean weighted by the scaled call counts
composite_scores = harmonic_mean_proportion * ((scaled_cahoots_calls + scaled_police_calls))

# Apply z-score normalization to the composite scores
normalized_composite_scores = zscore(composite_scores)

# Keep incident types whose normalized composite score exceeds the 0.3 threshold
substantial_incident_types = composite_scores[normalized_composite_scores > 0.3].index

# Filter data
filtered_CAD_data_diversions = CAD_data_diversions[incident_type.isin(substantial_incident_types)]

filtered_CAD_data_diversions

Unnamed: 0,Call_Created_Time,Call_Zipcode,InitialIncidentTypeDescription,Disposition,PrimaryUnitCallSign,RespondingUnitCallSign,Total_Time_To_Dispatch,Total_Time_Dispatch_To_On_Scene,Total_Time_On_Scene_To_Clear,Handled_by_Cahoots
4,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,4F72,124.0,0.0,198.0,0
5,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,4F71,124.0,0.0,198.0,0
6,2016-01-01 00:02:45,97401.0,CHECK WELFARE,ASSISTED,3X90,3X90,80.0,0.0,857.0,0
13,2016-01-01 00:07:19,97401.0,DISORDERLY SUBJECT,ASSISTED,6E26,CAHOOT,392.0,256.0,1904.0,1
14,2016-01-01 00:07:19,97401.0,DISORDERLY SUBJECT,ASSISTED,6E26,6E26,227.0,32.0,2003.0,0
...,...,...,...,...,...,...,...,...,...,...
930216,2022-12-31 23:12:35,,ASSIST FIRE DEPARTMENT,ASSISTED,4E11,4E11,104.0,0.0,1530.0,0
930217,2022-12-31 23:14:05,,DISPUTE,CITED IN LIEU OF CUSTODY,6E55,5E46,1140.0,311.0,2346.0,0
930218,2022-12-31 23:14:05,,DISPUTE,CITED IN LIEU OF CUSTODY,6E55,6E55,1140.0,321.0,7830.0,0
930219,2022-12-31 23:37:15,,DISPUTE,REPORT TAKEN,4E66,4E53,235.0,447.0,1693.0,0


In [15]:
# Write the filtered diversions frame to CSV (no index column).
# Relies on `output_dir` having been defined by an earlier cell.
output_path = output_dir.joinpath("cleaned_CAD_diversions.csv")
filtered_CAD_data_diversions.to_csv(output_path, index=False)

print(f"Cleaned data saved to {output_path}")

Cleaned data saved to data\cleaned_data\cleaned_CAD_diversions.csv


# (4) Police Replication Dataset

In [27]:
# Reload the raw CAD export with all of its columns; this rebinds the CAD_data name
CAD_data = pd.read_csv("data/call_data_from_CAD.csv")

### Convert call creation time to datetime and add a year column

In [28]:
# Parse the creation timestamp (unparseable values become NaT) and derive its year
created_at = pd.to_datetime(CAD_data['Call_Created_Time'], errors='coerce')
CAD_data["Call_Created_Time"] = created_at
CAD_data["year"] = created_at.dt.year

In [29]:
# Standardize Cahoots identifiers: every call-sign fragment matched by the pattern is
# rewritten to 'CAHOOT'. The replacement is idempotent, so a single pass is sufficient.
cahoots_identifiers = r"1J77\s*|3J79\s*|3J78\s*|3J77\s*|4J79\s*|3J81\s*|3J76\s*|2J28\s*|2J29\s*|CAHOOT\s*|CAHOT\s*|CAHO\s*"

CAD_data["PrimaryUnitCallSign"] = CAD_data["PrimaryUnitCallSign"].replace(cahoots_identifiers, 'CAHOOT', regex=True)
CAD_data["RespondingUnitCallSign"] = CAD_data["RespondingUnitCallSign"].replace(cahoots_identifiers, 'CAHOOT', regex=True)

# Create an identifier for Cahoots involvement: 1 when the primary or the responding
# call sign is exactly 'CAHOOT', otherwise 0
CAD_data['Cahoots_related'] = ((CAD_data['PrimaryUnitCallSign'] == 'CAHOOT') | (CAD_data['RespondingUnitCallSign'] == 'CAHOOT')).astype(int)

In [30]:
# Independent copy of the rows whose year column equals 2021
is_2021 = CAD_data["year"].eq(2021)
CAD_2021 = CAD_data.loc[is_2021].copy()
CAD_2021

Unnamed: 0.1,Unnamed: 0,IncidentNumber,Call_Created_Time,Call_First_Dispatched_Time,Call_First_On_Scene,Call_Cleared,Call_Zipcode,Beat,Call_Source,Call_Priority,InitialIncidentTypeDescription,IsPrimary,PrimaryUnitCallSign,RespondingUnitCallSign,Unit_Dispatched_Time,Unit_OnScene_Time,Unit_Cleared_Time,Disposition,year,Cahoots_related
2847,2848,OR-2021-01-01-21000001,2021-01-01 00:00:58,01/01/2021 00:22:41,,01/01/2021 00:22:47,97403.0,EP03,PHONE,5,BEAT INFORMATION,1,6E31,6E31,01/01/2021 00:22:41,,01/01/2021 00:22:47,INFORMATION ONLY,2021,0
2848,2849,OR-2021-01-01-21000002,2021-01-01 00:01:03,,,,97404.0,LS13,E911,5,ILLEGAL FIREWORKS,0,,,,,,RELAYED TO LANE COUNTY SHERIFFS OFFICE,2021,0
2849,2850,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,1,4E53,4E53,01/01/2021 00:18:25,01/01/2021 00:18:25,01/01/2021 00:23:46,PATROL CHECK,2021,0
2850,2851,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,0,4E53,5E47,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:33,PATROL CHECK,2021,0
2851,2852,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,0,4E53,CMD16,01/01/2021 00:07:49,01/01/2021 00:07:49,01/01/2021 00:14:38,PATROL CHECK,2021,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1616031,1616032,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,0,4F64,4E23,01/01/2022 00:02:03,01/01/2022 00:03:28,01/01/2022 00:18:31,CITED IN LIEU OF CUSTODY,2021,0
1616032,1616033,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,1,4F64,4F64,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:10,CITED IN LIEU OF CUSTODY,2021,0
1616033,1616034,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,0,4F64,4F65,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:10,CITED IN LIEU OF CUSTODY,2021,0
1616034,1616035,OR-2021-12-31-21336961,2021-12-31 23:59:50,,,,97402.0,EP05,E911,1,POISONING,0,,,,,,DISREGARD,2021,0


In [31]:
# Drop the 'Unnamed: 0' column from the 2021 subset
data_cleaned = CAD_2021.drop(columns=['Unnamed: 0'])

In [35]:
# Order the 2021 calls by creation time before writing them out
data_cleaned = data_cleaned.sort_values("Call_Created_Time")

# Make sure the destination folder exists (no error if it is already there)
output_dir = Path("data/cleaned_data")
output_dir.mkdir(parents=True, exist_ok=True)

# Write the 2021 frame to CSV without the index column
output_path = output_dir.joinpath("cleaned_CAD_data_2021.csv")
data_cleaned.to_csv(output_path, index=False)

print(f"Cleaned data saved to {output_path}")

Cleaned data saved to data\cleaned_data\cleaned_CAD_data_2021.csv


In [33]:
# Display the 2021 frame
data_cleaned

Unnamed: 0,IncidentNumber,Call_Created_Time,Call_First_Dispatched_Time,Call_First_On_Scene,Call_Cleared,Call_Zipcode,Beat,Call_Source,Call_Priority,InitialIncidentTypeDescription,IsPrimary,PrimaryUnitCallSign,RespondingUnitCallSign,Unit_Dispatched_Time,Unit_OnScene_Time,Unit_Cleared_Time,Disposition,year,Cahoots_related
2847,OR-2021-01-01-21000001,2021-01-01 00:00:58,01/01/2021 00:22:41,,01/01/2021 00:22:47,97403.0,EP03,PHONE,5,BEAT INFORMATION,1,6E31,6E31,01/01/2021 00:22:41,,01/01/2021 00:22:47,INFORMATION ONLY,2021,0
2848,OR-2021-01-01-21000002,2021-01-01 00:01:03,,,,97404.0,LS13,E911,5,ILLEGAL FIREWORKS,0,,,,,,RELAYED TO LANE COUNTY SHERIFFS OFFICE,2021,0
2849,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,1,4E53,4E53,01/01/2021 00:18:25,01/01/2021 00:18:25,01/01/2021 00:23:46,PATROL CHECK,2021,0
2850,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,0,4E53,5E47,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:33,PATROL CHECK,2021,0
2851,OR-2021-01-01-21000004,2021-01-01 00:01:48,01/01/2021 00:02:53,01/01/2021 00:06:38,01/01/2021 00:23:46,97402.0,EP05,W911,3,TRAFFIC HAZARD,0,4E53,CMD16,01/01/2021 00:07:49,01/01/2021 00:07:49,01/01/2021 00:14:38,PATROL CHECK,2021,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1616032,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,1,4F64,4F64,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:10,CITED IN LIEU OF CUSTODY,2021,0
1616030,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,0,4F64,4F63,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:10,CITED IN LIEU OF CUSTODY,2021,0
1616033,OR-2021-12-31-21336952,2021-12-31 23:51:41,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:31,97401.0,EP02,SELF,P,FIGHT,0,4F64,4F65,12/31/2021 23:51:41,12/31/2021 23:51:41,01/01/2022 00:18:10,CITED IN LIEU OF CUSTODY,2021,0
1616034,OR-2021-12-31-21336961,2021-12-31 23:59:50,,,,97402.0,EP05,E911,1,POISONING,0,,,,,,DISREGARD,2021,0
