In [163]:
#import the libraries
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
import warnings 
warnings.filterwarnings("ignore")
import re

#import sklearn libraries
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,roc_auc_score, roc_curve

In [164]:
# Load the Terry Stops export.
# "None" is a genuine category in the Weapon Type column, but pandas >= 2.0
# added "None" to read_csv's default NA markers, which would silently turn that
# category into NaN and break the later `== "None"` filters. Pin the NA markers
# explicitly (the pre-2.0 default list) so parsing is the same on every version.
NA_MARKERS = [
    "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan",
    "1.#IND", "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null",
]
df = pd.read_csv(
    "Terry_Stops_20250909.csv",
    keep_default_na=False,
    na_values=NA_MARKERS,
)

# Preview the first five rows of the dataset
df.head()

Unnamed: 0,Subject Age Group,Subject ID,GO / SC Num,Terry Stop ID,Stop Resolution,Weapon Type,Officer ID,Officer YOB,Officer Gender,Officer Race,...,Reported Time,Initial Call Type,Final Call Type,Call Type,Officer Squad,Arrest Flag,Frisk Flag,Precinct,Sector,Beat
0,26 - 35,-1,20170000036835,234548,Offense Report,,4852,1953,Male,Asian,...,18:36:00.0000000,DISTURBANCE,THEFT - SHOPLIFT,911,NORTH PCT 2ND W - LINCOLN - PLATOON 1,N,N,North,L,L3
1,46 - 55,-1,20180000275629,481899,Field Contact,,8544,1993,Female,Hispanic,...,13:47:00.0000000,-,-,-,NORTH PCT 2ND WATCH - B/N RELIEF,N,N,West,Q,Q2
2,36 - 45,49326761681,20230000118635,49327076666,Field Contact,Knife/Cutting/Stabbing Instrument,7766,1984,Male,White,...,07:25:58.0000000,OBS - DOWN - CHECK FOR PERSON DOWN,SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,ONVIEW,WEST PCT 1ST W - QUEEN (DAVID) - PLATOON 1,N,Y,West,K,K3
3,36 - 45,53986235598,20240000029589,53986202139,Field Contact,-,8723,1994,Male,White,...,02:50:52.0000000,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,DISTURBANCE - OTHER,ONVIEW,WEST PCT 3RD W - KING - PLATOON 1,N,N,West,D,D2
4,18 - 25,-1,20150000002928,54115,Field Contact,,7745,1988,Female,Declined to Answer,...,00:22:00.0000000,-,-,-,SOUTH PCT 3RD W - SAM - PLATOON 2,N,N,-,-,-


In [165]:
# Preview the last five rows of the dataset
df.tail()

Unnamed: 0,Subject Age Group,Subject ID,GO / SC Num,Terry Stop ID,Stop Resolution,Weapon Type,Officer ID,Officer YOB,Officer Gender,Officer Race,...,Reported Time,Initial Call Type,Final Call Type,Call Type,Officer Squad,Arrest Flag,Frisk Flag,Precinct,Sector,Beat
64732,18 - 25,-1,20170000325937,354197,Offense Report,,7755,1971,Male,White,...,22:58:00.0000000,-,-,-,WEST PCT 3RD W - KING - PLATOON 1,N,N,West,M,M2
64733,26 - 35,-1,20190000122094,549491,Arrest,,7489,1985,Female,White,...,13:01:00.0000000,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,WARRANT SERVICES - FELONY,ONVIEW,NORTH PCT OPS - ACT DAY,N,N,North,N,N3
64734,36 - 45,-1,20180000467077,512455,Arrest,,7758,1987,Male,White,...,04:35:00.0000000,OBS - BURG - IP/JO - COMM BURG (INCLUDES SCHOOLS),NARCOTICS - NARS REPORT,911,EAST PCT 1ST W - E/G RELIEF (CHARLIE),N,N,East,E,E1
64735,26 - 35,-1,20190000059044,532813,Offense Report,,7649,1986,Male,White,...,14:35:00.0000000,ASLT - CRITICAL (NO SHOOTINGS),"ASSAULTS, OTHER",911,SOUTH PCT 2ND W - R/S RELIEF,N,N,South,S,S1
64736,18 - 25,-1,20180000001424,412377,Field Contact,,8558,1993,Male,White,...,14:10:00.0000000,-,-,-,TRAINING - FIELD TRAINING SQUAD,N,N,-,-,-


In [166]:
# Number of rows and columns in the dataset
df.shape

(64737, 23)

In [167]:
# Column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64737 entries, 0 to 64736
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Subject Age Group         64737 non-null  object
 1   Subject ID                64737 non-null  int64 
 2   GO / SC Num               64737 non-null  int64 
 3   Terry Stop ID             64737 non-null  int64 
 4   Stop Resolution           64737 non-null  object
 5   Weapon Type               64737 non-null  object
 6   Officer ID                64737 non-null  object
 7   Officer YOB               64737 non-null  int64 
 8   Officer Gender            64737 non-null  object
 9   Officer Race              64737 non-null  object
 10  Subject Perceived Race    64737 non-null  object
 11  Subject Perceived Gender  64737 non-null  object
 12  Reported Date             64737 non-null  object
 13  Reported Time             64737 non-null  object
 14  Initial Call Type     

In [168]:
# Summary statistics for the numeric columns
df.describe()

Unnamed: 0,Subject ID,GO / SC Num,Terry Stop ID,Officer YOB
count,64737.0,64737.0,64737.0,64737.0
mean,8993283000.0,20190080000000.0,15077240000.0,1984.46709
std,15383340000.0,84392610000.0,20699830000.0,9.189523
min,-8.0,-1.0,28020.0,1900.0
25%,-1.0,20170000000000.0,254737.0,1980.0
50%,-1.0,20190000000000.0,534713.0,1986.0
75%,7803391000.0,20210000000000.0,26674220000.0,1991.0
max,65811260000.0,20250000000000.0,65811200000.0,2003.0


In [169]:
# Summary statistics (count, unique, top, freq) for the object-dtype columns,
# transposed so that each original column becomes a row
df.describe(include="object").T

Unnamed: 0,count,unique,top,freq
Subject Age Group,64737,7,26 - 35,21576
Stop Resolution,64737,5,Field Contact,32234
Weapon Type,64737,23,,32565
Officer ID,64737,1435,7456,551
Officer Gender,64737,3,Male,57366
Officer Race,64737,9,White,46063
Subject Perceived Race,64737,11,White,31649
Subject Perceived Gender,64737,7,Male,51093
Reported Date,64737,3829,2015-10-01T00:00:00,101
Reported Time,64737,26254,03:09:00.0000000,52


In [170]:
# Print the distinct values of every column under a "<column>:" header.
# (The previous format string produced a stray leading space and a dangling
# comma at the start of the value line.)
for coln in df:
    uni_value = df[coln].unique()
    print(f"{coln}:\n{uni_value}\n")

 Subject Age Group
, ['26 - 35' '46 - 55' '36 - 45' '18 - 25' '-' '56 and Above' '1 - 17']

 Subject ID
, [         -1 49326761681 53986235598 ... 12282720243 15947861935
  7751138304]

 GO / SC Num
, [20170000036835 20180000275629 20230000118635 ... 20180000467077
 20190000059044 20180000001424]

 Terry Stop ID
, [     234548      481899 49327076666 ...      512455      532813
      412377]

 Stop Resolution
, ['Offense Report' 'Field Contact' 'Arrest' 'Referred for Prosecution'
 'Citation / Infraction']

 Weapon Type
, ['None' 'Knife/Cutting/Stabbing Instrument' '-'
 'Lethal Cutting Instrument' 'Handgun' 'Blunt Object/Striking Implement'
 'Firearm' 'Mace/Pepper Spray' 'Club' 'Club, Blackjack, Brass Knuckles'
 'Firearm Other' 'Fire/Incendiary Device' 'Firearm (unk type)'
 'None/Not Applicable' 'Other Firearm' 'Taser/Stun Gun'
 'Automatic Handgun' 'Shotgun' 'Rifle' 'Poison'
 'Personal Weapons (hands, feet, etc.)' 'Blackjack' 'Brass Knuckles']

 Officer ID
, ['4852' '8544' '7766' ... '5

In [171]:
# Frequency table for every column, printed under a "<column>:" header
for coln, column_values in df.items():
    value_count = column_values.value_counts()
    print(f"{coln}:\n{value_count}\n")

Subject Age Group:
26 - 35         21576
36 - 45         14705
18 - 25         12081
46 - 55          8189
56 and Above     3426
1 - 17           2392
-                2368
Name: Subject Age Group, dtype: int64

Subject ID:
-1              35221
 7753260438        30
 7774286580        22
 21375848115       22
 7726918259        22
                ...  
 7727594916         1
 7731121570         1
 37999033568        1
 31734906260        1
 16219707395        1
Name: Subject ID, Length: 18941, dtype: int64

GO / SC Num:
20150000190790    16
20160000378750    16
20240000319277    15
20230000049052    14
20180000134604    14
                  ..
20200000067947     1
20170000181613     1
20160000420117     1
20210000140555     1
20180000000296     1
Name: GO / SC Num, Length: 52005, dtype: int64

Terry Stop ID:
27511831225    3
36014210659    3
55477887782    3
19324329995    3
15045077325    3
              ..
499161         1
103896         1
12715288022    1
40982005202    1
131072    


Precinct:
West         18416
North        13464
-            10822
East          8871
South         7833
Southwest     4989
Unknown        200
OOJ            120
FK ERROR        22
Name: Precinct, dtype: int64

Sector:
-      10973
K       6495
M       5539
E       4677
N       3850
D       3644
R       3029
F       2974
B       2847
Q       2734
L       2605
O       2442
S       2363
U       2361
G       2291
W       2010
C       1901
J       1799
99       138
OOJ       65
Name: Sector, dtype: int64

Beat:
-      10967
K3      3901
M3      2756
N3      1944
E2      1942
E1      1581
R2      1455
D1      1452
N2      1440
K2      1433
M2      1403
M1      1383
D2      1342
Q3      1309
F2      1237
K1      1161
E3      1152
B2      1085
U2      1074
B1      1060
O1       984
S2       918
L2       905
F3       877
F1       860
L1       852
D3       850
L3       848
G2       847
W2       846
R1       846
U1       809
Q2       794
S3       786
G3       782
C1       777
O3       775
R3   

In [172]:
# Work on a deep copy so the raw frame `df` stays untouched by the cleaning steps
df1 = df.copy(deep=True)
df1.head()

Unnamed: 0,Subject Age Group,Subject ID,GO / SC Num,Terry Stop ID,Stop Resolution,Weapon Type,Officer ID,Officer YOB,Officer Gender,Officer Race,...,Reported Time,Initial Call Type,Final Call Type,Call Type,Officer Squad,Arrest Flag,Frisk Flag,Precinct,Sector,Beat
0,26 - 35,-1,20170000036835,234548,Offense Report,,4852,1953,Male,Asian,...,18:36:00.0000000,DISTURBANCE,THEFT - SHOPLIFT,911,NORTH PCT 2ND W - LINCOLN - PLATOON 1,N,N,North,L,L3
1,46 - 55,-1,20180000275629,481899,Field Contact,,8544,1993,Female,Hispanic,...,13:47:00.0000000,-,-,-,NORTH PCT 2ND WATCH - B/N RELIEF,N,N,West,Q,Q2
2,36 - 45,49326761681,20230000118635,49327076666,Field Contact,Knife/Cutting/Stabbing Instrument,7766,1984,Male,White,...,07:25:58.0000000,OBS - DOWN - CHECK FOR PERSON DOWN,SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,ONVIEW,WEST PCT 1ST W - QUEEN (DAVID) - PLATOON 1,N,Y,West,K,K3
3,36 - 45,53986235598,20240000029589,53986202139,Field Contact,-,8723,1994,Male,White,...,02:50:52.0000000,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,DISTURBANCE - OTHER,ONVIEW,WEST PCT 3RD W - KING - PLATOON 1,N,N,West,D,D2
4,18 - 25,-1,20150000002928,54115,Field Contact,,7745,1988,Female,Declined to Answer,...,00:22:00.0000000,-,-,-,SOUTH PCT 3RD W - SAM - PLATOON 2,N,N,-,-,-


In [173]:
# List the column names before renaming
df1.columns

Index(['Subject Age Group', 'Subject ID', 'GO / SC Num', 'Terry Stop ID',
       'Stop Resolution', 'Weapon Type', 'Officer ID', 'Officer YOB',
       'Officer Gender', 'Officer Race', 'Subject Perceived Race',
       'Subject Perceived Gender', 'Reported Date', 'Reported Time',
       'Initial Call Type', 'Final Call Type', 'Call Type', 'Officer Squad',
       'Arrest Flag', 'Frisk Flag', 'Precinct', 'Sector', 'Beat'],
      dtype='object')

In [174]:
# Normalise the column names: replace each space with an underscore and lowercase the result
df1.columns = [name.replace(" ", "_").lower() for name in df1.columns]
df1.columns

Index(['subject_age_group', 'subject_id', 'go_/_sc_num', 'terry_stop_id',
       'stop_resolution', 'weapon_type', 'officer_id', 'officer_yob',
       'officer_gender', 'officer_race', 'subject_perceived_race',
       'subject_perceived_gender', 'reported_date', 'reported_time',
       'initial_call_type', 'final_call_type', 'call_type', 'officer_squad',
       'arrest_flag', 'frisk_flag', 'precinct', 'sector', 'beat'],
      dtype='object')

In [175]:
# Trim leading/trailing whitespace in every object-dtype column;
# columns of any other dtype are passed through unchanged
def _strip_text(col):
    if col.dtype == "object":
        return col.str.strip()
    return col

df1 = df1.apply(_strip_text)

In [176]:
def replace_dash_with_unknown(df1):
    """
    Return a new frame in which every string cell consisting of exactly one
    dash character ('-', '–' or '—') is replaced with 'Unknown'.

    The pattern is anchored at both ends, so text that merely contains a
    dash (for example '26 - 35') is left as it is.
    """
    dash_only = r"^[-–—]$"
    return df1.replace(to_replace=dash_only, value="Unknown", regex=True)

# Apply the dash -> 'Unknown' replacement to the working frame
df1 = replace_dash_with_unknown(df1)

In [177]:
# Collapse near-duplicate weapon labels onto a single canonical label each.
# Keys are the canonical labels, values are the variants folded into them.
weapon_groups = {
    "Other Firearm": ["Firearm", "Firearm Other", "Firearm (unk type)"],
    "Knife/Cutting/Stabbing Instrument": ["Lethal Cutting Instrument"],
    "None": ["None/Not Applicable"],
    "Handgun": ["Automatic Handgun"],
    "Blunt Object/Striking Implement": [
        "Club",
        "Blackjack",
        "Brass Knuckles",
        "Club, Blackjack, Brass Knuckles",
    ],
}
weapon_aliases = {
    variant: canonical
    for canonical, variants in weapon_groups.items()
    for variant in variants
}
df1["weapon_type"] = df1["weapon_type"].replace(weapon_aliases)
df1["weapon_type"].value_counts()

None                                    32586
Unknown                                 28035
Knife/Cutting/Stabbing Instrument        2976
Handgun                                   433
Other Firearm                             331
Blunt Object/Striking Implement           259
Mace/Pepper Spray                          64
Taser/Stun Gun                             20
Fire/Incendiary Device                     13
Rifle                                      11
Shotgun                                     6
Personal Weapons (hands, feet, etc.)        2
Poison                                      1
Name: weapon_type, dtype: int64

In [178]:
# Fold the 'Declined to Answer' label into 'Unknown'
df1["officer_race"] = df1["officer_race"].replace({"Declined to Answer": "Unknown"})
df1["officer_race"].value_counts()

White                                        46063
Two or More Races                             4679
Hispanic                                      4304
Asian                                         3320
Unknown                                       2943
Black or African American                     2582
Native Hawaiian or Other Pacific Islander      599
American Indian or Alaska Native               247
Name: officer_race, dtype: int64

In [179]:
# Fold the 'MULTIPLE SUBJECTS' label into 'Other'
df1["subject_perceived_race"] = df1["subject_perceived_race"].replace(
    {"MULTIPLE SUBJECTS": "Other"}
)
df1["subject_perceived_race"].value_counts()

White                                        31649
Black or African American                    19516
Unknown                                       6763
Asian                                         2231
American Indian or Alaska Native              1746
Hispanic                                      1684
Multi-Racial                                   809
Native Hawaiian or Other Pacific Islander      186
Other                                          153
Name: subject_perceived_race, dtype: int64

In [180]:
# Fold the non-informative gender labels into 'Unknown'
df1["subject_perceived_gender"] = df1["subject_perceived_gender"].replace(
    ["Unable to Determine", "MULTIPLE SUBJECTS"], "Unknown"
)
df1["subject_perceived_gender"].value_counts()

Male                                                         51093
Female                                                       12890
Unknown                                                        689
Gender Diverse (gender non-conforming and/or transgender)       65
Name: subject_perceived_gender, dtype: int64

In [181]:
# Fold the 'FK ERROR' label into 'Unknown'
df1["precinct"] = df1["precinct"].replace({"FK ERROR": "Unknown"})
df1["precinct"].value_counts()

West         18416
North        13464
Unknown      11044
East          8871
South         7833
Southwest     4989
OOJ            120
Name: precinct, dtype: int64

In [182]:
# Count rows that are exact duplicates (all columns equal) of an earlier row
df1.duplicated().sum()

1

In [183]:
# Frequency table for every column of the cleaned frame, printed under a "<column>:" header
for coln, column_values in df1.items():
    value_count = column_values.value_counts()
    print(f"{coln}:\n{value_count}\n")

subject_age_group:
26 - 35         21576
36 - 45         14705
18 - 25         12081
46 - 55          8189
56 and Above     3426
1 - 17           2392
Unknown          2368
Name: subject_age_group, dtype: int64

subject_id:
-1              35221
 7753260438        30
 7774286580        22
 21375848115       22
 7726918259        22
                ...  
 7727594916         1
 7731121570         1
 37999033568        1
 31734906260        1
 16219707395        1
Name: subject_id, Length: 18941, dtype: int64

go_/_sc_num:
20150000190790    16
20160000378750    16
20240000319277    15
20230000049052    14
20180000134604    14
                  ..
20200000067947     1
20170000181613     1
20160000420117     1
20210000140555     1
20180000000296     1
Name: go_/_sc_num, Length: 52005, dtype: int64

terry_stop_id:
27511831225    3
36014210659    3
55477887782    3
19324329995    3
15045077325    3
              ..
499161         1
103896         1
12715288022    1
40982005202    1
131072    

In [195]:
import pandas as pd

def clean_reported_time(df1, time_col="reported_time"):
    """
    Return a copy of ``df1`` whose ``time_col`` holds ``datetime.time`` values.

    Steps:
    1. Cast the column to text and strip surrounding whitespace.
    2. Parse it as ``HH:MM:SS.fraction``; values without a fractional part
       are parsed as ``HH:MM:SS``.
    3. Anything matching neither layout (dashes, blanks, 'Unknown', missing
       values, ...) becomes NaT.
    4. Drop sub-second precision (floor to whole seconds).
    5. Keep only the time-of-day component.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Frame containing ``time_col``. It is not modified.
    time_col : str, default "reported_time"
        Name of the column to clean.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df1`` with ``time_col`` replaced by the cleaned values.

    Raises
    ------
    KeyError
        If ``time_col`` is not a column of ``df1``.
    """
    # Work on a copy so the caller's frame is never mutated as a side effect.
    cleaned = df1.copy()

    # Text form first, then strip. Placeholders need no special handling:
    # errors="coerce" below turns every unparseable value into NaT.
    as_text = cleaned[time_col].astype(str).str.strip()

    parsed = pd.to_datetime(as_text, format="%H:%M:%S.%f", errors="coerce")

    # Second pass for values with no fractional seconds. This also makes the
    # function safe to re-run: an already-cleaned column stringifies to
    # "HH:MM:SS", which the fractional format above rejects.
    no_fraction = pd.to_datetime(as_text, format="%H:%M:%S", errors="coerce")
    parsed = parsed.fillna(no_fraction)

    # Lowercase "s" alias: the uppercase "S" spelling is deprecated in recent pandas.
    cleaned[time_col] = parsed.dt.floor("s").dt.time

    return cleaned

# Clean the reported_time column and rebind df1 to the returned frame
df1 = clean_reported_time(df1)


In [184]:
# Count missing values per column
df1.isna().sum()

subject_age_group             0
subject_id                    0
go_/_sc_num                   0
terry_stop_id                 0
stop_resolution               0
weapon_type                   0
officer_id                    0
officer_yob                   0
officer_gender                0
officer_race                  0
subject_perceived_race        0
subject_perceived_gender      0
reported_date                 0
reported_time                 0
initial_call_type             0
final_call_type               0
call_type                     0
officer_squad               569
arrest_flag                   0
frisk_flag                    0
precinct                      0
sector                        0
beat                          0
dtype: int64

In [189]:
# Show the rows whose officer_squad value is missing
df1[df1["officer_squad"].isna()]


Unnamed: 0,subject_age_group,subject_id,go_/_sc_num,terry_stop_id,stop_resolution,weapon_type,officer_id,officer_yob,officer_gender,officer_race,...,initial_call_type,final_call_type,call_type,officer_squad,arrest_flag,frisk_flag,precinct,sector,beat,year
142,26 - 35,-1,20150000111374,33538,Offense Report,,7521,1986,Female,White,...,DISTURBANCE - DV CRITICAL,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",911,,N,N,east,G,G2,2015
336,36 - 45,-1,20160000000012,114952,Field Contact,,-9,1900,Non-Specified,Unknown,...,Unknown,Unknown,Unknown,,N,N,north,N,N3,2016
370,46 - 55,7743686531,20190000354957,10396648540,Field Contact,Unknown,Unknown,1900,Non-Specified,Unknown,...,Unknown,Unknown,Unknown,,N,N,east,E,E3,2019
388,46 - 55,-1,20190000120272,548421,Offense Report,,8328,1985,Male,White,...,UNKNOWN - COMPLAINT OF UNKNOWN NATURE,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",ONVIEW,,N,N,east,E,E2,2019
432,36 - 45,8597903457,20190000243853,8597867579,Field Contact,Unknown,5469,1967,Male,White,...,TRESPASS,SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,ONVIEW,,N,N,unknown,Unknown,Unknown,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63557,26 - 35,21514169827,20210000049063,21514152257,Field Contact,Unknown,X647,1956,Male,White,...,Unknown,Unknown,Unknown,,N,N,unknown,Unknown,Unknown,2021
63826,36 - 45,7734473862,20230000361937,53439369124,Field Contact,Unknown,7932,1991,Male,White,...,"NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)",NARCOTICS - OTHER,ONVIEW,,N,N,west,M,M3,2023
63928,1 - 17,-1,20150000147414,41108,Offense Report,,7745,1988,Female,Unknown,...,ASLT - DV CRITICAL,"ASSAULTS, OTHER",911,,N,N,east,G,G2,2015
64318,46 - 55,9638620853,20200000326941,17954968854,Field Contact,Unknown,7722,1989,Male,White,...,FOLLOW UP,SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,ONVIEW,,N,Y,west,K,K3,2020


In [185]:
# Rows with the placeholder subject_id (-1) where the weapon type is 'None'
df1[df1["subject_id"].eq(-1) & df1["weapon_type"].eq("None")]


Unnamed: 0,subject_age_group,subject_id,go_/_sc_num,terry_stop_id,stop_resolution,weapon_type,officer_id,officer_yob,officer_gender,officer_race,...,reported_time,initial_call_type,final_call_type,call_type,officer_squad,arrest_flag,frisk_flag,precinct,sector,beat
0,26 - 35,-1,20170000036835,234548,Offense Report,,4852,1953,Male,Asian,...,18:36:00.0000000,DISTURBANCE,THEFT - SHOPLIFT,911,NORTH PCT 2ND W - LINCOLN - PLATOON 1,N,N,North,L,L3
1,46 - 55,-1,20180000275629,481899,Field Contact,,8544,1993,Female,Hispanic,...,13:47:00.0000000,Unknown,Unknown,Unknown,NORTH PCT 2ND WATCH - B/N RELIEF,N,N,West,Q,Q2
4,18 - 25,-1,20150000002928,54115,Field Contact,,7745,1988,Female,Unknown,...,00:22:00.0000000,Unknown,Unknown,Unknown,SOUTH PCT 3RD W - SAM - PLATOON 2,N,N,Unknown,Unknown,Unknown
5,18 - 25,-1,20160000273397,180515,Offense Report,,6968,1981,Male,White,...,14:05:00.0000000,DISTURBANCE,DISTURBANCE - OTHER,911,SOUTHWEST PCT 2ND W - FRANK - PLATOON 2,N,Y,Southwest,F,F1
6,Unknown,-1,20160000465080,224398,Offense Report,,7700,1990,Male,White,...,19:16:00.0000000,"OBS WEAPN-IP/JO-GUN,DEADLY WPN (NO THRT/ASLT/D...",WARRANT SERVICES - MISDEMEANOR,911,SOUTH PCT 2ND W - ROBERT - PLATOON 2,N,Y,South,R,R2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64732,18 - 25,-1,20170000325937,354197,Offense Report,,7755,1971,Male,White,...,22:58:00.0000000,Unknown,Unknown,Unknown,WEST PCT 3RD W - KING - PLATOON 1,N,N,West,M,M2
64733,26 - 35,-1,20190000122094,549491,Arrest,,7489,1985,Female,White,...,13:01:00.0000000,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,WARRANT SERVICES - FELONY,ONVIEW,NORTH PCT OPS - ACT DAY,N,N,North,N,N3
64734,36 - 45,-1,20180000467077,512455,Arrest,,7758,1987,Male,White,...,04:35:00.0000000,OBS - BURG - IP/JO - COMM BURG (INCLUDES SCHOOLS),NARCOTICS - NARS REPORT,911,EAST PCT 1ST W - E/G RELIEF (CHARLIE),N,N,East,E,E1
64735,26 - 35,-1,20190000059044,532813,Offense Report,,7649,1986,Male,White,...,14:35:00.0000000,ASLT - CRITICAL (NO SHOOTINGS),"ASSAULTS, OTHER",911,SOUTH PCT 2ND W - R/S RELIEF,N,N,South,S,S1


In [186]:
# Rows with the placeholder subject_id (-1) where the weapon type is anything other than 'None'
df1[df1["subject_id"].eq(-1) & df1["weapon_type"].ne("None")]


Unnamed: 0,subject_age_group,subject_id,go_/_sc_num,terry_stop_id,stop_resolution,weapon_type,officer_id,officer_yob,officer_gender,officer_race,...,reported_time,initial_call_type,final_call_type,call_type,officer_squad,arrest_flag,frisk_flag,precinct,sector,beat
15,26 - 35,-1,20170000064956,241073,Arrest,Knife/Cutting/Stabbing Instrument,7634,1977,Male,White,...,12:56:00.0000000,"SUSPICIOUS PERSON, VEHICLE, OR INCIDENT",WARRANT SERVICES - FELONY,911,WEST PCT 1ST W - DAVID - PLATOON 1,N,Y,West,K,K2
24,26 - 35,-1,20190000114043,546950,Arrest,Knife/Cutting/Stabbing Instrument,7575,1985,Male,White,...,15:37:00.0000000,DISTURBANCE - DV CRITICAL,DV - DOMESTIC VIOL/ASLT (ARREST MANDATORY),911,WEST PCT 2ND W - D/M RELIEF,N,Y,West,D,D3
28,36 - 45,-1,20180000419918,500809,Offense Report,Knife/Cutting/Stabbing Instrument,5489,1964,Male,White,...,02:20:00.0000000,UNKNOWN - ANI/ALI - WRLS PHNS (INCL OPEN LINE),CRISIS COMPLAINT - GENERAL,911,NORTH PCT 3RD W - B/N RELIEF,N,Y,North,B,B1
35,Unknown,-1,20210000064922,21923094829,Field Contact,Unknown,8788,1986,Male,Unknown,...,03:17:49.0000000,ASLT - PERSON SHOT OR SHOT AT,AUTOMOBILES - AUTO THEFT & RECOVERY,911,TRAINING - FIELD TRAINING SQUAD,N,N,Southwest,F,F1
49,18 - 25,-1,20170000190550,269774,Arrest,Handgun,7761,1986,Male,White,...,06:17:00.0000000,"OBS WEAPN-IP/JO-GUN,DEADLY WPN (NO THRT/ASLT/D...","WEAPON, PERSON WITH - GUN",911,EAST PCT 1ST W - E/G RELIEF (CHARLIE),N,Y,East,G,G3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64675,26 - 35,-1,20170000323532,303776,Referred for Prosecution,Knife/Cutting/Stabbing Instrument,8426,1987,Male,White,...,19:31:00.0000000,OBS - DOWN - CHECK FOR PERSON DOWN,"WEAPON,PERSON WITH - OTHER WEAPON",911,NORTH PCT 2ND WATCH - B/N RELIEF,N,N,North,N,N3
64685,26 - 35,-1,20190000000889,538130,Field Contact,Knife/Cutting/Stabbing Instrument,7793,1986,Male,White,...,16:53:00.0000000,Unknown,Unknown,Unknown,SOUTHWEST PCT 2ND W - FRANK - PLATOON 2,N,Y,Unknown,Unknown,Unknown
64702,Unknown,-1,20250000133817,64032315733,Offense Report,Unknown,9020,1900,Male,Unknown,...,11:12:50.0000000,ASLT - DV CRITICAL,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",911,TRAINING - FIELD TRAINING SQUAD,N,N,West,K,K3
64713,26 - 35,-1,20190000048136,530293,Arrest,Knife/Cutting/Stabbing Instrument,5360,1966,Male,Asian,...,23:56:00.0000000,"OBS WEAPN-IP/JO-GUN,DEADLY WPN (NO THRT/ASLT/D...","WEAPON,PERSON WITH - OTHER WEAPON",911,WEST PCT 3RD W - K/Q RELIEF,N,Y,West,M,M2


In [187]:
# Lowercase both columns in place so the comparison below is case-insensitive
# (the columns stay lowercase afterwards)
df1["precinct"] = df1["precinct"].str.lower()
df1["officer_squad"] = df1["officer_squad"].str.lower()

# Leading token of each squad name, e.g. "north" from "north pct 2nd w - ..."
first_word = df1["officer_squad"].str.split().str.get(0)

# Number of rows whose squad prefix equals the recorded precinct
match_count = first_word.eq(df1["precinct"]).sum()

print("Number of matching rows:", match_count)


Number of matching rows: 42403


In [188]:
# Derive a 'year' column (kept as text) from the first four characters of the GO / SC number.
# NOTE(review): describe() on the raw data reports a minimum of -1 for this column,
# so such placeholder rows would get the string "-1" here rather than a real year — confirm and handle.
df1["year"] = df1["go_/_sc_num"].astype(str).str[:4]
df1.head()

Unnamed: 0,subject_age_group,subject_id,go_/_sc_num,terry_stop_id,stop_resolution,weapon_type,officer_id,officer_yob,officer_gender,officer_race,...,initial_call_type,final_call_type,call_type,officer_squad,arrest_flag,frisk_flag,precinct,sector,beat,year
0,26 - 35,-1,20170000036835,234548,Offense Report,,4852,1953,Male,Asian,...,DISTURBANCE,THEFT - SHOPLIFT,911,north pct 2nd w - lincoln - platoon 1,N,N,north,L,L3,2017
1,46 - 55,-1,20180000275629,481899,Field Contact,,8544,1993,Female,Hispanic,...,Unknown,Unknown,Unknown,north pct 2nd watch - b/n relief,N,N,west,Q,Q2,2018
2,36 - 45,49326761681,20230000118635,49327076666,Field Contact,Knife/Cutting/Stabbing Instrument,7766,1984,Male,White,...,OBS - DOWN - CHECK FOR PERSON DOWN,SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,ONVIEW,west pct 1st w - queen (david) - platoon 1,N,Y,west,K,K3,2023
3,36 - 45,53986235598,20240000029589,53986202139,Field Contact,Unknown,8723,1994,Male,White,...,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,DISTURBANCE - OTHER,ONVIEW,west pct 3rd w - king - platoon 1,N,N,west,D,D2,2024
4,18 - 25,-1,20150000002928,54115,Field Contact,,7745,1988,Female,Unknown,...,Unknown,Unknown,Unknown,south pct 3rd w - sam - platoon 2,N,N,unknown,Unknown,Unknown,2015
