In [10]:
# Import our dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

# Load the NYPD complaint records from the Resources folder and preview the first rows
raw_complaints_path = "Resources/NYPD_Complaint_Data.csv"
violent_crime_df = pd.read_csv(raw_complaints_path)
violent_crime_df.head()

Unnamed: 0,CMPLNT_NUM,ADDR_PCT_CD,BORO_NM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,CRM_ATPT_CPTD_CD,HADEVELOPT,HOUSING_PSA,...,TRANSIT_DISTRICT,VIC_AGE_GROUP,VIC_RACE,VIC_SEX,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lat_Lon,New Georeferenced Column
0,854455675,48,,09/29/2021,12:50:00,,,COMPLETED,,,...,,<18,BLACK HISPANIC,M,1016838,250252,40.853507,-73.882203,"(40.85350696200004, -73.88220309599996)",POINT (-73.88220309599996 40.85350696200004)
1,474078722,79,,09/16/2021,05:15:00,,,COMPLETED,,,...,,25-44,BLACK,M,1001523,192971,40.696329,-73.937711,"(40.696328949000076, -73.93771061299998)",POINT (-73.93771061299998 40.696328949000076)
2,843162354,25,,09/15/2021,01:13:00,,,COMPLETED,,,...,,25-44,BLACK,M,1001508,232696,40.805364,-73.937663,"(40.80536404400005, -73.93766276299993)",POINT (-73.93766276299993 40.80536404400005)
3,275119984,47,,09/07/2021,19:35:00,,,COMPLETED,,,...,,25-44,BLACK,M,1023942,264415,40.892351,-73.85644,"(40.89235092600006, -73.85643985999997)",POINT (-73.85643985999997 40.89235092600006)
4,446379952,47,,09/03/2021,20:35:00,,,COMPLETED,,,...,,25-44,BLACK,M,1027062,262581,40.887303,-73.845167,"(40.88730259600004, -73.84516691)",POINT (-73.84516691 40.88730259600004)


In [11]:
# Columns the analysis does not need
columns_to_discard = [
    "HADEVELOPT", "HOUSING_PSA", "PARKS_NM", "STATION_NAME",
    "TRANSIT_DISTRICT", "X_COORD_CD", "Y_COORD_CD",
    "Lat_Lon", "New Georeferenced Column",
]

# Remove those columns, then discard every row that still contains a missing value
violent_crime_df = violent_crime_df.drop(columns=columns_to_discard).dropna()

In [12]:
# Explore the shape of the data: (number of rows, number of columns) remaining
# after the column drop and null-row drop performed earlier in the notebook
violent_crime_df.shape

(201853, 27)

In [13]:
# Tally how many complaints fall under each offense description
violent_crime_df.OFNS_DESC.value_counts()

HARRASSMENT 2                           41901
PETIT LARCENY                           39230
ASSAULT 3 & RELATED OFFENSES            24811
CRIMINAL MISCHIEF & RELATED OF          15857
GRAND LARCENY                           15245
FELONY ASSAULT                          11679
OFF. AGNST PUB ORD SENSBLTY &            9996
MISCELLANEOUS PENAL LAW                  7937
ROBBERY                                  6358
BURGLARY                                 6156
SEX CRIMES                               3349
GRAND LARCENY OF MOTOR VEHICLE           3227
OFFENSES AGAINST PUBLIC ADMINI           2966
DANGEROUS DRUGS                          2425
THEFT-FRAUD                              1308
DANGEROUS WEAPONS                        1171
CRIMINAL TRESPASS                        1122
RAPE                                      899
FRAUDS                                    880
VEHICLE AND TRAFFIC LAWS                  787
UNAUTHORIZED USE OF A VEHICLE             667
FORGERY                           

In [14]:
# Offense descriptions (OFNS_DESC values) that this notebook treats as violent crime.
# Each description appears exactly once; a repeated entry adds nothing to a
# membership check and hides the fact that a category may be missing.
# NOTE(review): "HOMICIDE-NEGLIGENT,UNCLASSIFIE" was previously listed twice, which
# suggests a second, different homicide/murder description was intended — confirm
# against the full set of OFNS_DESC values and add it here if so.
violent_crimes = [
    "ASSAULT 3 & RELATED OFFENSES",
    "FELONY ASSAULT",
    "ROBBERY",
    "RAPE",
    "HOMICIDE-NEGLIGENT,UNCLASSIFIE",
]

In [15]:
# Create the VIOLENT_CRIME flag column with every complaint initialised to 0,
# then display the new column to confirm it was added
violent_crime_df["VIOLENT_CRIME"] = 0
violent_crime_df.loc[:, "VIOLENT_CRIME"]

50        0
51        0
53        0
56        0
57        0
         ..
323808    0
323810    0
323811    0
323814    0
323816    0
Name: VIOLENT_CRIME, Length: 201853, dtype: int64

In [16]:
# Flag violent offenses: set VIOLENT_CRIME to 1 on every row whose OFNS_DESC is one
# of the descriptions in `violent_crimes`; rows that do not match are left unchanged.
# The membership test reuses the list defined earlier in the notebook instead of
# repeating each description in a chain of `==` comparisons, so the list is the
# single place to edit when the definition of "violent" changes.
is_violent = violent_crime_df["OFNS_DESC"].isin(violent_crimes)
violent_crime_df.loc[is_violent, "VIOLENT_CRIME"] = 1

# Spot-check the flag against the offense description for the first ten rows
violent_crime_df[["OFNS_DESC", "VIOLENT_CRIME"]].head(10)

Unnamed: 0,OFNS_DESC,VIOLENT_CRIME
50,OFFENSES AGAINST PUBLIC ADMINI,0
51,ASSAULT 3 & RELATED OFFENSES,1
53,FELONY ASSAULT,1
56,ASSAULT 3 & RELATED OFFENSES,1
57,ROBBERY,1
59,PETIT LARCENY,0
60,SEX CRIMES,0
62,GRAND LARCENY,0
63,HARRASSMENT 2,0
64,CRIMINAL MISCHIEF & RELATED OF,0


# Clean victim and suspect age ranges

In [17]:
# Keep only complaints whose victim age group is one of the five listed brackets;
# rows with any other VIC_AGE_GROUP value are dropped
valid_victim_ages = ["<18", "18-24", "25-44", "45-64", "65+"]
victim_age_ok = violent_crime_df["VIC_AGE_GROUP"].isin(valid_victim_ages)
violent_crime_df = violent_crime_df.loc[victim_age_ok]

In [20]:
# Keep only complaints whose suspect age group is one of the five listed brackets;
# rows with any other SUSP_AGE_GROUP value are dropped
valid_suspect_ages = ["<18", "18-24", "25-44", "45-64", "65+"]
suspect_age_ok = violent_crime_df["SUSP_AGE_GROUP"].isin(valid_suspect_ages)
violent_crime_df = violent_crime_df.loc[suspect_age_ok]

In [18]:
# Count the remaining complaints per victim age bracket.
# NOTE(review): the saved output for this cell carries execution count 18, while the
# suspect-age filter placed before it carries count 20, so the stored counts appear
# to predate that filter — restart the kernel and run all cells to refresh them.
violent_crime_df.VIC_AGE_GROUP.value_counts()

25-44    77405
45-64    40460
18-24    19001
65+       9390
<18       6196
Name: VIC_AGE_GROUP, dtype: int64

In [21]:
# Count the remaining complaints per suspect age bracket
violent_crime_df.SUSP_AGE_GROUP.value_counts()

25-44    48701
45-64    16964
18-24    12941
<18       2722
65+       2188
Name: SUSP_AGE_GROUP, dtype: int64

# Export to CSV

In [None]:
# Persist the cleaned, flagged dataframe for later use.
# NOTE(review): no `index` argument is passed, so pandas' default applies — confirm
# whether the row index should be written as a column of the output file.
cleaned_output_path = "./Resources/violent_crime_v2.csv"
violent_crime_df.to_csv(cleaned_output_path)