# Crime Data 2021

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, date

In [3]:
#Import csv
raw2021 = pd.read_csv('C:/Users/hanna/Documents/CSV/CRIMESDATA/2021-PART_I_AND_II_CRIMES.csv', sep = ',')

In [4]:
#Rows and columns
raw2021.shape

(141800, 19)

In [5]:
#Data Types
raw2021.dtypes

LURN_SAK                    int64
INCIDENT_DATE              object
INCIDENT_REPORTED_DATE     object
CATEGORY                   object
STAT                        int64
STAT_DESC                  object
ADDRESS                    object
STREET                     object
CITY                       object
ZIP                       float64
INCIDENT_ID                object
REPORTING_DISTRICT          int64
SEQ                         int64
GANG_RELATED               object
UNIT_ID                    object
UNIT_NAME                  object
LONGITUDE                 float64
LATITUDE                  float64
PART_CATEGORY               int64
dtype: object

In [6]:
#Convert to datetime
raw2021['INCIDENT_DATE'] = pd.to_datetime(raw2021['INCIDENT_DATE'], format = '%m/%d/%Y %I:%M:%S %p')
raw2021['INCIDENT_REPORTED_DATE'] = pd.to_datetime(raw2021['INCIDENT_REPORTED_DATE'], format = '%m/%d/%Y')

#Sort by Incident_Date, Incident_Reported_Date, ID
raw2021 = raw2021.sort_values(by=['INCIDENT_DATE', 'INCIDENT_REPORTED_DATE', 'INCIDENT_ID'], ascending=[True, True, True])
raw2021.dtypes

LURN_SAK                           int64
INCIDENT_DATE             datetime64[ns]
INCIDENT_REPORTED_DATE    datetime64[ns]
CATEGORY                          object
STAT                               int64
STAT_DESC                         object
ADDRESS                           object
STREET                            object
CITY                              object
ZIP                              float64
INCIDENT_ID                       object
REPORTING_DISTRICT                 int64
SEQ                                int64
GANG_RELATED                      object
UNIT_ID                           object
UNIT_NAME                         object
LONGITUDE                        float64
LATITUDE                         float64
PART_CATEGORY                      int64
dtype: object

In [7]:
#Filter by Q1/Q2 2021
start_date = '2020-12-31 23:59:59'
end_date = '2021-06-30 23:59:59'

mask = (raw2021['INCIDENT_DATE'] > start_date) & (raw2021['INCIDENT_DATE'] <= end_date)
s2021 = raw2021.loc[mask]
s2021.reset_index(drop=True)

Unnamed: 0,LURN_SAK,INCIDENT_DATE,INCIDENT_REPORTED_DATE,CATEGORY,STAT,STAT_DESC,ADDRESS,STREET,CITY,ZIP,INCIDENT_ID,REPORTING_DISTRICT,SEQ,GANG_RELATED,UNIT_ID,UNIT_NAME,LONGITUDE,LATITUDE,PART_CATEGORY
0,19467967,2021-01-01 00:00:00,2021-01-01,VANDALISM,263,VANDALISM FELONY,"11000 LOUISE AVE, LYNWOOD, CA 90262",11000 LOUISE AVE,LYNWOOD,90262.0,921-00003-2115,2115,3,NO,CA01900V3,CENTURY,-118.180,33.927,2
1,19468111,2021-01-01 00:00:00,2021-01-01,LARCENY THEFT,340,VEHICLE BURGLARY: Auto/Passenger Van Burglary,"200 E 3RD ST, SAN DIMAS, CA 91773",200 E 3RD ST,SAN DIMAS,91773.0,921-00007-0813,813,7,NO,CA0190008,SAN DIMAS,-117.802,34.109,1
2,19468023,2021-01-01 00:00:00,2021-01-01,VEHICLE / BOATING LAWS,250,"VEHICLE AND BOATING LAWS: Hit And Run, Misdeme...","1600 W AVENUE J-8, LANCASTER, CA",1600 W AVENUE J-8,LANCASTER,,921-00013-1126,1126,13,NO,CA0190024,LANCASTER,-118.159,34.682,2
3,19468250,2021-01-01 00:00:00,2021-01-01,VEHICLE / BOATING LAWS,250,"VEHICLE AND BOATING LAWS: Hit And Run, Misdeme...","FLOWER ST AND STATE ST, LYNWOOD, CA",FLOWER ST AND STATE ST,LYNWOOD,,921-00029-2116,2116,29,NO,CA01900V3,CENTURY,-118.218,33.926,2
4,19468888,2021-01-01 00:00:00,2021-01-02,NON-AGGRAVATED ASSAULTS,146,"ASSAULT, NON-AGGRAVATED: DOMESTIC VIOLENCE","1300 S ATLANTIC DR, COMPTON, CA 90221",1300 S ATLANTIC DR,COMPTON,90221.0,021-00053-2847,2847,53,NO,CA0190042,COMPTON,-118.192,33.884,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72248,19637974,2021-06-30 23:45:00,2021-07-01,LARCENY THEFT,384,"THEFT, PETTY: From Auto (Except Parts/Accessor...","1400 DESCANSO DR, LA CANADA FLINTRIDGE, CA 91011",1400 DESCANSO DR,LA CANADA FLINTRIDGE,91011.0,921-01103-1264,1264,1103,NO,CA0190012,CRESCENTA VALLEY,-118.209,34.203,1
72249,19637603,2021-06-30 23:50:00,2021-07-01,NARCOTICS,181,Felony Transport. &/or Sale of Controlled Subs...,"GARVEY AVE AND ROSEMEAD BLVD, ROSEMEAD, CA 91770",GARVEY AVE AND ROSEMEAD BLVD,ROSEMEAD,91770.0,921-07118-0534,534,7118,NO,CA0190005,TEMPLE,-118.073,34.062,2
72250,19647167,2021-06-30 23:59:30,2021-07-12,LARCENY THEFT,389,"THEFT, PETTY: Other (From Prvt Res, Boat, Plan...","2700 RAMBLA PACIFICO ST, MALIBU, CA 90265",2700 RAMBLA PACIFICO ST,MALIBU,90265.0,921-04036-1026,1026,4036,NO,CA0190022,MALIBU/LOST HILLS,-118.646,34.051,1
72251,19657469,2021-06-30 23:59:30,2021-07-24,FRAUD AND NSF CHECKS,112,FRAUD: Fraud By False Pretenses,"UNK UNKNOWN, LOS ANGELES, CA",UNK UNKNOWN,LOS ANGELES,,921-09981-0277,277,9981,NO,CA0190002,EAST LOS ANGELES,,,2


In [8]:
#Filter columns
f2021 = s2021[["INCIDENT_DATE", "CATEGORY", "STAT", "STAT_DESC", "ADDRESS", "STREET", "CITY", "ZIP", "INCIDENT_ID", "REPORTING_DISTRICT", "GANG_RELATED", "UNIT_ID", "UNIT_NAME", "LONGITUDE", "LATITUDE", "PART_CATEGORY"]]
f2021.reset_index(drop=True)

Unnamed: 0,INCIDENT_DATE,CATEGORY,STAT,STAT_DESC,ADDRESS,STREET,CITY,ZIP,INCIDENT_ID,REPORTING_DISTRICT,GANG_RELATED,UNIT_ID,UNIT_NAME,LONGITUDE,LATITUDE,PART_CATEGORY
0,2021-01-01 00:00:00,VANDALISM,263,VANDALISM FELONY,"11000 LOUISE AVE, LYNWOOD, CA 90262",11000 LOUISE AVE,LYNWOOD,90262.0,921-00003-2115,2115,NO,CA01900V3,CENTURY,-118.180,33.927,2
1,2021-01-01 00:00:00,LARCENY THEFT,340,VEHICLE BURGLARY: Auto/Passenger Van Burglary,"200 E 3RD ST, SAN DIMAS, CA 91773",200 E 3RD ST,SAN DIMAS,91773.0,921-00007-0813,813,NO,CA0190008,SAN DIMAS,-117.802,34.109,1
2,2021-01-01 00:00:00,VEHICLE / BOATING LAWS,250,"VEHICLE AND BOATING LAWS: Hit And Run, Misdeme...","1600 W AVENUE J-8, LANCASTER, CA",1600 W AVENUE J-8,LANCASTER,,921-00013-1126,1126,NO,CA0190024,LANCASTER,-118.159,34.682,2
3,2021-01-01 00:00:00,VEHICLE / BOATING LAWS,250,"VEHICLE AND BOATING LAWS: Hit And Run, Misdeme...","FLOWER ST AND STATE ST, LYNWOOD, CA",FLOWER ST AND STATE ST,LYNWOOD,,921-00029-2116,2116,NO,CA01900V3,CENTURY,-118.218,33.926,2
4,2021-01-01 00:00:00,NON-AGGRAVATED ASSAULTS,146,"ASSAULT, NON-AGGRAVATED: DOMESTIC VIOLENCE","1300 S ATLANTIC DR, COMPTON, CA 90221",1300 S ATLANTIC DR,COMPTON,90221.0,021-00053-2847,2847,NO,CA0190042,COMPTON,-118.192,33.884,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72248,2021-06-30 23:45:00,LARCENY THEFT,384,"THEFT, PETTY: From Auto (Except Parts/Accessor...","1400 DESCANSO DR, LA CANADA FLINTRIDGE, CA 91011",1400 DESCANSO DR,LA CANADA FLINTRIDGE,91011.0,921-01103-1264,1264,NO,CA0190012,CRESCENTA VALLEY,-118.209,34.203,1
72249,2021-06-30 23:50:00,NARCOTICS,181,Felony Transport. &/or Sale of Controlled Subs...,"GARVEY AVE AND ROSEMEAD BLVD, ROSEMEAD, CA 91770",GARVEY AVE AND ROSEMEAD BLVD,ROSEMEAD,91770.0,921-07118-0534,534,NO,CA0190005,TEMPLE,-118.073,34.062,2
72250,2021-06-30 23:59:30,LARCENY THEFT,389,"THEFT, PETTY: Other (From Prvt Res, Boat, Plan...","2700 RAMBLA PACIFICO ST, MALIBU, CA 90265",2700 RAMBLA PACIFICO ST,MALIBU,90265.0,921-04036-1026,1026,NO,CA0190022,MALIBU/LOST HILLS,-118.646,34.051,1
72251,2021-06-30 23:59:30,FRAUD AND NSF CHECKS,112,FRAUD: Fraud By False Pretenses,"UNK UNKNOWN, LOS ANGELES, CA",UNK UNKNOWN,LOS ANGELES,,921-09981-0277,277,NO,CA0190002,EAST LOS ANGELES,,,2


In [9]:
#Change data types
f2021 = pd.DataFrame(f2021)
f2021[['CATEGORY', 'STAT_DESC', 'ADDRESS', 'STREET', 'CITY', 'INCIDENT_ID', 'GANG_RELATED', 'UNIT_ID', 'UNIT_NAME']] = f2021[['CATEGORY', 'STAT_DESC', 'ADDRESS', 'STREET', 'CITY', 'INCIDENT_ID', 'GANG_RELATED', 'UNIT_ID', 'UNIT_NAME']].astype('string')
f2021['ZIP'] = f2021['ZIP'].astype('Int64')

### Data Checks

In [10]:
#Data types
f2021.dtypes

INCIDENT_DATE         datetime64[ns]
CATEGORY                      string
STAT                           int64
STAT_DESC                     string
ADDRESS                       string
STREET                        string
CITY                          string
ZIP                            Int64
INCIDENT_ID                   string
REPORTING_DISTRICT             int64
GANG_RELATED                  string
UNIT_ID                       string
UNIT_NAME                     string
LONGITUDE                    float64
LATITUDE                     float64
PART_CATEGORY                  int64
dtype: object

In [11]:
#Rows and columns
f2021.shape

(72253, 16)

In [12]:
#Duplicates
f2021.duplicated()

41944     False
82300     False
67405     False
14700     False
48755     False
          ...  
110341    False
139860    False
115152    False
122337    False
70235     False
Length: 72253, dtype: bool

In [13]:
#Duplicates (Count)
f2021.duplicated().sum()

0

In [14]:
#Count nulls for each column
f2021.isnull().sum()

INCIDENT_DATE             0
CATEGORY                  0
STAT                      0
STAT_DESC                 0
ADDRESS                1243
STREET                 1723
CITY                   1243
ZIP                   34375
INCIDENT_ID               0
REPORTING_DISTRICT        0
GANG_RELATED              0
UNIT_ID                   0
UNIT_NAME                 0
LONGITUDE              4345
LATITUDE               4345
PART_CATEGORY             0
dtype: int64

In [15]:
#Unique values
f2021.nunique()

INCIDENT_DATE         49721
CATEGORY                 30
STAT                    226
STAT_DESC               226
ADDRESS               42566
STREET                34957
CITY                    260
ZIP                     290
INCIDENT_ID           72253
REPORTING_DISTRICT     1109
GANG_RELATED              2
UNIT_ID                  52
UNIT_NAME                51
LONGITUDE              1155
LATITUDE               1023
PART_CATEGORY             2
dtype: int64

In [19]:
f2021.dtypes

INCIDENT_DATE         datetime64[ns]
CATEGORY                      string
STAT                           int64
STAT_DESC                     string
ADDRESS                       string
STREET                        string
CITY                          string
ZIP                            Int64
INCIDENT_ID                   string
REPORTING_DISTRICT             int64
GANG_RELATED                  string
UNIT_ID                       string
UNIT_NAME                     string
LONGITUDE                    float64
LATITUDE                     float64
PART_CATEGORY                  int64
dtype: object

### Export

In [16]:
#Export to csv
f2021.to_csv('C:/Users/hanna/Documents/CSV/CRIMESDATA/LA_2021.csv')

### 2021 July - December

In [17]:
#Filter by Q3/Q4 2021
start_date = '2021-06-30 23:59:59'
end_date = '2021-12-31 23:59:59'

mask = (raw2021['INCIDENT_DATE'] > start_date) & (raw2021['INCIDENT_DATE'] <= end_date)
e2021 = raw2021.loc[mask]
e2021.reset_index(drop=True)
e2021.shape

(69547, 19)

### 2021 - Incidents that did not occur in 2021 but were reported in 2021

In [18]:
mask = (raw2021['INCIDENT_DATE'].dt.year != 2021)
nr2021 = raw2021.loc[mask]
nr2021.reset_index(drop=True)
nr2021.shape

(0, 19)