In [77]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from folium import plugins
from folium.plugins import HeatMap

In [2]:
# Aim 1: Conduct EDA to identify trends
# Aim 2: Investigate crimes that require special attention from responding officers, i.e. high-risk crimes
# Aim 3: Identify actionable intelligence for law enforcement regarding high-risk crimes

# Feature Engineering

In [78]:
# Show every column when a frame is displayed, then load the raw crime export
# with lower-cased column names (later cells refer to e.g. 'crm cd', 'date occ').
pd.set_option('display.max_columns', None)
crime = pd.read_csv('Crime_Data_from_2020_to_Present.csv').rename(columns=str.lower)

In [79]:
# Number of missing values in each column.
crime.isna().sum()

dr_no                  0
date rptd              0
date occ               0
time occ               0
area                   0
area name              0
rpt dist no            0
part 1-2               0
crm cd                 0
crm cd desc            0
mocodes           114856
vict age               0
vict sex          109299
vict descent      109307
premis cd             10
premis desc          492
weapon used cd    540459
weapon desc       540459
status                 0
status desc            0
crm cd 1              10
crm cd 2          768750
crm cd 3          827720
crm cd 4          829717
location               0
cross street      697270
lat                    0
lon                    0
dtype: int64

In [80]:
# Parse the occurrence date: remove the '12:00:00 AM' time text, then convert
# what is left to a datetime column.
occ_date_text = crime['date occ'].str.replace('12:00:00 AM', '')
crime['date occ'] = pd.to_datetime(occ_date_text)

In [81]:
# Label each occurrence date with a short weekday name (0 = Monday ... 6 = Sunday),
# used for the by-day breakdowns in later cells.
weekday_labels = dict(enumerate(['mon', 'tues', 'weds', 'thurs', 'fri', 'sat', 'sun']))
crime['day_of_week'] = crime['date occ'].dt.dayofweek.map(weekday_labels)

In [82]:
# Split the occurrence date into year / month / day columns for later analysis.
# The parts are read straight from the datetime accessor instead of formatting
# the date as text and splitting on '-': the text round-trip only works while
# every timestamp is exactly midnight (otherwise the day part would carry a
# time suffix and the later integer cast would fail).
occ_date = crime['date occ']
crime['year_occ'] = occ_date.dt.year
crime['month_occ'] = occ_date.dt.month
crime['day_occ'] = occ_date.dt.day

# The original datetime column is no longer needed once the parts exist.
# Rebinding (rather than inplace=True) avoids mutating a frame shown earlier.
crime = crime.drop(columns=['date occ'])

In [83]:
# 'time occ' loses its leading zeros when read as a number (0001 becomes 1, which
# would read as 1 am rather than 12:01 am). Pad it back to four characters and
# keep the first two as the hour of occurrence.
crime['time occ'] = crime['time occ'].astype('string').str.zfill(4)
crime['hour_occ'] = crime['time occ'].str.slice(0, 2)

In [85]:
# Store the derived date/time parts as integers so they can be grouped and sorted numerically.
date_part_columns = ['year_occ', 'month_occ', 'day_occ', 'hour_occ']
crime[date_part_columns] = crime[date_part_columns].astype('int')

In [47]:
# Display the feature-engineered frame to check the new day_of_week, year_occ,
# month_occ, day_occ and hour_occ columns.
crime

Unnamed: 0,dr_no,date rptd,time occ,area,area name,rpt dist no,part 1-2,crm cd,crm cd desc,mocodes,vict age,vict sex,vict descent,premis cd,premis desc,weapon used cd,weapon desc,status,status desc,crm cd 1,crm cd 2,crm cd 3,crm cd 4,location,cross street,lat,lon,day_of_week,year_occ,month_occ,day_occ,hour_occ
0,10304468,01/08/2020 12:00:00 AM,2230,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,0444 0913,36,F,B,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",AO,Adult Other,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978,weds,2020,1,8,22
1,190101086,01/02/2020 12:00:00 AM,0330,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,0416 1822 1414,25,M,H,102.0,SIDEWALK,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,624.0,,,,700 S HILL ST,,34.0459,-118.2545,weds,2020,1,1,3
2,200110444,04/14/2020 12:00:00 AM,1200,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,1501,0,X,X,726.0,POLICE FACILITY,,,AA,Adult Arrest,845.0,,,,200 E 6TH ST,,34.0448,-118.2474,thurs,2020,2,13,12
3,191501505,01/01/2020 12:00:00 AM,1730,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),0329 1402,76,F,W,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",,,IC,Invest Cont,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,weds,2020,1,1,17
4,191921269,01/01/2020 12:00:00 AM,0415,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329,31,X,X,409.0,BEAUTY SUPPLY STORE,,,IC,Invest Cont,740.0,,,,14400 TITUS ST,,34.2198,-118.4468,weds,2020,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
829773,231604807,01/27/2023 12:00:00 AM,1800,16,Foothill,1663,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",1300 0329,23,M,H,122.0,"VEHICLE, PASSENGER/TRUCK",,,IC,Invest Cont,740.0,,,,12500 BRANFORD ST,,34.2466,-118.4054,thurs,2023,1,26,18
829774,231606525,03/22/2023 12:00:00 AM,1000,16,Foothill,1602,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 0411 1822,25,F,H,102.0,SIDEWALK,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,230.0,,,,12800 FILMORE ST,,34.2790,-118.4116,weds,2023,3,22,10
829775,231210064,04/12/2023 12:00:00 AM,1630,12,77th Street,1239,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0601 0445 0416 0359,29,M,B,222.0,LAUNDROMAT,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,230.0,,,,6100 S VERMONT AV,,33.9841,-118.2915,weds,2023,4,12,16
829776,230115220,07/02/2023 12:00:00 AM,0001,1,Central,154,1,352,PICKPOCKET,1822 0344,24,F,H,735.0,NIGHT CLUB (OPEN EVENINGS ONLY),,,IC,Invest Cont,352.0,,,,500 S MAIN ST,,34.0467,-118.2485,sat,2023,7,1,0


# EDA

In [86]:
# Rank crime types (code + description) by number of recorded incidents.
top_crime = (
    crime
    .groupby(['crm cd', 'crm cd desc'], as_index=False)
    .agg(count=('year_occ', 'count'))
    .sort_values(by='count', ascending=False)
)
top_crime.head(50)

Unnamed: 0,crm cd,crm cd desc,count
56,510,VEHICLE - STOLEN,88892
61,624,BATTERY - SIMPLE ASSAULT,66149
26,354,THEFT OF IDENTITY,52321
15,330,BURGLARY FROM VEHICLE,50941
79,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",50556
13,310,BURGLARY,50535
6,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",47711
38,440,THEFT PLAIN - PETTY ($950 & UNDER),42158
63,626,INTIMATE PARTNER - SIMPLE ASSAULT,41776
28,420,THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER),32026


In [87]:
# Bar chart of the ten most frequent crime types.
top_ten_crimes = top_crime.head(10)
fig = px.bar(
    top_ten_crimes,
    x='crm cd desc',
    y='count',
    title='Top crimes (2020-2023)',
)
fig.show()

In [88]:
# Vehicle theft (crime code 510): incidents counted per weekday and hour of
# occurrence, drawn as one line per weekday.
vehicle_theft = (
    crime[crime['crm cd'].eq(510)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    vehicle_theft,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Vehicle theft by day,hour and count',
)
fig.show()

In [16]:
# Burglary from vehicle (crime code 330): incidents counted per weekday and hour
# of occurrence, drawn as one line per weekday.
burglary_from_vehicle = (
    crime[crime['crm cd'].eq(330)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
# Title corrected: code 330 is "BURGLARY FROM VEHICLE"; theft from a motor
# vehicle is a separate crime code, so the old "Theft from vehicle" label
# misdescribed the data being plotted.
fig = px.line(
    burglary_from_vehicle,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Burglary from vehicle by day,hour and count',
)
fig.show()

In [17]:
# Assault with deadly weapon / aggravated assault (crime code 230): incidents
# counted per weekday and hour of occurrence.
aggravated_assault = (
    crime[crime['crm cd'].eq(230)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    aggravated_assault,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Aggravated assault by day,hour and count',
)
fig.show()

In [18]:
# Crime code 442 (plotted as "Shoplifting"): incidents counted per weekday and
# hour of occurrence.
shoplifting = (
    crime[crime['crm cd'].eq(442)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    shoplifting,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Shoplifting by day,hour and count',
)
fig.show()

In [19]:
# Battery - simple assault (crime code 624): incidents counted per weekday and
# hour of occurrence.
battery = (
    crime[crime['crm cd'].eq(624)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    battery,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Battery (simple assault) by day,hour and count',
)
fig.show()

In [20]:
# Crime code 210 (plotted as "Robbery"): incidents counted per weekday and hour
# of occurrence.
robbery = (
    crime[crime['crm cd'].eq(210)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    robbery,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Robbery by day,hour and count',
)
fig.show()

In [21]:
# Crime code 888 (plotted as "Trespassing"): incidents counted per weekday and
# hour of occurrence. The variable keeps its original spelling so any other cell
# that refers to it still works; only the reader-facing chart title is corrected.
tresspassing = (
    crime[crime['crm cd'].eq(888)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    tresspassing,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Trespassing by day,hour and count',
)
fig.show()

In [22]:
# Crime code 236 (plotted as "Intimate partner violence"): incidents counted per
# weekday and hour of occurrence.
# NOTE(review): the top-crime table also lists code 626 (INTIMATE PARTNER -
# SIMPLE ASSAULT), which is not included here - confirm that 236 alone is intended.
partner_violence = (
    crime[crime['crm cd'].eq(236)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    partner_violence,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Intimate partner violence by day,hour and count',
)
fig.show()

In [23]:
# Crime code 230 counted per weekday and hour of occurrence, plotted as "Armed assault".
# NOTE(review): this filters on the same crime code (230) as the
# `aggravated_assault` cell, so the two charts show identical data under
# different titles - confirm whether a different code was intended here.
armed_assault = (
    crime[crime['crm cd'].eq(230)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    armed_assault,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Armed assault by day,hour and count',
)
fig.show()

In [24]:
# Discharge of firearms / shots fired (crime code 753): incidents counted per
# weekday and hour of occurrence.
firearms_discharge = (
    crime[crime['crm cd'].eq(753)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    firearms_discharge,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Firearms discharge by day,hour and count',
)
fig.show()

In [25]:
# Criminal homicide (crime code 110): incidents counted per weekday and hour of
# occurrence.
Homicide = (
    crime[crime['crm cd'].eq(110)]
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
)
fig = px.line(
    Homicide,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='Homicide by day,hour and count',
)
fig.show()

# Examining crimes involving weapons

In [89]:
# High risk crimes (HRCs): the subset of incidents whose crime code is one of the
# five weapon-related codes below (230, 761, 251, 110, 753).
hrc_codes = [230, 761, 251, 110, 753]
high_risk_crimes = crime[crime['crm cd'].isin(hrc_codes)]
high_risk_crimes

Unnamed: 0,dr_no,date rptd,time occ,area,area name,rpt dist no,part 1-2,crm cd,crm cd desc,mocodes,vict age,vict sex,vict descent,premis cd,premis desc,weapon used cd,weapon desc,status,status desc,crm cd 1,crm cd 2,crm cd 3,crm cd 4,location,cross street,lat,lon,day_of_week,year_occ,month_occ,day_occ,hour_occ
24,200100546,01/15/2020 12:00:00 AM,0700,1,Central,166,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 0913 2004 1218,62,M,A,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",500.0,UNKNOWN WEAPON/OTHER WEAPON,AO,Adult Other,230.0,,,,600 SAN JULIAN ST,,34.0428,-118.2461,weds,2020,1,15,7
27,200100552,01/19/2020 12:00:00 AM,2000,1,Central,111,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",2004 0305 0411 1202 1414 1402 1822 0906,71,M,W,148.0,PUBLIC RESTROOM/OUTSIDE*,500.0,UNKNOWN WEAPON/OTHER WEAPON,AA,Adult Arrest,230.0,,,,ALAMEDA,LOS ANGELES,34.0578,-118.2371,sun,2020,1,19,20
42,200100578,01/31/2020 12:00:00 AM,2040,1,Central,163,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0947 0913 0416 0447 1402 0319 0443 1420 2004,52,M,H,401.0,MINI-MART,302.0,BLUNT INSTRUMENT,AA,Adult Arrest,230.0,,,,7TH ST,BROADWAY,34.0456,-118.2540,thurs,2020,1,30,20
43,200100583,02/04/2020 12:00:00 AM,0920,1,Central,146,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 2004 1822 1402,38,F,H,203.0,OTHER BUSINESS,212.0,BOTTLE,AA,Adult Arrest,230.0,998.0,,,200 WINSTON ST,,34.0467,-118.2470,tues,2020,2,4,9
47,200218458,12/11/2020 12:00:00 AM,0420,2,Rampart,233,1,761,BRANDISH WEAPON,1822 0334 0319 0416 0400 1402 1309 0216,34,M,W,108.0,PARKING LOT,109.0,SEMI-AUTOMATIC PISTOL,AO,Adult Other,761.0,998.0,,,2500 BEVERLY BL,,34.0708,-118.2798,fri,2020,12,11,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
829743,230615744,09/25/2023 12:00:00 AM,1740,6,Hollywood,625,1,761,BRANDISH WEAPON,0319 0334 1822 1501,31,M,W,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",219.0,SCREWDRIVER,IC,Invest Cont,761.0,,,,2000 N LAS PALMAS AV,,34.1131,-118.3410,mon,2023,9,25,17
829754,230512110,08/09/2023 12:00:00 AM,1310,5,Harbor,516,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0913 0342 0416 0334,59,M,H,102.0,SIDEWALK,200.0,KNIFE WITH BLADE 6INCHES OR LESS,IC,Invest Cont,230.0,,,,1200 N AVALON BL,,33.7868,-118.2658,weds,2023,8,9,13
829760,231514016,08/27/2023 12:00:00 AM,0030,15,N Hollywood,1548,1,761,BRANDISH WEAPON,0334,38,M,H,101.0,STREET,200.0,KNIFE WITH BLADE 6INCHES OR LESS,AO,Adult Other,761.0,,,,11000 MCCORMICK ST,,34.1660,-118.3705,sun,2023,8,27,0
829774,231606525,03/22/2023 12:00:00 AM,1000,16,Foothill,1602,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 0411 1822,25,F,H,102.0,SIDEWALK,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,230.0,,,,12800 FILMORE ST,,34.2790,-118.4116,weds,2023,3,22,10


In [90]:
# Lookup table of the HRC codes: code, description and incident count, with the
# most frequent first and a fresh 0..n-1 index.
high_risk_crimes_definition = (
    high_risk_crimes
    .groupby(['crm cd', 'crm cd desc'], as_index=False)
    .agg(count=('year_occ', 'count'))
    .sort_values(by='count', ascending=False)
    .reset_index(drop=True)
)
high_risk_crimes_definition

Unnamed: 0,crm cd,crm cd desc,count
0,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",47711
1,761,BRANDISH WEAPON,12934
2,753,DISCHARGE FIREARMS/SHOTS FIRED,2410
3,251,SHOTS FIRED AT INHABITED DWELLING,1506
4,110,CRIMINAL HOMICIDE,1415


In [91]:
# All HRCs combined: incidents counted per weekday and hour of occurrence, drawn
# as one line per weekday.
high_risk_crimes_line = (
    high_risk_crimes
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('lat', 'count'))
)
fig = px.line(
    high_risk_crimes_line,
    x='hour_occ',
    y='count',
    color='day_of_week',
    title='High Risk Crimes',
)
fig.show()

In [92]:
# Heat-map input: number of HRCs at each (location, lat, lon), most frequent first,
# reduced to the three columns lat / lon / count.
high_risk_crimes_coordinates = (
    high_risk_crimes
    .groupby(['location', 'lat', 'lon'], as_index=False)
    .agg(count=('year_occ', 'count'))
    .sort_values(by='count', ascending=False)
    .loc[:, ['lat', 'lon', 'count']]
)
high_risk_crimes_coordinates

Unnamed: 0,lat,lon,count
17504,34.0595,-118.2749,93
21043,34.0561,-118.2375,83
9873,34.0601,-118.2761,64
15587,34.0437,-118.2455,58
17589,34.0421,-118.2469,50
...,...,...,...
14272,33.9952,-118.3309,1
14271,34.0060,-118.2513,1
14270,34.0057,-118.2474,1
14268,34.1009,-118.2868,1


In [93]:
# Heat map of HRC coordinates. The lat/lon/count frame is handed to HeatMap as-is,
# so the third column (count) acts as each point's weight.
hm = folium.Map(location=[34.052235, -118.243683], zoom_start=14)

heat_layer = folium.FeatureGroup(name='Heat Map').add_to(hm)
HeatMap(high_risk_crimes_coordinates, min_opacity=0.01, blur=20).add_to(heat_layer)
folium.LayerControl().add_to(hm)

# Circles drawn around hand-picked coordinates. The first five match the top five
# rows of the coordinates table.
# NOTE(review): the meaning of red vs green is not stated in the notebook - confirm.
hotspot_circles = [
    ([34.0595, -118.2749], 'red'),
    ([34.0561, -118.2375], 'red'),
    ([34.0601, -118.2761], 'red'),
    ([34.0437, -118.2455], 'green'),
    ([34.0421, -118.2469], 'green'),
    ([34.0460, -118.2493], 'green'),
]
for circle_centre, circle_colour in hotspot_circles:
    folium.CircleMarker(circle_centre, radius=30, color=circle_colour).add_to(hm)

# Pins for nearby police facilities, labelled via popup.
police_facilities = [
    ([34.05671, -118.26699], 'LAPD Rampart Station'),
    ([34.052, -118.244], 'LAPD Headquarters'),
    ([34.04402, -118.2473], 'Central Community Police Station'),
]
for facility_position, facility_name in police_facilities:
    folium.Marker(facility_position, popup=facility_name).add_to(hm)

hm

In [31]:
# Configure Chart Studio (used by the py.iplot cells below).
# Credentials are read from the environment rather than written into the
# notebook, so no secret is stored or shared with the file. Set
# CHART_STUDIO_USERNAME and CHART_STUDIO_API_KEY before starting the kernel;
# a missing variable raises KeyError naming the variable that is absent.
import os

import chart_studio
import chart_studio.plotly as py

chart_studio.tools.set_credentials_file(
    username=os.environ['CHART_STUDIO_USERNAME'],
    api_key=os.environ['CHART_STUDIO_API_KEY'],
)

In [94]:
# Plot-sheet input for all HRCs: incidents per (weekday, hour), ordered mon -> sun
# and then by hour, with each hour converted to an angle (15 degrees per hour,
# so 24 hours cover the full circle).
weekday_order = ['mon', 'tues', 'weds', 'thurs', 'fri', 'sat', 'sun']
hour_to_degrees = {hour: hour * 15 for hour in range(24)}

high_risk_crimes_plot = (
    high_risk_crimes
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
    .sort_values(by='count', ascending=False)
    # An ordered categorical makes the next sort follow the calendar week, not the alphabet.
    .assign(day_of_week=lambda frame: pd.Categorical(frame['day_of_week'], categories=weekday_order, ordered=True))
    .sort_values(by=['day_of_week', 'hour_occ', 'count'])
    .assign(hour_occ=lambda frame: frame['hour_occ'].map(hour_to_degrees))
)
high_risk_crimes_plot

Unnamed: 0,day_of_week,hour_occ,count
24,mon,0,410
25,mon,15,325
26,mon,30,258
27,mon,45,210
28,mon,60,174
...,...,...,...
91,sun,285,582
92,sun,300,587
93,sun,315,595
94,sun,330,597


In [33]:
# Polar "plot sheet" for all HRCs: angle = hour of day, radius = weekday, marker
# size = incident count. Per the original note, the figure was further edited
# with a custom template in Plotly.
hour_angles = list(range(0, 360, 15))   # one tick every 15 degrees ...
hour_labels = list(range(24))           # ... labelled with the hour it stands for

fig = px.scatter_polar(
    high_risk_crimes_plot,
    r='day_of_week',
    theta='hour_occ',
    size='count',
    color_discrete_sequence=px.colors.sequential.Plasma_r,
)
fig.update_layout(
    polar={'angularaxis': {'tickvals': hour_angles, 'ticktext': hour_labels}}
)

py.iplot(fig, filename="Plot Sheet (HRC)")

# HRC downtown

In [95]:
# HRCs inside the downtown bounding box (both bounds inclusive):
# latitude 34.0279 .. 34.076, longitude -118.2693 .. -118.2143.
# NOTE(review): two of the most frequent HRC coordinates in the coordinates table
# (lon -118.2749 and -118.2761) lie west of this box - confirm that excluding
# them is intended.
inside_lat_band = high_risk_crimes['lat'].between(34.0279, 34.076)
inside_lon_band = high_risk_crimes['lon'].between(-118.2693, -118.2143)
high_risk_crime_downtown = high_risk_crimes[inside_lat_band & inside_lon_band]
high_risk_crime_downtown

Unnamed: 0,dr_no,date rptd,time occ,area,area name,rpt dist no,part 1-2,crm cd,crm cd desc,mocodes,vict age,vict sex,vict descent,premis cd,premis desc,weapon used cd,weapon desc,status,status desc,crm cd 1,crm cd 2,crm cd 3,crm cd 4,location,cross street,lat,lon,day_of_week,year_occ,month_occ,day_occ,hour_occ
24,200100546,01/15/2020 12:00:00 AM,0700,1,Central,166,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 0913 2004 1218,62,M,A,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",500.0,UNKNOWN WEAPON/OTHER WEAPON,AO,Adult Other,230.0,,,,600 SAN JULIAN ST,,34.0428,-118.2461,weds,2020,1,15,7
27,200100552,01/19/2020 12:00:00 AM,2000,1,Central,111,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",2004 0305 0411 1202 1414 1402 1822 0906,71,M,W,148.0,PUBLIC RESTROOM/OUTSIDE*,500.0,UNKNOWN WEAPON/OTHER WEAPON,AA,Adult Arrest,230.0,,,,ALAMEDA,LOS ANGELES,34.0578,-118.2371,sun,2020,1,19,20
42,200100578,01/31/2020 12:00:00 AM,2040,1,Central,163,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0947 0913 0416 0447 1402 0319 0443 1420 2004,52,M,H,401.0,MINI-MART,302.0,BLUNT INSTRUMENT,AA,Adult Arrest,230.0,,,,7TH ST,BROADWAY,34.0456,-118.2540,thurs,2020,1,30,20
43,200100583,02/04/2020 12:00:00 AM,0920,1,Central,146,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0416 2004 1822 1402,38,F,H,203.0,OTHER BUSINESS,212.0,BOTTLE,AA,Adult Arrest,230.0,998.0,,,200 WINSTON ST,,34.0467,-118.2470,tues,2020,2,4,9
61,200104038,01/01/2020 12:00:00 AM,1200,1,Central,157,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0337 0340 0416 0445 1822 1218 2021 1019,57,M,B,102.0,SIDEWALK,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,230.0,,,,7TH,SAN PEDRO,34.0401,-118.2459,weds,2020,1,1,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
828985,230100849,09/22/2023 12:00:00 AM,1815,1,Central,156,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1822 1218 2004 1402 0400 0411,39,F,B,102.0,SIDEWALK,216.0,SCISSORS,AO,Adult Other,230.0,998.0,,,6TH,SAN PEDRO,34.0423,-118.2453,fri,2023,9,22,18
829107,230110355,04/11/2023 12:00:00 AM,1000,1,Central,156,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1218 1822 2004 0400 0411,42,M,B,102.0,SIDEWALK,207.0,OTHER KNIFE,IC,Invest Cont,230.0,,,,400 E 6TH ST,,34.0437,-118.2455,tues,2023,4,11,10
829281,230412790,09/25/2023 12:00:00 AM,1830,4,Hollenbeck,477,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",0400 1218 0446,48,M,H,101.0,STREET,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,230.0,,,,2400 WHITTIER BL,,34.0344,-118.2157,mon,2023,9,25,18
829475,230112394,05/15/2023 12:00:00 AM,1250,1,Central,166,1,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1218 2004 0603 0913 1402 0431,64,M,B,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",512.0,MACE/PEPPER SPRAY,IC,Invest Cont,230.0,998.0,,,600 WALL ST,,34.0435,-118.2471,mon,2023,5,15,12


In [96]:
# Plot-sheet input for downtown HRCs: incidents per (weekday, hour), ordered
# mon -> sun and then by hour, with each hour converted to an angle (15 degrees
# per hour, so 24 hours cover the full circle).
weekday_order = ['mon', 'tues', 'weds', 'thurs', 'fri', 'sat', 'sun']
hour_to_degrees = {hour: hour * 15 for hour in range(24)}

high_risk_crime_downtown_plot = (
    high_risk_crime_downtown
    .groupby(['day_of_week', 'hour_occ'], as_index=False)
    .agg(count=('year_occ', 'count'))
    .sort_values(by='count', ascending=False)
    # An ordered categorical makes the next sort follow the calendar week, not the alphabet.
    .assign(day_of_week=lambda frame: pd.Categorical(frame['day_of_week'], categories=weekday_order, ordered=True))
    .sort_values(by=['day_of_week', 'hour_occ', 'count'])
    .assign(hour_occ=lambda frame: frame['hour_occ'].map(hour_to_degrees))
)
high_risk_crime_downtown_plot

Unnamed: 0,day_of_week,hour_occ,count
24,mon,0,53
25,mon,15,44
26,mon,30,37
27,mon,45,23
28,mon,60,26
...,...,...,...
91,sun,285,62
92,sun,300,52
93,sun,315,66
94,sun,330,58


In [49]:
# Polar "plot sheet" for downtown HRCs: angle = hour of day, radius = weekday,
# marker size = incident count. Per the original note, the figure was further
# edited with a custom template in Plotly.
hour_angles = list(range(0, 360, 15))   # one tick every 15 degrees ...
hour_labels = list(range(24))           # ... labelled with the hour it stands for

fig = px.scatter_polar(
    high_risk_crime_downtown_plot,
    r='day_of_week',
    theta='hour_occ',
    size='count',
    color_discrete_sequence=px.colors.sequential.Plasma_r,
)
fig.update_layout(
    polar={'angularaxis': {'tickvals': hour_angles, 'ticktext': hour_labels}}
)

py.iplot(fig, filename="Plot Sheet (HRC - Downtown)")

In [100]:
# Downtown HRCs per month for 2020. The y-axis is pinned to 0-200 so the yearly
# charts share one scale and can be compared by eye.
high_risk_crime_downtown_2020 = (
    high_risk_crime_downtown
    .groupby(['year_occ', 'month_occ'], as_index=False)
    .agg(count=('lat', 'count'))
    .query('year_occ == 2020')
    .sort_values(by='month_occ')
)
fig = px.bar(
    high_risk_crime_downtown_2020,
    x='month_occ',
    y='count',
    title='Distribution of high risk crimes by month (2020)',
)
fig.update_layout(yaxis={'range': [0, 200]})
fig.show()

In [101]:
# Downtown HRCs per month for 2021, on the same fixed 0-200 y-axis as the other
# yearly charts.
high_risk_crime_downtown_2021 = (
    high_risk_crime_downtown
    .groupby(['year_occ', 'month_occ'], as_index=False)
    .agg(count=('lat', 'count'))
    .query('year_occ == 2021')
    .sort_values(by='month_occ')
)
fig = px.bar(
    high_risk_crime_downtown_2021,
    x='month_occ',
    y='count',
    title='Distribution of high risk crimes by month (2021)',
)
fig.update_layout(yaxis={'range': [0, 200]})
fig.show()

In [102]:
# Downtown HRCs per month for 2022, on the same fixed 0-200 y-axis as the other
# yearly charts.
high_risk_crime_downtown_2022 = (
    high_risk_crime_downtown
    .groupby(['year_occ', 'month_occ'], as_index=False)
    .agg(count=('lat', 'count'))
    .query('year_occ == 2022')
    .sort_values(by='month_occ')
)
fig = px.bar(
    high_risk_crime_downtown_2022,
    x='month_occ',
    y='count',
    title='Distribution of high risk crimes by month (2022)',
)
fig.update_layout(yaxis={'range': [0, 200]})
fig.show()

In [103]:
# Downtown HRCs per month for 2023, on the same fixed 0-200 y-axis as the other
# yearly charts.
high_risk_crime_downtown_2023 = (
    high_risk_crime_downtown
    .groupby(['year_occ', 'month_occ'], as_index=False)
    .agg(count=('lat', 'count'))
    .query('year_occ == 2023')
    .sort_values(by='month_occ')
)
fig = px.bar(
    high_risk_crime_downtown_2023,
    x='month_occ',
    y='count',
    title='Distribution of high risk crimes by month (2023)',
)
fig.update_layout(yaxis={'range': [0, 200]})
fig.show()