In [1]:
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objects as go
import folium

In [2]:
# Load the complaint records. low_memory=False makes pandas read the file in
# one pass so each column's dtype is inferred from the whole column.
df = pd.read_csv(
    'NYPD_Complaint_Data_Historic.csv',
    low_memory=False,
)

# Historical trend in the number of complaints

In [4]:
# Derive the year and month (as strings) from the complaint start date.
# The slices assume the date text looks like MM/DD/YYYY: the first two
# characters are the month and everything from position 6 on is the year.
#
# The vectorised .str accessor is used so that a missing date stays missing
# (NaN) in both derived columns. Assigning `df[col].dropna()` back to the
# column does not remove anything (the assignment re-aligns on the index), and
# slicing str(NaN) == 'nan' would yield the bogus values '' and 'na'.
df['CMPLNT_FR_YEAR'] = df['CMPLNT_FR_DT'].str[6:]
df['CMPLNT_FR_MONTH'] = df['CMPLNT_FR_DT'].str[:2]

In [19]:
# Monthly complaint counts for 2006 onwards.
# The year column holds strings, so '>=' is a lexicographic comparison.
df_line = (
    df.loc[df['CMPLNT_FR_YEAR'] >= '2006']
    .groupby(['CMPLNT_FR_YEAR', 'CMPLNT_FR_MONTH'])
    .count()  # non-null tally for every column; CMPLNT_NUM is used as the monthly count
    .reset_index()
)

# '<year>-<month>' label for each row, used as the x-axis value when plotting.
df_line['time'] = df_line[['CMPLNT_FR_YEAR', 'CMPLNT_FR_MONTH']].agg('-'.join, axis=1)

# 12-row rolling mean; the first 11 rows are NaN until a full window exists.
df_line['MA12'] = df_line['CMPLNT_NUM'].rolling(window=12).mean()

In [20]:
# Plot the monthly complaint counts together with their 12-month moving average.
fig = go.Figure()

# Raw monthly counts
fig.add_trace(go.Scatter(x=df_line['time'], y=df_line['CMPLNT_NUM'],
                    mode='lines+markers',
                    name='Complaints'))

# Yearly (12-month) moving average on the same axes
fig.add_trace(go.Scatter(x=df_line['time'], y=df_line['MA12'],
                    mode='lines',
                    name='1Y Moving Average'))

# Optional annotation, currently disabled:
# fig.add_annotation(x='2014-07', y=45042,
#                    text="Chokehold ban",
#                    showarrow=True,
#                    arrowhead=1,
#                    yshift=10)

# Title and axis labels so the figure can be understood on its own.
fig.update_layout(title='Monthly complaints with 12-month moving average',
                  xaxis_title='Month (YYYY-MM)',
                  yaxis_title='Number of complaints')

fig.show()
# Observations:
# - the series shows seasonality
# - the overall trend is downward

# Location analysis of complaints

In [14]:
# Base map centred on latitude 40.7128, longitude -74.0060 (New York City),
# rendered with OpenStreetMap tiles.
m = folium.Map(
    location=[40.7128, -74.0060],
    tiles='OpenStreetMap',
)
    
def _summarise_precincts(data, year, crime):
    '''
    Return one row per precinct with its complaint count, centroid and marker radius.

    The result is indexed by ADDR_PCT_CD and has the columns CMPLNT_NUM (number of
    matching rows), Latitude / Longitude (mean of the matching rows) and Radius
    (count scaled so that the busiest precinct gets 10).
    '''
    columns = ['CMPLNT_NUM', 'Latitude', 'Longitude', 'Radius']

    selected = data[(data['ADDR_PCT_CD'] > 0)
                    & (data['CMPLNT_FR_YEAR'] == year)
                    & (data['LAW_CAT_CD'] == crime)]
    if selected.empty:
        # Nothing matched: hand back an empty summary with the expected columns.
        return pd.DataFrame(columns=columns)

    summary = (
        selected[['CMPLNT_NUM', 'ADDR_PCT_CD', 'Latitude', 'Longitude']]
        .groupby('ADDR_PCT_CD')
        .agg({'CMPLNT_NUM': 'size', 'Latitude': 'mean', 'Longitude': 'mean'})
        # A precinct whose rows all lack coordinates has a NaN centroid and
        # cannot be placed on the map, so it is left out.
        .dropna(subset=['Latitude', 'Longitude'])
    )
    summary['Radius'] = summary['CMPLNT_NUM'] / summary['CMPLNT_NUM'].max() * 10
    return summary


def fill_map(data, year, crime, base_map=None):
    '''
    Draw one circle per precinct, sized by the number of matching complaints.

    Rows of `data` are kept when ADDR_PCT_CD > 0, CMPLNT_FR_YEAR == year and
    LAW_CAT_CD == crime. They are grouped by precinct; each precinct is drawn at
    the mean of its coordinates with a radius proportional to its complaint
    count (the busiest precinct gets radius 10). The popup shows the count.

    Parameters
    ----------
    data : DataFrame with the columns CMPLNT_NUM, ADDR_PCT_CD, CMPLNT_FR_YEAR,
        LAW_CAT_CD, Latitude and Longitude.
    year : value compared for equality with CMPLNT_FR_YEAR (e.g. '2019').
    crime : value compared for equality with LAW_CAT_CD (e.g. 'FELONY').
    base_map : optional folium.Map to draw on. When omitted, a fresh map is
        created for this call so that markers from earlier calls do not pile up.

    Returns
    -------
    The folium map with the markers added. As a side effect the map is also
    written to 'simple_dot_plot.html' in the working directory.
    '''
    precincts = _summarise_precincts(data, year, crime)

    if base_map is None:
        base_map = folium.Map(location=[40.7128, -74.0060], tiles='OpenStreetMap')

    # itertuples keeps each column's own dtype, so the count stays an integer
    # (iterrows would upcast the whole row to float and show e.g. '1234.0').
    for row in precincts.itertuples():
        size = float(row.Radius)
        folium.CircleMarker(location=[float(row.Latitude), float(row.Longitude)],
                            radius=size,
                            weight=size,
                            popup=str(int(row.CMPLNT_NUM))).add_to(base_map)

    # Zoom to the markers; with no markers there are no bounds to fit.
    if len(precincts) > 0:
        base_map.fit_bounds(base_map.get_bounds())

    # Save the map to an HTML file
    base_map.save('simple_dot_plot.html')

    # Returned so a notebook cell ending in this call displays the map
    return base_map

In [16]:
# Map of felony complaints per precinct for 2019 (last expression, so the map is displayed).
fill_map(data=df, year='2019', crime='FELONY')