In [15]:
%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
plt.ion()

# Load the arrests CSV, keep only the arrest date and offence description,
# drop rows without an offence description, then derive YEAR / MONTH columns
# from the arrest date.
data = (
    pd.read_csv("NYPD_arrests.csv", parse_dates=["ARREST_DATE"])
    .loc[:, ["ARREST_DATE", "OFNS_DESC"]]
    .dropna(subset=["OFNS_DESC"])
    .assign(
        YEAR=lambda frame: pd.DatetimeIndex(frame["ARREST_DATE"]).year,
        MONTH=lambda frame: pd.DatetimeIndex(frame["ARREST_DATE"]).month,
    )
)


# Filter data by offense
def get_offence_data(data, offence):
    """Return the rows of ``data`` whose ``OFNS_DESC`` value equals ``offence``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing an ``OFNS_DESC`` column.
    offence : str
        Offence description to match exactly.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels and columns.
    """
    matches_offence = data['OFNS_DESC'].eq(offence)
    return data.loc[matches_offence]

In [16]:
# Split the pruned data into one frame per offence of interest
data_drugs, data_robbery, data_assault = (
    get_offence_data(data, offence_name)
    for offence_name in (
        'DANGEROUS DRUGS',
        'ROBBERY',
        'ASSAULT 3 & RELATED OFFENSES',
    )
)

In [17]:
# Plot the monthly arrest counts of the three offence groups on one axes
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)

trend_series = (
    ("Dangerous Drugs", data_drugs),
    ("Robbery", data_robbery),
    ("Assault", data_assault),
)
for label, offence_frame in trend_series:
    # Bucket the arrests by month ('M' frequency) and count the rows in each bucket
    monthly_counts = offence_frame.groupby(pd.Grouper(key='ARREST_DATE', freq='M')).size()
    ax.plot(monthly_counts, label=label)

ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Number of arrests')
ax.set_title('Arrests per month since 2006 (Trend)')
ax.grid()

# Write the figure to disk, then display it
fig.savefig("Trend.png")
plt.show()

In [18]:
def get_normalized_arrests(crime_type, years=range(2006, 2021)):
    """Compute the relative weight of each calendar month for one offence.

    For every requested year, each month's arrest count is divided by that
    year's total, giving the month's share of the year. The shares are summed
    per calendar month across years and then rescaled so the twelve returned
    values average 1.0.

    Parameters
    ----------
    crime_type : pandas.DataFrame
        Arrest rows with ``YEAR`` and ``MONTH`` columns (``MONTH`` in 1..12).
    years : iterable of int, optional
        Years to include. Defaults to 2006 through 2020. Years with no rows
        in ``crime_type`` are skipped.

    Returns
    -------
    list of float
        Twelve values, index 0 for January through index 11 for December.

    Raises
    ------
    ValueError
        If none of the requested years has any arrests, since the result
        cannot be normalized.
    """
    months_in_year = 12
    share_totals = [0.0] * months_in_year
    arrests_per_year = crime_type.groupby("YEAR").size()
    arrests_per_month = crime_type.groupby(["YEAR", "MONTH"]).size()

    for year in years:
        if year not in arrests_per_year.index:
            # No arrests recorded for this year, so it contributes nothing
            continue
        year_total = arrests_per_year.loc[year]
        # Place each count by its calendar month rather than its position, so
        # a month with no arrests cannot shift later months into earlier slots
        for month, arrests in arrests_per_month.loc[year].items():
            share_totals[int(month) - 1] += arrests / year_total

    grand_total = sum(share_totals)
    if grand_total == 0:
        raise ValueError("no arrests found in the requested years")

    # Build a fresh list on every call; rescale so the mean of the values is 1.0
    return [share * months_in_year / grand_total for share in share_totals]

In [23]:
# Seasonal profile of each offence: the relative weight of every calendar month
normalized_drugs = get_normalized_arrests(data_drugs)
normalized_assault = get_normalized_arrests(data_assault)
normalized_robbery = get_normalized_arrests(data_robbery)

fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
# Attach each label to its own line so the legend cannot fall out of step
# with the plotting order
for label, crime_type in [
    ("Dangerous drugs", normalized_drugs),
    ("Assault", normalized_assault),
    ("Robbery", normalized_robbery),
]:
    ax.plot(crime_type, label=label)

ax.legend()
# The plotted values are normalized ratios, not raw counts, so the axis label
# and title describe them as such
ax.set(
    xlabel='Month',
    ylabel='Relative share of arrests (1.0 = average month)',
    title='Seasonal distribution of arrests by month (normalized)',
)
ax.set_xticks(np.arange(0, 12))
ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
ax.grid()

# Write the figure to disk, then display it
fig.savefig("Yearly.png")
plt.show()