In [None]:
##importing modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
## Reading latest data
# Only the 2012-2017 extract is analysed in this notebook. The earlier extracts
# (Chicago_Crimes_2001_to_2004.csv, Chicago_Crimes_2005_to_2007.csv,
# Chicago_Crimes_2008_to_2011.csv) can be read with the same call if needed.
#
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines="warn"` is its replacement with the same effect: malformed
# rows are skipped and a warning is emitted for each of them.
Chicago = pd.read_csv("Chicago_Crimes_2012_to_2017.csv", on_bad_lines="warn")


# DataSet Info


 - ID : Unique identifier for the record.

 - Case Number : The Chicago Police Department RD Number (Records Division Number), which is unique to the incident.

 - Date - Date when the incident occurred. This is sometimes a best estimate.

 - Block - The partially redacted address where the incident occurred, placing it on the same block as the actual address.

 - IUCR - The Illinois Uniform Crime Reporting code. This is directly linked to the Primary Type and Description. See the list of IUCR codes at https://data.cityofchicago.org/d/c7ck-438e.

 - Primary Type - The primary description of the IUCR code.

 - Description - The secondary description of the IUCR code, a subcategory of the primary description.

 - Location Description - Description of the location where the incident occurred.

 - Arrest - Indicates whether an arrest was made.

 - Domestic - Indicates whether the incident was domestic-related as defined by the Illinois Domestic Violence Act.

 - Beat - Indicates the beat where the incident occurred. A beat is the smallest police geographic area – each beat has a dedicated police beat car. Three to five beats make up a police sector, and three sectors make up a police district. The Chicago Police Department has 22 police districts. See the beats at https://data.cityofchicago.org/d/aerh-rz74.

 - District - Indicates the police district where the incident occurred. See the districts at https://data.cityofchicago.org/d/fthy-xz3r.

 - Ward - The ward (City Council district) where the incident occurred. See the wards at https://data.cityofchicago.org/d/sp34-6z76.

 - Community Area - Indicates the community area where the incident occurred. Chicago has 77 community areas. See the community areas at https://data.cityofchicago.org/d/cauq-8yn6.

 - FBI Code - Indicates the crime classification as outlined in the FBI's National Incident-Based Reporting System (NIBRS). See the Chicago Police Department listing of these classifications at http://gis.chicagopolice.org/clearmap_crime_sums/crime_types.html.

 - X Coordinate - The x coordinate of the location where the incident occurred in State Plane Illinois East NAD 1983 projection. This location is shifted from the actual location for partial redaction but falls on the same block.

 - Y Coordinate - The y coordinate of the location where the incident occurred in State Plane Illinois East NAD 1983 projection. This location is shifted from the actual location for partial redaction but falls on the same block.

 - Year - Year the incident occurred.

 - Updated On - Date and time the record was last updated.

 - Latitude - The latitude of the location where the incident occurred. This location is shifted from the actual location for partial redaction but falls on the same block.

 - Longitude - The longitude of the location where the incident occurred. This location is shifted from the actual location for partial redaction but falls on the same block.

 - Location - The location where the incident occurred in a format that allows for creation of maps and other geographic operations on this data portal. This location is shifted from the actual location for partial redaction but falls on the same block.

In [None]:
# Preview the first 100 rows of the loaded frame.
Chicago.head(100)

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
Chicago.info()

In [None]:
# Summary statistics of the numeric columns.
Chicago.describe()

# Checking for null values

In [None]:
# Number of missing values in each column.
Chicago.isna().sum()

In [None]:
# (rows, columns) before rows with missing values are removed.
Chicago.shape

# Removing null values

In [None]:
# Keep only the rows that have no missing value in any column.
Chicago = Chicago.dropna()

In [None]:
# (rows, columns) after rows with missing values were removed.
Chicago.shape

# Drop unnecessary columns

In [None]:
# Remove the 'Unnamed: 0', 'ID' and 'Case Number' columns; none of them is
# used by the analysis that follows.
identifier_columns = ["Unnamed: 0", "ID", "Case Number"]
Chicago = Chicago.drop(columns=identifier_columns)

In [None]:
# Remove the IUCR code and the projected / combined location columns; the
# analysis below does not read them.
redundant_columns = ['IUCR', 'X Coordinate', 'Y Coordinate', 'Location']
Chicago = Chicago.drop(columns=redundant_columns)

# Changing Datatypes

In [None]:
## Parse 'Date' and use it as the datetime index

# The parsed values are stored back in the 'Date' column as well as in the
# index, because later cells read Chicago['Date'] directly.
parsed_dates = pd.to_datetime(Chicago['Date'], format='%m/%d/%Y %I:%M:%S %p')
Chicago['Date'] = parsed_dates
Chicago.index = pd.DatetimeIndex(parsed_dates)

In [None]:
# Re-check index type, dtypes and non-null counts after the conversion.
Chicago.info()

# Handling Duplicates

In [None]:
# Boolean flag per row: True when the row repeats an earlier row in every column.
Chicago.duplicated()

In [None]:
# Total number of rows flagged as duplicates.
Chicago.duplicated().sum()

In [None]:
# Drop rows that duplicate an earlier row across all remaining columns.
# NOTE(review): 'ID' and 'Case Number' were dropped before this step, so
# distinct incidents that agree on every remaining column are treated as
# duplicates here — confirm this is intended.
Chicago = Chicago.drop_duplicates()

In [None]:
# (rows, columns) after duplicate rows were removed.
Chicago.shape

In [None]:
# Summary statistics of the numeric columns after cleaning.
Chicago.describe()

# Handling Columns

In [None]:
# Display the 'Primary Type' column.
Chicago['Primary Type']

In [None]:
# Distinct 'Primary Type' values present in the data.
Chicago['Primary Type'].unique()

# deleting non-criminal acts

In [None]:

# Remove the primary types that this analysis excludes.
#
# Rows are filtered with a boolean mask instead of `drop(index_labels)`:
# the index holds the incident timestamps, which are not guaranteed to be
# unique, so dropping by label would also delete every unrelated incident
# that shares a timestamp with an excluded row.
excluded_primary_types = [
    'PUBLIC INDECENCY',
    'NON-CRIMINAL (SUBJECT SPECIFIED)',
    'NON-CRIMINAL',
    'NON - CRIMINAL',
    'OBSCENITY',
    'CONCEALED CARRY LICENSE VIOLATION',
]
is_excluded = Chicago['Primary Type'].isin(excluded_primary_types)
# .copy() gives an independent frame, so later column assignments do not
# raise SettingWithCopyWarning.
Chicago = Chicago[~is_excluded].copy()

In [None]:
# Distinct 'Primary Type' values left after the exclusion step.
Chicago['Primary Type'].unique()

# Visualizations

# Grouping similar crimes together

In [None]:


# Collapse the detailed 'Primary Type' values into five broad groups, stored
# in a new 'Type' column. Each key is a primary type and each value is the
# group it is assigned to.
PRIMARY_TYPE_GROUPS = {
    'MOTOR VEHICLE THEFT': 'THEFT',
    'THEFT': 'THEFT',
    'ROBBERY': 'THEFT',
    'BURGLARY': 'THEFT',
    'ASSAULT': 'ASSAULT',
    'PROSTITUTION': 'ASSAULT',
    'BATTERY': 'ASSAULT',
    'CRIM SEXUAL ASSAULT': 'ASSAULT',
    'SEX OFFENSE': 'ASSAULT',
    'INTIMIDATION': 'ASSAULT',
    'STALKING': 'ASSAULT',
    'ARSON': 'ASSAULT',
    'KIDNAPPING': 'ASSAULT',
    'OFFENSE INVOLVING CHILDREN': 'ASSAULT',
    'PUBLIC PEACE VIOLATION': 'ASSAULT',
    'OTHER NARCOTIC VIOLATION': 'NARCOTICS',
    'NARCOTICS': 'NARCOTICS',
    'LIQUOR LAW VIOLATION': 'NARCOTICS',
    'CRIMINAL DAMAGE': 'CRIMINAL DAMAGE',
    'HUMAN TRAFFICKING': 'CRIMINAL DAMAGE',
    'WEAPONS VIOLATION': 'CRIMINAL DAMAGE',
    'INTERFERENCE WITH PUBLIC OFFICER': 'CRIMINAL DAMAGE',
    'CRIMINAL TRESPASS': 'CRIMINAL DAMAGE',
    'HOMICIDE': 'OTHER OFFENSE',
    'DECEPTIVE PRACTICE': 'OTHER OFFENSE',
    'OTHER OFFENSE': 'OTHER OFFENSE',
    'GAMBLING': 'OTHER OFFENSE',
}

primary_type = Chicago['Primary Type']
# One boolean mask per primary type, in the same order as the group labels.
condition = [primary_type == name for name in PRIMARY_TYPE_GROUPS]
categ = list(PRIMARY_TYPE_GROUPS.values())

# An explicit string default is required. np.select's implicit default is the
# integer 0, which turns unmapped rows into a bogus '0' category on older
# NumPy and raises a TypeError (no common dtype with the string choices) on
# NumPy 2. Any primary type missing from the mapping falls into 'OTHER OFFENSE'.
Chicago['Type'] = np.select(condition, categ, default='OTHER OFFENSE')

In [None]:
# Display the grouped 'Type' column.
Chicago['Type']

# crimes by type

In [None]:


# Horizontal bar chart of the number of incidents per 'Primary Type',
# smallest count at the bottom of the sort order.
fig, ax = plt.subplots(figsize=(14, 10))
ax.set_title('Amount of Crimes by Primary Type')
ax.set_ylabel('Crime Type')
ax.set_xlabel('Amount of Crimes')
primary_type_counts = Chicago.groupby('Primary Type').size().sort_values(ascending=True)
primary_type_counts.plot(kind='barh', ax=ax)


In [None]:
# Horizontal bar chart of the number of incidents per grouped 'Type'.
# The title names the grouped 'Type' column that is actually plotted
# (it previously said 'Primary Type').
plt.figure(figsize=(14,10))
plt.title('Amount of Crimes by Type')
plt.ylabel('Crime Type')
plt.xlabel('Amount of Crimes')
Chicago.groupby('Type').size().sort_values(ascending=True).plot(kind='barh')

In [None]:
# Horizontal bar chart of the number of incidents per grouped 'Type'.
# The title names the grouped 'Type' column that is actually plotted
# (it previously said 'Primary Type').
# NOTE(review): this cell draws the same chart as the cell before it and can
# probably be deleted.
plt.figure(figsize=(14,10))
plt.title('Amount of Crimes by Type')
plt.ylabel('Crime Type')
plt.xlabel('Amount of Crimes')
Chicago.groupby('Type').size().sort_values(ascending=True).plot(kind='barh')

# crimes by domestic


In [None]:


# Horizontal bar chart comparing the number of incidents for each value of
# the 'Domestic' flag.
fig, ax = plt.subplots(figsize=(11, 10))
ax.set_title('Amount of Crimes by Domestic')
ax.set_ylabel('Domestic')
ax.set_xlabel('Amount of Crimes')
domestic_counts = Chicago.groupby('Domestic').size().sort_values(ascending=True)
domestic_counts.plot(kind='barh', ax=ax)

In [None]:
# Preview the first 50 rows of the cleaned frame.
Chicago.head(50)

In [None]:
# Distinct 'Location Description' values present in the data.
Chicago['Location Description'].unique()

# crimes by Place

In [None]:


import seaborn as sns

# Count plot restricted to the 15 most frequent location descriptions,
# ordered from most to least frequent.
top_locations = Chicago['Location Description'].value_counts().iloc[:15].index

plt.figure(figsize=(11, 10))
sns.countplot(y='Location Description', data=Chicago, order=top_locations)
plt.title('Top 15 Places where happend (Crimes of Chicago)')

# Number of Arrested Cases

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Pie chart of the share of incidents for each value of the 'Arrest' flag.
fig = px.pie(
    Chicago,
    names="Arrest",
    title='Number of arrested cases',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
fig.show()

# Amount of Crimes by Type

In [None]:
# Count plot of the grouped 'Type' column with the exact count written on
# top of every bar.
plt.figure(figsize=(20, 8), dpi=80)
sns.set_theme(style="whitegrid")

# Computed once and reused for both the bar order and the labels, so the two
# cannot get out of sync.
type_counts = Chicago['Type'].value_counts()
sns.countplot(x="Type", palette='magma', data=Chicago , dodge=True , order = type_counts.index )

order1 = type_counts.to_list()
# Label every bar that exists instead of assuming exactly five categories:
# a fixed range(5) raises IndexError with fewer categories and leaves bars
# unlabelled with more.
for i, count in enumerate(order1):
    strt='{:.0f}'.format(count)
    plt.text(i,count,strt,ha='center', zorder=10)
plt.show()

# Crimes Per Year

In [None]:
# Line chart of the number of incidents in each calendar year, using the
# datetime index for the resampling.
crimes_per_year = Chicago.resample('Y').size()
plt.plot(crimes_per_year)
plt.title('Crime count per year')
plt.xlabel('Years')
plt.ylabel('Number of Crimes')

# Crimes per month over years

In [None]:
# Line chart of the number of incidents in each calendar month across the
# whole period.
crimes_per_month = Chicago.resample('M').size()
plt.plot(crimes_per_month)
plt.title('Crime count per month')
plt.xlabel('Months')
plt.ylabel('Number of Crimes')

# Crimes per Quarter

In [None]:
# Bucket the incidents into calendar quarters and show the number of
# incidents in each bucket.
q = Chicago.resample('Q')
q.size()

In [None]:
# Line chart of the number of incidents in each calendar quarter.
crimes_per_quarter = Chicago.resample('Q').size()
plt.plot(crimes_per_quarter)
plt.title('Crime count per quarter')
plt.xlabel('Quarter')
plt.ylabel('Number of Crimes')

In [None]:
# Number of incidents per calendar day, busiest days first.
# 'Date' holds full timestamps, so each value is truncated to midnight before
# counting; counting the raw values would rank individual timestamps rather
# than days.
Max_crimes_per_day = Chicago['Date'].dt.normalize().value_counts().to_frame()

In [None]:
# Show the first 15 rows of the counts (value_counts sorts them descending).
Max_crimes_per_day.head(15)

# Correlation

In [None]:
# Pairwise correlation of the numeric and boolean columns.
# The frame also holds text and datetime columns. Older pandas dropped them
# silently, but pandas >= 2.0 tries to convert them and fails, so the numeric
# and boolean columns are selected explicitly.
Chicago.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Annotated heatmap of the correlation matrix, rounded to one decimal.
# Numeric and boolean columns are selected explicitly because corr() on
# pandas >= 2.0 no longer drops the text and datetime columns by itself.
matrix = Chicago.select_dtypes(include=['number', 'bool']).corr().round(1)
sns.heatmap(matrix, annot=True)
plt.show()

In [None]:
# Heatmap of the correlation matrix with the upper triangle (including the
# diagonal) hidden, on a diverging colour scale fixed to [-1, 1].
# Numeric and boolean columns are selected explicitly because corr() on
# pandas >= 2.0 no longer drops the text and datetime columns by itself.
matrix = Chicago.select_dtypes(include=['number', 'bool']).corr().round(1)
# np.triu keeps True on and above the diagonal; seaborn hides masked cells.
mask = np.triu(np.ones_like(matrix, dtype=bool))
sns.heatmap(matrix, annot=True, vmax=1, vmin=-1, center=0, cmap='vlag', mask=mask)
plt.show()

In [None]:
# Data type of every column.
Chicago.dtypes

In [None]:
# Display the parsed 'Date' column.
Chicago.Date

# Forecasting

In [None]:
from prophet import Prophet

In [None]:
# Number of incidents per distinct timestamp, in the ds / y column layout.
# The frame's index is also named 'Date', so groupby('Date') is ambiguous
# between the index level and the column and raises ValueError. The index is
# discarded first so that the column is the only 'Date'.
df1 = (
    Chicago[['Date', 'Primary Type']]
    .reset_index(drop=True)
    .groupby('Date')
    .count()
    .reset_index()
    .rename(columns={'Date': 'ds', 'Primary Type': 'y'})
)


In [None]:
# Number of incidents per calendar day, taken from the datetime index.
# reset_index() turns the dates into a regular column; this replaces any
# earlier df1.
df1 = Chicago.resample('D').size().reset_index()

In [None]:
# Inspect the daily counts before the columns are renamed.
df1

In [None]:
# Rename to the 'ds' (date) / 'y' (value) column names that Prophet expects.
# The count column is labelled with the integer 0 after reset_index().
df1 = df1.rename(columns={'Date':'ds', 0:'y'})

In [None]:
# Inspect the frame that will be passed to the model.
df1

In [None]:
#define model

model = Prophet()

In [None]:
##fit into model

model.fit(df1)

In [None]:
#generating future forecast for next 365 days

future = model.make_future_dataframe(periods=365)

In [None]:
# Make predictions

forecast = model.predict(future)

In [None]:
# Plot the forecast produced by the fitted model.
model.plot(forecast)