# Project 3, Part 1
*  **Nicholas Giuffrida**

In [1]:
# Standard library
import datetime as dt
import glob

# Third-party
import holidays
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mticks
import numpy as np
import pandas as pd
import seaborn as sns
from holidays import country_holidays

# Notebook-wide plotting defaults: white figure background, wide 12x4 figures,
# and seaborn's "talk" context with slightly reduced fonts.
plt.rcParams.update({
    'figure.facecolor': 'white',
    'figure.figsize': (12, 4),
})
sns.set_context("talk", font_scale=0.9)

In [2]:
# Collect the per-year crime CSV paths and order them by filename.
crime_files = glob.glob("Data/Chicago-Crime_*.csv")
crime_files.sort()
crime_files


['Data/Chicago-Crime_2001.csv',
 'Data/Chicago-Crime_2002.csv',
 'Data/Chicago-Crime_2003.csv',
 'Data/Chicago-Crime_2004.csv',
 'Data/Chicago-Crime_2005.csv',
 'Data/Chicago-Crime_2006.csv',
 'Data/Chicago-Crime_2007.csv',
 'Data/Chicago-Crime_2008.csv',
 'Data/Chicago-Crime_2009.csv',
 'Data/Chicago-Crime_2010.csv',
 'Data/Chicago-Crime_2011.csv',
 'Data/Chicago-Crime_2012.csv',
 'Data/Chicago-Crime_2013.csv',
 'Data/Chicago-Crime_2014.csv',
 'Data/Chicago-Crime_2015.csv',
 'Data/Chicago-Crime_2016.csv',
 'Data/Chicago-Crime_2017.csv',
 'Data/Chicago-Crime_2018.csv',
 'Data/Chicago-Crime_2019.csv',
 'Data/Chicago-Crime_2020.csv',
 'Data/Chicago-Crime_2021.csv',
 'Data/Chicago-Crime_2022.csv']

In [None]:
# Read every yearly CSV and stack the frames into one.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each file's own row labels are kept, so the same label repeats across files.
df = pd.concat([pd.read_csv(f) for f in crime_files], ignore_index=True)
df.head()

In [None]:
# List the column labels of the combined frame.
df.columns

In [None]:
# Summarize column dtypes and non-null counts of the combined frame.
df.info()

In [None]:
# Count missing values in each column.
df.isna().sum()

In [None]:
# Discard rows with no District value; the integer cast of District that
# follows cannot represent missing values.
df = df.dropna(subset=["District"])

In [None]:
# Store District as an integer now that rows without a District are gone.
df = df.astype({'District': int})

In [None]:
# Re-inspect dtypes and non-null counts after the District clean-up.
df.info()

In [None]:
# Grab the current local timestamp; it serves as a sample value for trying
# out the date format string.
now = dt.datetime.now()
now

In [None]:
# Date pattern: month/day/4-digit year, then 12-hour clock time with AM/PM.
fmt = "%m/%d/%Y %I:%M:%S %p"
# Render the sample timestamp with this pattern to preview the resulting text.
now.strftime(fmt)

In [None]:
# Parse the Date strings into datetimes using the explicit format above
# (an explicit format avoids per-value format inference).
parsed_dates = pd.to_datetime(df['Date'], format=fmt)
df = df.assign(Date=parsed_dates)
df.info()

In [None]:
# Reshape to long format: Date, Primary Type and District stay as identifier
# columns, and every remaining column is unpivoted into a name/value pair.
# Columns the analysis never uses are dropped first.
unused_cols = ['Latitude', 'Longitude', 'Ward', 'Arrest', 'Beat',
               'Description', 'Location Description', 'Domestic']
melted = pd.melt(df.drop(columns=unused_cols),
                 id_vars=['Date', 'Primary Type', 'District'],
                 var_name="Crime",
                 # The value column needs a plain string label that does not
                 # reuse an id_vars name such as 'Date'.
                 value_name="Value")
melted

In [None]:
# Index the long frame by timestamp and put it in chronological order so the
# datetime accessors on the index (month, year) can be used later.
melted = melted.set_index("Date").sort_index()
melted.head(3)

## Topic 1) Comparing Police Districts
*  Which district had the most crimes in 2022?
* Which had the least?

In [None]:
# For every distinct timestamp, count how many records each district has.
# The result is indexed by (Date, District) and sorted on that index.
# NOTE(review): the section question asks about 2022, but no year filter is
# applied here, so all years are included — confirm this is intended.
district_by_timestamp = melted.groupby('Date')['District']
crimes_by_dist = district_by_timestamp.value_counts().sort_index()
crimes_by_dist

In [None]:
# Flatten the (Date, District) counts into a tidy frame whose count column
# is named 'Crimes'.
plot_crime = crimes_by_dist.rename('Crimes').reset_index()
plot_crime

In [None]:
# Keep only rows that actually recorded at least one crime.
nonzero = plot_crime['Crimes'] != 0
plot_crime = plot_crime[nonzero]
plot_crime.head()


In [None]:
# Check the dtype of each column in the plotting frame.
plot_crime.dtypes

In [None]:
# Count how often each distinct (Date, District, Crimes) row occurs.
plot_crime.value_counts()

In [None]:
# Summarize dtypes and non-null counts of the plotting frame.
plot_crime.info()

###  Total Approach
*  We can simply check the total number of crimes reported by district.

In [None]:
sns.set_context("poster", font_scale=0.6)
# Sum the per-timestamp counts so each bar is the district's total number of
# crimes. Counting rows instead would only count the distinct timestamps at
# which a district had any crime.
district_totals = plot_crime.groupby('District', as_index=False)['Crimes'].sum()
ax = sns.barplot(data=district_totals, x='District', y='Crimes')
ax.set(title='Total Crimes by District', xlabel='District', ylabel='Crimes');

###  Total Approach answer:
*  The most crimes were reported in Districts 11 and 8.
*  The fewest were in Districts 21 and 31.

In [None]:
# Within each district, compute the fraction of rows taken by each distinct
# 'Crimes' value (normalize=True), sorted by (District, Crimes).
# NOTE(review): this is the distribution of per-timestamp counts inside a
# district, not the district's share of all crimes — confirm this is the
# intended normalization.
crimes_within_district = plot_crime.groupby('District')['Crimes']
crime_perc_by_dist = crimes_within_district.value_counts(normalize=True).sort_index()
crime_perc_by_dist

In [None]:
# Flatten the normalized counts into a tidy frame with a 'Crime Perc' column.
plot_df_perc = crime_perc_by_dist.rename('Crime Perc').reset_index()
plot_df_perc

In [None]:
# Summary statistics for the numeric columns of the normalized frame.
plot_df_perc.describe()

###  Normalized Approach
*  View the totals as a portion of the total reported.

In [None]:
# One bar per district; bar height is the number of rows that district has in
# plot_df_perc.
# NOTE(review): countplot ignores the 'Crime Perc' column — confirm this is
# the intended view.
ax = sns.countplot(x='District', data=plot_df_perc)
fig = ax.figure
# Rotate and right-align the x tick labels.
fig.autofmt_xdate()

###  Normalized Approach answer
*  We can see that while Districts 21 and 31 still represent the low end of the spectrum:

   - District 11 has dropped to the middle of the pack, and
   - District 25 has become the new entry into the *Most Crimes* category.

##  Topic 4) Comparing Months
1. What months have the most crime? What months have the least?
1. Are there any individual crimes that do not follow this pattern? If so, which crimes?

In [None]:
# Engineering features by date time
melted['Month']  = melted.index.month
melted['Month'] = melted['Month']

In [None]:
# Number of records in each month, most frequent month first.
melted['Month'].value_counts()

In [None]:
# One bar per month number; bar height is the number of records in that month.
ax = sns.countplot(x='Month', data=melted)
fig = ax.figure
# Rotate and right-align the x tick labels.
fig.autofmt_xdate()

###  Topic 4 Part 1 answer
-  January and February had the fewest crimes, while July and August had the most.

 -  Let's break that down to see if certain crimes do not follow the same trend.

In [None]:
# For every primary crime type, count the records falling in each month.
# The result is indexed by (Primary Type, Month) and sorted on that index.
month_by_type = melted.groupby('Primary Type')['Month']
crimes_by_month = month_by_type.value_counts().sort_index()
crimes_by_month

In [None]:
# Flatten the (Primary Type, Month) counts into a tidy frame whose count
# column is named 'Crimes'.
plot_df = crimes_by_month.rename('Crimes').reset_index()
plot_df

In [None]:
# plot_df has one row per (Primary Type, Month), so each month has several
# 'Crimes' values; lineplot aggregates them per month (mean by default) and
# draws an uncertainty band around the line.
ax = sns.lineplot(data=plot_df, x='Month',y='Crimes')

*  Here we can see that the overall crime trend within the city fluctuates greatly depending on the solar season.

 - Crime goes up when temperatures go up.

In [None]:
# Same monthly counts, drawn as one line per crime type on a tall figure.
fig, ax = plt.subplots(figsize=(12, 20))
sns.lineplot(
    data=plot_df,
    x='Month',
    y='Crimes',
    hue='Primary Type',
    markers=True,
    ax=ax,
)
# Anchor the legend at axes coordinates (1, 1).
ax.legend(bbox_to_anchor=[1, 1]);

###  Captain Chicago
 *  It seems obvious that we are working for a superhero.
 * He is going to single-handedly put a dent in crime in Chicago.
 * He needs to know what time of year to focus his efforts on which types of crimes.
 * He is certainly NOT Dan Aykroyd (probably).

In [None]:
# Number of rows (month entries) present for each primary crime type.
plot_df['Primary Type'].value_counts()

###  Remove Statutory
*  **Captain Chicago** has no time for statutory crime.

 -  That's his motto.
 -  It's not the best motto.
 
* We will remove the *statutory crimes* by
 - first defining all the values that constitute *statutory*,
 - then excluding those from our visualizations.

In [None]:
# Collapse fine-grained offense labels into two umbrella categories.
statutory_labels = [
    'CONCEALED CARRY LICENSE VIOLATION',
    'WEAPONS VIOLATION',
    'PUBLIC INDECENCY',
    'PUBLIC PEACE VIOLATION',
    'LIQUOR LAW VIOLATION',
    'PROSTITUTION',
    'OTHER NARCOTIC VIOLATION',
    'OBSCENITY',
    'NON-CRIMINAL',
    'NARCOTICS',
    'GAMBLING',
    'INTERFERENCE WITH PUBLIC OFFICER',
    'RITUALISM',
    'NON - CRIMINAL',
    'NON-CRIMINAL (SUBJECT SPECIFIED)',
]
sex_offense_labels = ['CRIMINAL SEXUAL ASSAULT', 'CRIM SEXUAL ASSAULT']

# Old label -> umbrella label; labels not listed are left unchanged.
type_mapping = {
    **dict.fromkeys(statutory_labels, 'STATUTORY OFFENSE'),
    **dict.fromkeys(sex_offense_labels, 'SEX OFFENSE'),
}
plot_df['Primary Type'] = plot_df['Primary Type'].replace(type_mapping)
plot_df['Primary Type'].value_counts()

In [None]:
# Exclude the umbrella statutory category from the plots.
plot_df = plot_df[plot_df['Primary Type'] != 'STATUTORY OFFENSE']
# After relabelling, several original labels share one 'Primary Type' (for
# example those merged into 'SEX OFFENSE'), leaving multiple rows per type
# and month. Sum them so 'Crimes' is the total for the merged type; otherwise
# the point plots would show the mean of the old labels' counts.
plot_df = (plot_df
           .groupby(['Primary Type', 'Month'], as_index=False)['Crimes']
           .sum())
plot_df['Primary Type'].value_counts()

In [None]:
# Small multiples: one point-plot panel per crime type, two panels per row,
# each with independent x and y axes.
facet_options = dict(col='Primary Type', col_wrap=2, sharey=False, sharex=False)
g = sns.catplot(data=plot_df, kind='point', x='Month', y='Crimes',
                hue='Primary Type', height=4, aspect=2, **facet_options)
# Turn on the x-axis grid lines in every panel.
for panel in g.axes.flat:
    panel.grid(axis='x')

###   Topic 4 part 2 answer
*  The most notable crime type that goes against the overall seasonal trend is Deceptive Practice, which rises to its peak around the end of the calendar year.

 -  We cannot rule out that Santa Claus may have something to do with these "Deceptive Practices".
 - Captain Chicago (probably not Dan Aykroyd) is on it!

##  Topic 2  Crimes Across the Years:
1. Is the total number of crimes increasing or decreasing across the years?
1. Are there any individual crimes that are doing the opposite?

In [None]:
# Derive the calendar year of each record from the datetime index.
melted['Year']  = melted.index.year
melted['Year']

In [None]:
# One bar per year; bar height is the number of records in that year.
ax = sns.countplot(x='Year', data=melted)
fig = ax.figure
# Rotate and right-align the year tick labels so they do not overlap.
fig.autofmt_xdate()

###  Topic 2 part 1 answer
*  The overall trend is that crime has been steadily decreasing from 2001 to the present.

 -  Let's see if any individual crimes buck that trend.

In [None]:
# For every primary crime type, count the records falling in each year.
# The result is indexed by (Primary Type, Year) and sorted on that index.
year_by_type = melted.groupby('Primary Type')['Year']
crimes_by_year = year_by_type.value_counts().sort_index()
crimes_by_year

In [None]:
# Flatten the (Primary Type, Year) counts into a tidy frame whose count
# column is named 'Crimes'.
plot_df_y = crimes_by_year.rename('Crimes').reset_index()
plot_df_y

In [None]:
# Collapse fine-grained offense labels into two umbrella categories.
statutory_labels = [
    'CONCEALED CARRY LICENSE VIOLATION',
    'WEAPONS VIOLATION',
    'PUBLIC INDECENCY',
    'PUBLIC PEACE VIOLATION',
    'LIQUOR LAW VIOLATION',
    'PROSTITUTION',
    'OTHER NARCOTIC VIOLATION',
    'OBSCENITY',
    'NON-CRIMINAL',
    'NARCOTICS',
    'GAMBLING',
    'INTERFERENCE WITH PUBLIC OFFICER',
    'RITUALISM',
    'NON - CRIMINAL',
    'NON-CRIMINAL (SUBJECT SPECIFIED)',
]
sex_offense_labels = ['CRIMINAL SEXUAL ASSAULT', 'CRIM SEXUAL ASSAULT']

# Old label -> umbrella label; labels not listed are left unchanged.
type_mapping = {
    **dict.fromkeys(statutory_labels, 'STATUTORY OFFENSE'),
    **dict.fromkeys(sex_offense_labels, 'SEX OFFENSE'),
}
plot_df_y['Primary Type'] = plot_df_y['Primary Type'].replace(type_mapping)
plot_df_y['Primary Type'].value_counts()

In [None]:
# Exclude the umbrella statutory category from the plots.
plot_df_y = plot_df_y[plot_df_y['Primary Type'] != 'STATUTORY OFFENSE']
# After relabelling, several original labels share one 'Primary Type' (for
# example those merged into 'SEX OFFENSE'), leaving multiple rows per type
# and year. Sum them so 'Crimes' is the total for the merged type; otherwise
# the point plots would show the mean of the old labels' counts.
plot_df_y = (plot_df_y
             .groupby(['Primary Type', 'Year'], as_index=False)['Crimes']
             .sum())
plot_df_y['Primary Type'].value_counts()

In [None]:
# Small multiples: one point-plot panel per crime type, two panels per row,
# each with independent x and y axes.
facet_options = dict(col='Primary Type', col_wrap=2, sharey=False, sharex=False)
g = sns.catplot(data=plot_df_y, kind='point', x='Year', y='Crimes',
                hue='Primary Type', height=4, aspect=3, **facet_options)
# Turn on the x-axis grid lines in every panel.
for panel in g.axes.flat:
    panel.grid(axis='x')

###  Topic 2 part 2 Answer
-  We can see that *Human Trafficking*, *Stalking*, *Sex Offense*, and *Homicide* have each gone against the greater downward trend in crime.