# Imports

In [None]:
#%%time
import numpy
import geopandas
import folium
import pandas
from mpl_toolkits.mplot3d import Axes3D
import seaborn
import matplotlib.pyplot as plt
from ipywidgets import widgets
from IPython.display import clear_output
import os

%matplotlib inline
%pylab inline

seaborn.set_style('whitegrid')

print('numpy: ' + numpy.__version__)
print('geopandas: ' + geopandas.__version__)
print('folium: ' + folium.__version__)
print('pandas: ' + pandas.__version__)
print('seaborn: ' + seaborn.__version__)
print('matplotlib: ' + matplotlib.__version__)

# Homicide reports 1980-2014
## Reading & filtering data
### First attempt to read data

In [None]:
# First, unconfigured read of the raw homicide reports (default dtype inference).
homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz")
# Preview the first five values of the column at position 16.
homicide.iloc[:, 16].head(5)

### Perpetrator Age should contain integers: try to convert the column to int

In [None]:
# Deliberately attempt the integer cast and report the failure:
# the error message shows which raw value cannot be parsed.
try:
    homicide['Perpetrator Age'].astype(int)
except ValueError as conversion_error:
    print("{}: {}".format(type(conversion_error).__name__, conversion_error.args))

### Check dirty data field

In [None]:
# Select the rows whose 'Perpetrator Age' is exactly a single-space string.
homicide[homicide['Perpetrator Age'] == ' ']

In [None]:
# Inspect the single record at positional index 634666.
# NOTE(review): presumably one of the blank-age rows listed by the previous cell - confirm.
homicide.iloc[634666]

In [None]:
# List rows whose 'Crime Solved' value is anything other than 'No' or 'Yes'.
homicide[~homicide['Crime Solved'].isin(['No', 'Yes'])]

### Filter and generalize data and save to a csv for future use

In [None]:
%%time

homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": object}) \
                            .drop(['Record ID', 'Agency Code'], axis=1)

# loc[row index, column index] when assiging a value the column index is important.
homicide.loc[homicide['Crime Solved'] == 'No', 'Crime Solved'] = False
homicide['Crime Solved'] = homicide['Crime Solved'].astype(bool)

# Perpetrator Age contains dirty data
# homicide['Perpetrator Age'] = homicide['Perpetrator Age'].replace(' ', 0).apply(pandas.to_numeric)

homicide.loc[homicide['Perpetrator Age'] == ' ', 'Perpetrator Age'] = 0
# set dtype to int
homicide['Perpetrator Age'] = homicide['Perpetrator Age'].astype(int)

homicide['General relation'] = homicide['Relationship']
homicide.loc[(homicide['Relationship'] == 'Wife') | (homicide['Relationship'] == 'Ex-Wife') |
             (homicide['Relationship'] == 'Girlfriend') |
             (homicide['Relationship'] == 'Common-Law Wife'), 'General relation'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Husband') | (homicide['Relationship'] == 'Ex-Husband') |
             (homicide['Relationship'] == 'Boyfriend') |
             (homicide['Relationship'] == 'Common-Law Husband'), 'General relation'] = 'Partner-M'

homicide.loc[(homicide['Relationship'] == 'Father') | (homicide['Relationship'] == 'In-Law') |
             (homicide['Relationship'] == 'Mother') | (homicide['Relationship'] == 'Stepfather') |
             (homicide['Relationship'] == 'Stepmother'), 'General relation'] = 'Parent'

homicide.loc[(homicide['Relationship'] == 'Daughter') | (homicide['Relationship'] == 'Son') |
             (homicide['Relationship'] == 'Stepdaughter') |
             (homicide['Relationship'] == 'Stepson'), 'General relation'] = 'Children'

homicide.loc[(homicide['Relationship'] == 'Brother') | (homicide['Relationship'] == 'Sister'),
             'General relation'] = 'Sibling'

homicide.loc[(homicide['Relationship'] == 'Employee') | (homicide['Relationship'] == 'Employer'),
             'General relation'] = 'Work'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & (homicide['Victim Sex'] == 'Female'),
             'General relation'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & ((homicide['Victim Sex'] == 'Male') |
            (homicide['Victim Sex'] == 'Unknown')), 'General relation'] = 'Partner-M'

homicide.to_csv('./datasets/homocide_reports_1980-2014_filtered.csv.gz', index = False, compression = 'gzip')

homicide

### Load pre-filtered csv

In [None]:
# Load the pre-filtered, gzip-compressed CSV into `homicide`, timing the read.
%time homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014_filtered.csv.gz")

### Check if we have null fields in our columns

In [None]:
# For each column, report whether it contains at least one null value.
%time homicide.isnull().any()

### Check types per column

In [None]:
# Show the dtype of every column in the loaded frame.
%time homicide.dtypes

### Print description of our data set

In [None]:
# Summary statistics of the frame, using describe()'s default settings.
%time homicide.describe()

## Show victim sex

In [None]:
inputGender = widgets.Text(description="Perpetrator Sex:")
display(inputGender)

def submit(sender):
    """Plot the victim-sex counts for the perpetrator sex typed into the text box.

    Registered as the submit callback of ``inputGender``. ``sender`` is the argument
    the widget passes to the callback; it is not used.
    """
    clear_output()
    display(inputGender)
    # Ignore accidental surrounding whitespace in the typed value.
    gender = inputGender.value.strip()
    homicidefiltered = homicide[homicide['Perpetrator Sex'] == gender]
    if homicidefiltered.empty:
        # Nothing to count: list the values that exist instead of handing an empty frame to seaborn.
        known = ', '.join(sorted(homicide['Perpetrator Sex'].dropna().astype(str).unique()))
        print("No records for perpetrator sex '" + gender + "'. Known values: " + known)
        return
    seaborn.countplot(x='Victim Sex', data=homicidefiltered)
    # The callback runs outside normal cell execution, so show the figure explicitly
    # rather than relying on automatic end-of-cell rendering.
    plt.show()

inputGender.on_submit(submit)

### Show perpetrator sex and victim sex

In [None]:
# Contingency table of record counts: rows are 'Victim Sex', columns are 'Perpetrator Sex'.
%time pandas.crosstab(homicide['Victim Sex'], homicide['Perpetrator Sex'])

### Show weapon use by sex

In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
ax = seaborn.heatmap(pandas.crosstab(homicide.Weapon, homicide['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()

### Show weapon use by sex with unknown filtered out.

In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '20%', '75%', '100%'])
unknown_filter = homicide[homicide['Perpetrator Sex'] != 'Unknown']
ax = seaborn.heatmap(pandas.crosstab(unknown_filter.Weapon, unknown_filter['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()

# Favorite weapon: male vs female

In [None]:
%%time

Male_Filter = unknown_filter.loc[unknown_filter['Perpetrator Sex'] == "Male"]#['Weapon']
Female_Filter = unknown_filter.loc[unknown_filter['Perpetrator Sex'] == "Female"]##['Weapon']

In [None]:
# Explicit weapon order for the bars of this chart.
weapon_order = ['Handgun', 'Knife', 'Blunt Object', 'Strangulation', 'Rifle', 'Firearm',
                'Shotgun', 'Fall', 'Drowning', 'Suffocation',
                'Fire', 'Drugs', 'Explosives', 'Gun', 'Poison', 'Unknown']

# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed; use whichever exists.
categorical_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot

# Number of records per weapon for male perpetrators.
s = categorical_plot(x='Weapon', data=Male_Filter, kind="count", aspect=3, order=weapon_order)

In [None]:
# Explicit weapon order for the bars of this chart.
weapon_order = ['Handgun', 'Knife', 'Blunt Object', 'Strangulation', 'Rifle', 'Firearm',
                'Shotgun', 'Fall', 'Drowning', 'Suffocation',
                'Fire', 'Drugs', 'Explosives', 'Gun', 'Poison', 'Unknown']

# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed; use whichever exists.
categorical_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot

# Number of records per weapon for female perpetrators.
s = categorical_plot(x='Weapon', data=Female_Filter, kind="count", aspect=3, order=weapon_order)
s.set_xticklabels(rotation=90)

### Amount of homicides per month

In [None]:
%%time

# Count the records per value of 'Month'; `s` holds the object returned by countplot.
s = seaborn.countplot(x='Month', data=homicide, palette="husl")

### Show perpetrator age with unknown ages filtered out

In [None]:
%%time

s = seaborn.factorplot(x='Perpetrator Age', data=homicide[homicide['Perpetrator Age'] != 0],
                        kind="count", aspect=4)
s.set_xticklabels(rotation=90)

### Show perpetrator age with unknown ages filtered out, grouped by age range

In [None]:
# Age-bin edges. pandas.cut uses right-closed intervals by default, so e.g. age 15 lands
# in '10 - 15'. Age 0 lies outside the first interval (0, 10] and therefore gets no group.
# `numpy` is the explicitly imported module; the bare `np` alias only exists through %pylab.
bins = [0,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,numpy.inf]
labels = ['1-10','10 - 15','15 - 20','20 - 25','25 - 30','30 - 35','35 - 40','40 - 45','45 - 50','50 - 55','55 - 60','60 - 65','65 - 70','70 - 75','75 - 80','80 - 85', '85 - 90', '90+']
age_groups = pandas.cut(homicide['Perpetrator Age'], bins, labels=labels)
homicide['Age group'] = age_groups

# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed; use whichever exists.
categorical_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot

# Records per age group, leaving out the rows where the age is 0.
s = categorical_plot(x='Age group', data=homicide[homicide['Perpetrator Age'] != 0],
                     kind="count", aspect=4)
s.set_xticklabels(rotation=90)

### Show homicide count by race

In [None]:
%%time

# Count the records per value of 'Perpetrator Race'.
seaborn.countplot(x='Perpetrator Race', data=homicide, palette="dark")

### Perpetrator gender vs weapon used

In [None]:
%%time

plt.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '25%', '75%', '100%'])
ax = seaborn.heatmap(pandas.crosstab(homicide['Perpetrator Sex'], homicide.Weapon).apply(lambda r: r / r.sum(), axis=1),
                     annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Perpetrator Gender vs Weapon Use')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])


### Count of homicides per state

In [None]:
%%time

seaborn.countplot(x='State', data=homicide, palette="husl")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

# Sex offenders
## Read data set

In [None]:
%%time

# Load the sex-offender CSV into `sexoff`, reading the AGE column as integers.
sexoff = pandas.read_csv("./datasets/Sex_Offenders.csv", dtype={"AGE": int})

## Show data

In [None]:
# Display the loaded frame; the %time figure only covers evaluating the name.
%time sexoff

## Check if tables contain null

In [None]:
# For each column, report whether it contains at least one null value.
%time sexoff.isnull().any()

## Check data types

In [None]:
# Show the dtype of every column in the loaded frame.
%time sexoff.dtypes

## Show sex offenders by race

In [None]:
%%time

seaborn.countplot(x='RACE', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=60)

seaborn.plt.show()


## Show number of sex offenders by age when the crime was committed

In [None]:
%%time

seaborn.countplot(x='AGE', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

## Show number of sex offenders grouped by age when the crime was committed

In [None]:
%%time

bins = [0,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,np.inf]
labels = ['1-10','10 - 15','15 - 20','20 - 25','25 - 30','30 - 35','35 - 40','40 - 45','45 - 50','50 - 55','55 - 60','60 - 65','65 - 70','70 - 75','75 - 80','80 - 85', '85 - 90', '90+']
age_groups = pandas.cut(sexoff.AGE, bins, labels=labels)
sexoff['Age group'] = age_groups

seaborn.countplot(x='Age group', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

## Show amount of sex offenders by gender

In [None]:
%%time

# Number of sex-offender records per GENDER value.
seaborn.countplot(x='GENDER', data=sexoff, palette="dark")

## Show amount of sex offenders by height

In [None]:
%%time

seaborn.countplot(x='HEIGHT', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

## Show amount of minor victims

In [None]:
%%time

# Number of sex-offender records per value of the 'VICTIM MINOR' column.
seaborn.countplot(x='VICTIM MINOR', data=sexoff, palette="dark")

## Show number of minor and non-minor victims by offender sex

In [None]:
%%time

# Records per GENDER, with one bar per 'VICTIM MINOR' value inside each gender (hue).
seaborn.countplot(x='GENDER', hue='VICTIM MINOR', data=sexoff, palette="Paired")

# Gun offenders in Chicago
## Read data

In [None]:
%%time

# Load the gun-offender CSV into `gunoff` with pandas' default settings.
gunoff = pandas.read_csv("./datasets/Gun_Offenders.csv")

## Print first 10 rows

In [None]:
# Show only the first 10 rows, as the section heading says. The bare frame was displayed
# before, and timing a plain name lookup carried no information.
gunoff.head(10)

## Check if our columns contain null data

In [None]:
# For each column, report whether it contains at least one null value.
%time gunoff.isnull().any()

## Check which types our columns contain

In [None]:
# Show the dtype of every column in the loaded frame.
%time gunoff.dtypes

## Show amount of gun offenders by race

In [None]:
%%time

# Number of gun-offender records per value of the 'race' column.
seaborn.countplot(x='race', data=gunoff, palette="dark")

## Show amount of gun offenders by Sex

In [None]:
%%time

# Number of gun-offender records per value of the 'sex' column.
seaborn.countplot(x='sex', data=gunoff, palette="dark")

## Show number of gun offenders per state

In [None]:
%%time

seaborn.countplot(x='state', data=gunoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

# Map US With Homicide rates per State



In [None]:
%%time

# Load the per-state homicide table; the map cell reads its 'STATE' and 'RATE' columns.
homicides_per_state = pandas.read_csv("./datasets/homicides_per_state.csv")


In [None]:
#%%time
 


# Path to the GeoJSON file with the state outlines.
us_states = os.path.join('datasets', 'us-states.json')

# NOTE(review): the name `map` shadows Python's builtin; it is kept because it is the
# notebook's existing name for this object.
map = folium.Map(location=[48, -102], zoom_start=3)

# Colour each state by its 'RATE', matching rows to GeoJSON features through
# 'STATE' <-> feature.id.
choropleth_options = dict(geo_data=us_states,
                          data=homicides_per_state,
                          columns=['STATE', 'RATE'],
                          key_on='feature.id',
                          fill_color='YlOrRd',
                          legend_name='Homicide Rate (%)')

# `Map.choropleth` was deprecated in favour of the `folium.Choropleth` layer class and
# later removed; use the class when it exists and fall back to the method otherwise.
if hasattr(folium, 'Choropleth'):
    folium.Choropleth(**choropleth_options).add_to(map)
else:
    map.choropleth(**choropleth_options)
map

## Map US with Firearm mortality rates per state:

In [None]:
%%time

# Load the firearm table; the map cell reads its 'STATE' and 'RATE' columns.
firearms = pandas.read_csv("./datasets/firearms.csv")


In [None]:
# NOTE(review): the name `map` shadows Python's builtin; it is kept because it is the
# notebook's existing name for this object.
map = folium.Map(location=[48, -102], zoom_start=3)

# Colour each state by its 'RATE', matching rows to GeoJSON features through
# 'STATE' <-> feature.id. `us_states` is the GeoJSON path defined in an earlier cell.
choropleth_options = dict(geo_data=us_states,
                          data=firearms,
                          columns=['STATE', 'RATE'],
                          key_on='feature.id',
                          fill_color='YlOrRd',
                          legend_name='Gun Mortality Rate (%)')

# `Map.choropleth` was deprecated in favour of the `folium.Choropleth` layer class and
# later removed; use the class when it exists and fall back to the method otherwise.
if hasattr(folium, 'Choropleth'):
    folium.Choropleth(**choropleth_options).add_to(map)
else:
    map.choropleth(**choropleth_options)
map