# Imports

In [5]:
%%time

import numpy
import pandas
from mpl_toolkits.mplot3d import Axes3D
import seaborn
import matplotlib.pyplot as plt

%matplotlib inline
%pylab inline

seaborn.set_style('whitegrid')


Populating the interactive namespace from numpy and matplotlib
CPU times: user 8.18 ms, sys: 0 ns, total: 8.18 ms
Wall time: 22.1 ms


# Homicide reports 1980-2014
## Reading & filtering data
### First attempt to read data

In [None]:
homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz") \
                            .drop(['Record ID', 'Agency Code'], axis=1)
homicide.head()

### Perpetrator age contains int try and convert to int

In [None]:
homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": int}) \
                            .drop(['Record ID', 'Agency Code'], axis=1)
homicide[homicide['Perpetrator Age'] == ' '].head()

### Check dirty data field

In [None]:
homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": object}) \
                            .drop(['Record ID', 'Agency Code'], axis=1)
homicide[homicide['Perpetrator Age'] == ' '].head()

### Filter and generalize data and save to a csv for future use

In [None]:
%%time

homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": object}) \
                            .drop(['Record ID', 'Agency Code'], axis=1)
# Perpetrator Age contains dirty data

homicide['Perpetrator Age'] = homicide['Perpetrator Age'].replace(' ', 0).apply(pandas.to_numeric)

homicide.loc[(homicide['Relationship'] == 'Wife') | (homicide['Relationship'] == 'Ex-Wife') |
             (homicide['Relationship'] == 'Girlfriend') |
             (homicide['Relationship'] == 'Common-Law Wife'), 'Rel_Category'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Husband') | (homicide['Relationship'] == 'Ex-Husband') |
             (homicide['Relationship'] == 'Boyfriend') |
             (homicide['Relationship'] == 'Common-Law Husband'), 'Rel_Category'] = 'Partner-M'

homicide.loc[(homicide['Relationship'] == 'Father') | (homicide['Relationship'] == 'In-Law') |
             (homicide['Relationship'] == 'Mother') | (homicide['Relationship'] == 'Stepfather') |
             (homicide['Relationship'] == 'Stepmother'), 'Rel_Category'] = 'Parent'

homicide.loc[(homicide['Relationship'] == 'Daughter') | (homicide['Relationship'] == 'Son') |
             (homicide['Relationship'] == 'Stepdaughter') |
             (homicide['Relationship'] == 'Stepson'), 'Rel_Category'] = 'Children'

homicide.loc[(homicide['Relationship'] == 'Brother') | (homicide['Relationship'] == 'Sister'),
             'Rel_Category'] = 'Sibling'

homicide.loc[(homicide['Relationship'] == 'Employee') | (homicide['Relationship'] == 'Employer'),
             'Rel_Category'] = 'Work'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & (homicide['Victim Sex'] == 'Female'),
             'Rel_Category'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & ((homicide['Victim Sex'] == 'Male') |
            (homicide['Victim Sex'] == 'Unknown')), 'Rel_Category'] = 'Partner-M'

homicide.head(10)
homicide.to_csv('./datasets/homocide_reports_1980-2014_filtered.csv.gz', compression='gzip')

### Load pre-filtered csv

In [None]:
%time homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014_filtered.csv.gz").head(10)

### Check if we have null fields in our columns

In [None]:
%time homicide.isnull().any()

### Check types per comumn

In [None]:
%time homicide.dtypes

### Print description of our data set

In [None]:
%time homicide.describe()

### Show pepetrator sex and victim sex

In [None]:
%time pandas.crosstab(homicide['Victim Sex'], homicide['Perpetrator Sex'])

### Show weapon use by sex

In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
ax = seaborn.heatmap(pandas.crosstab(homicide.Weapon, homicide['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()


### Show weapon use by sex with unkown filtered out.

In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '20%', '75%', '100%'])
unknown_filter = homicide[homicide['Perpetrator Sex'] != 'Unknown']
ax = seaborn.heatmap(pandas.crosstab(unknown_filter.Weapon, unknown_filter['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()


### Amount of homicides per month

In [None]:
%%time

seaborn.countplot(x='Month', data=homicide, palette="husl")
seaborn.plt.tight_layout()


### Show perpetrator age with unkown ages filtered out

In [None]:
%%time

s = seaborn.factorplot(x='Perpetrator Age', data=homicide[homicide['Perpetrator Age'] != 0],
                        kind="count", aspect=4)
s.set_xticklabels(rotation=90)
seaborn.plt.tight_layout()


### Show homcide count by race

In [None]:
%%time

seaborn.countplot(x='Perpetrator Race', data=homicide, palette="dark")
seaborn.plt.tight_layout()


### Pepetrator gender vs weapon used

In [None]:
%%time

plt.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '25%', '75%', '100%'])
ax = seaborn.heatmap(pandas.crosstab(homicide['Perpetrator Sex'], homicide.Weapon).apply(lambda r: r / r.sum(), axis=1),
                     annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Perpetrator Gender vs Weapon Use')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])


### Count of homcides per state

In [None]:
%%time

seaborn.countplot(x='State', data=homicide, palette="husl")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.tight_layout()


# Sex offenders
## Read data set

In [None]:
%%time

sexoff = pandas.read_csv("./datasets/Sex_Offenders.csv", dtype={"AGE": int})

## Show data

In [None]:
%time sexoff.head(10)

## Check if tables contain null

In [None]:
%time sexoff.isnull().any()

## Check data types

In [None]:
%time sexoff.dtypes

## Show sex offenders by race

In [None]:
%%time

seaborn.countplot(x='RACE', data=sexoff, palette="dark")
seaborn.plt.show()


## Show amount of sex offenders by age when crime was commited

In [None]:
%%time

seaborn.countplot(x='AGE', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()


## Show amount of sex offenders by gender

In [None]:
%%time

seaborn.countplot(x='GENDER', data=sexoff, palette="dark")
seaborn.plt.show()

## Show amount of sex offenders by height

In [None]:
%%time

seaborn.countplot(x='HEIGHT', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()


## Show amount of minor victims

In [None]:
%%time

seaborn.countplot(x='VICTIM MINOR', data=sexoff, palette="dark")
seaborn.plt.show()


## Show amount of minor and no minor victims by targeted by offender sex

In [None]:
%%time

seaborn.countplot(x='GENDER', hue='VICTIM MINOR', data=sexoff, palette="Paired")
seaborn.plt.show()


# Gun offenders in chicago
## Read data

In [None]:
%%time

gunoff = pandas.read_csv("./datasets/Gun_Offenders.csv")


## Print first 10 rows

In [None]:
%time gunoff.head(10)


## Check if our columns contains null data

In [None]:
%time gunoff.isnull().any()


## Check which types our columns conain

In [None]:
%time gunoff.dtypes


## Show amount of gun offenders by race

In [None]:
%%time

seaborn.countplot(x='race', data=gunoff, palette="dark")
seaborn.plt.show()


## Show amount of gun offenders by age

In [None]:
%%time

seaborn.countplot(x='sex', data=gunoff, palette="dark")
seaborn.plt.show()


## Show ammount of gun offenders per state

In [None]:
%%time

seaborn.countplot(x='state', data=gunoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()


# Map US met hopelijk usefull data.



In [47]:
%%time
 

state_geo = './datatsets/state.geo.json'



#Let Folium determine the scale
map = folium.Map(location=[48, -102], zoom_start=3)
map.geo_json(geo_path=state_geo, data=homicide,
             columns=['State'],
             key_on='feature.id',
             fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
             legend_name='Murders(%)')
map

ModuleNotFoundError: No module named 'Folium'