In [None]:
%%time

import numpy
import pandas
from mpl_toolkits.mplot3d import Axes3D
import seaborn
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab star-imports numpy/matplotlib names into the namespace; the first two
# heatmap cells use the bare `pyplot` name, which presumably comes from this magic — confirm
# before removing it.
%pylab inline

# Start with seaborn's white-grid style (the heatmap cells later call seaborn.set() again).
seaborn.set_style('whitegrid')


In [None]:
# Load the homicide reports with 'Perpetrator Age' kept as raw text, and drop the
# identifier columns that the analysis does not use.
homicide = (
    pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": object})
    .drop(['Record ID', 'Agency Code'], axis=1)
)
# Preview a few of the rows whose age field is a single blank character.
homicide.loc[homicide['Perpetrator Age'].eq(' ')].head()

In [None]:
%%time

homicide = pandas.read_csv("./datasets/homocide_reports_1980-2014.csv.gz", dtype={"Perpetrator Age": object}) \
                            .drop(['Record ID', 'Agency Code'], axis=1)
# Perpetrator Age contains dirty data

homicide['Perpetrator Age'] = homicide['Perpetrator Age'].replace(' ', 0).apply(pandas.to_numeric)

homicide.loc[(homicide['Relationship'] == 'Wife') | (homicide['Relationship'] == 'Ex-Wife') |
             (homicide['Relationship'] == 'Girlfriend') |
             (homicide['Relationship'] == 'Common-Law Wife'), 'Rel_Category'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Husband') | (homicide['Relationship'] == 'Ex-Husband') |
             (homicide['Relationship'] == 'Boyfriend') |
             (homicide['Relationship'] == 'Common-Law Husband'), 'Rel_Category'] = 'Partner-M'

homicide.loc[(homicide['Relationship'] == 'Father') | (homicide['Relationship'] == 'In-Law') |
             (homicide['Relationship'] == 'Mother') | (homicide['Relationship'] == 'Stepfather') |
             (homicide['Relationship'] == 'Stepmother'), 'Rel_Category'] = 'Parent'

homicide.loc[(homicide['Relationship'] == 'Daughter') | (homicide['Relationship'] == 'Son') |
             (homicide['Relationship'] == 'Stepdaughter') |
             (homicide['Relationship'] == 'Stepson'), 'Rel_Category'] = 'Children'

homicide.loc[(homicide['Relationship'] == 'Brother') | (homicide['Relationship'] == 'Sister'),
             'Rel_Category'] = 'Sibling'

homicide.loc[(homicide['Relationship'] == 'Employee') | (homicide['Relationship'] == 'Employer'),
             'Rel_Category'] = 'Work'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & (homicide['Victim Sex'] == 'Female'),
             'Rel_Category'] = 'Partner-F'

homicide.loc[(homicide['Relationship'] == 'Boyfriend/Girlfriend') & ((homicide['Victim Sex'] == 'Male') |
            (homicide['Victim Sex'] == 'Unknown')), 'Rel_Category'] = 'Partner-M'


In [None]:
# Preview the first ten rows of the homicide frame.
%time homicide.head(10)

In [None]:
# Per column: does it contain at least one missing value?
%time homicide.isnull().any()

In [None]:
# Column dtypes of the homicide frame.
%time homicide.dtypes

In [None]:
# Summary statistics from DataFrame.describe().
%time homicide.describe()

In [None]:
# Contingency table of record counts: victim sex (rows) against perpetrator sex (columns).
%time pandas.crosstab(homicide['Victim Sex'], homicide['Perpetrator Sex'])

In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
ax = seaborn.heatmap(pandas.crosstab(homicide.Weapon, homicide['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()


In [None]:
%%time

pyplot.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '20%', '75%', '100%'])
unknown_filter = homicide[homicide['Perpetrator Sex'] != 'Unknown']
ax = seaborn.heatmap(pandas.crosstab(unknown_filter.Weapon, unknown_filter['Perpetrator Sex'])
                     .apply(lambda r: r / r.sum(), axis=1), annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Weapon Use vs Gender')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
ax.plot()


In [None]:
%%time

seaborn.countplot(x='Month', data=homicide, palette="husl")
seaborn.plt.tight_layout()


In [None]:
%%time

s = seaborn.factorplot(x='Perpetrator Age', data=homicide[homicide['Perpetrator Age'] != 0],
                        kind="count", aspect=4)
s.set_xticklabels(rotation=90)
seaborn.plt.tight_layout()


In [None]:
%%time

seaborn.countplot(x='Perpetrator Race', data=homicide, palette="dark")
seaborn.plt.tight_layout()


In [None]:
%%time

plt.figure(figsize=(12, 12), facecolor='#efefef')
seaborn.set()
# ax.set_ticklabels(['0%', '20%', '75%', '100%'])
ax = seaborn.heatmap(pandas.crosstab(homicide['Perpetrator Sex'], homicide.Weapon).apply(lambda r: r / r.sum(), axis=1),
                     annot=True, fmt=".0%", linewidths=.5, cmap='Blues')
ax.set_title('Perpetrator Gender vs Weapon Use')
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, .25, .50, .75, 1])
cbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])


In [None]:
%%time

seaborn.countplot(x='State', data=homicide, palette="husl")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.tight_layout()


In [None]:
%%time

# Load the sex-offender dataset with pandas' default dtype inference.
sexoff = pandas.read_csv("./datasets/Sex_Offenders.csv")

In [None]:
# Preview the first ten rows.
%time sexoff.head(10)

In [None]:
# Per column: does it contain at least one missing value?
%time sexoff.isnull().any()

In [None]:
# Column dtypes as inferred by read_csv.
%time sexoff.dtypes

In [None]:
%%time

seaborn.countplot(x='RACE', data=sexoff, palette="dark")
seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='AGE', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='GENDER', data=sexoff, palette="dark")
seaborn.plt.show()

In [None]:
%%time

seaborn.countplot(x='HEIGHT', data=sexoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='VICTIM MINOR', data=sexoff, palette="dark")
seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='GENDER', hue='VICTIM MINOR', data=sexoff, palette="Paired")
seaborn.plt.show()


In [None]:
%%time

# Load the gun-offender dataset with pandas' default dtype inference.
gunoff = pandas.read_csv("./datasets/Gun_Offenders.csv")


In [None]:
# Preview the first ten rows.
%time gunoff.head(10)


In [None]:
# Per column: does it contain at least one missing value?
%time gunoff.isnull().any()


In [None]:
# Column dtypes as inferred by read_csv.
%time gunoff.dtypes


In [None]:
%%time

seaborn.countplot(x='race', data=gunoff, palette="dark")
seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='sex', data=gunoff, palette="dark")
seaborn.plt.show()


In [None]:
%%time

seaborn.countplot(x='state', data=gunoff, palette="dark")

locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

seaborn.plt.show()
