# Part 1 Crime Exploration

> This notebook uses Baltimore City's Part 1 crime data.

## Major Findings

1. Column names are different between the two years (`desc_` vs. `descriptio`).
2. The description columns share the same set of unique values, but 2019 has fewer columns:
 - 2019 has no arson, shooting, or common assault.
3. Counting the points that fall within a CSA returned these results:

| Description | 18 Values | 19 Values |
| --- | --- | --- |
| Total Points | 48143.0 | 37166 |
| Total Points in Polygons | 47748 | 37018 |
| Proportion of Points in Polygons | 0.9917952765718796 | 0.9960178657913147 |

4. There is no firearm description (although we do have a shooting column in the 19 dataset), which means that `gunhom` can't be calculated.

## What's Inside?

1. Retrieve everything done in the previous notebook.
2. Compare aggregates and indicators.

## SETUP Environment:

### Import Modules

In [None]:
%%capture
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary

In [None]:
%%capture
! apt-get install build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev

In [None]:
%%capture
!pip install rtree
!pip install dexplot

In [None]:
%%capture 
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim

# In case file is KML, enable support.
# Both spellings of the driver name are registered with 'rw' in fiona's
# supported-driver table.
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

In [None]:
# Clear this cell's output area (used here after the setup cells above).
from IPython.display import clear_output
clear_output(wait=True)

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

### Configure Environment

In [None]:
# Display preferences for the rest of the notebook (output beautification only).
from IPython.core.interactiveshell import InteractiveShell

# Show the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = "all"

# pandas rendering options, applied in one pass.
for option_name, option_value in (
    ('display.expand_frame_repr', False),
    ('display.precision', 2),
    ('max_colwidth', 20),
):
    pd.set_option(option_name, option_value)

### (Optional) GoogleDrive Access

In [None]:
# (Optional) Run this cell to gain access to Google Drive (Colab only).
from google.colab import drive

# Colab operates in a virtualized environment.
# Colab's default working directory is /content.
# We mount Drive at '/content/drive'.

drive.mount('/content/drive')

In [None]:
cd drive/'MyDrive'/vitalSigns

In [None]:
ls

In [None]:
cd p1crime

In [None]:
ls

### Razzle Dazzle

In [None]:
# Load the combined 18/19 indicators and description aggregates (per the file name)
# from the current working directory.
df2 = pd.read_csv('p1crime_18_19_indicators_and_desc_aggregates.csv')

In [None]:
# Columns carried into the visualization extract, in the order they are written.
# The 18_/19_ prefixes presumably mark the 2018 and 2019 datasets — confirm
# against the notebook that produced the source CSV.
viz_columns = [
    '19_CSA2010', '18_Desc__SHOOTING', '18_crime18', '18_viol18', '18_prop18',
    '18_gunhom18', '19_Desc__AGG. ASSAULT',
    '19_Desc__AUTO THEFT', '19_Desc__BURGLARY', '19_Desc__HOMICIDE',
    '19_Desc__LARCENY', '19_Desc__LARCENY FROM AUTO', '19_Desc__RAPE',
    '19_Desc__ROBBERY - CARJACKING', '19_Desc__ROBBERY - COMMERCIAL',
    '19_Desc__ROBBERY - RESIDENCE', '19_Desc__ROBBERY - STREET',
    '19_crime19', '19_viol19', '19_prop19', '18_Unnamed: 0',
    '18_Desc__AGG. ASSAULT', '18_Desc__ARSON', '18_Desc__AUTO THEFT',
    '18_Desc__BURGLARY', '18_Desc__COMMON ASSAULT', '18_Desc__HOMICIDE',
    '18_Desc__LARCENY', '18_Desc__LARCENY FROM AUTO', '18_Desc__RAPE',
    '18_Desc__ROBBERY - CARJACKING', '18_Desc__ROBBERY - COMMERCIAL',
    '18_Desc__ROBBERY - RESIDENCE', '18_Desc__ROBBERY - STREET',
]

# index=False keeps the row index out of the file; otherwise reading the CSV
# back adds a spurious 'Unnamed: 0' column that every report would then profile.
df2[viz_columns].to_csv('vizthis.csv', index=False)

In [None]:
# Read the extract written by the previous cell; `df` is the frame the
# profiling/visualization cells below operate on.
df = pd.read_csv('vizthis.csv')

In [None]:
!pip install sweetviz
!pip install autoviz

In [None]:
# https://medium.com/python-in-plain-english/how-to-use-pandas-profiling-on-google-colab-e34f34ff1c9f
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

In [None]:
#hide_input 
# Preview the first rows of the extract.
df.head()

In [None]:
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

In [None]:
#hide 
# df = df.drop('geometry',axis=1) 

In [None]:
# Build a pandas-profiling report for `df`, requesting the full-width HTML style.
design_report = ProfileReport(
    df, 
    title="Pandas Profiling Report One", 
    html= {'style': { 'full_width': True } } 
)
# Write the report to an HTML file in the current working directory.
design_report.to_file(output_file='pandas-profiling.html')

#from IPython.display import display,HTML,IFrame
#display(HTML(open('pandas-profiling.html').read()))

# Show the same report inside the notebook as an iframe.
design_report.to_notebook_iframe()

In [None]:
# Render the saved profiling report inline.
# HTML is imported here because no earlier cell brings it into scope, and the
# `with` block closes the file once it has been read.
from IPython.display import HTML, display

with open('pandas-profiling.html') as report_file:
    display(HTML(report_file.read()))

In [None]:
# https://pypi.org/project/sweetviz/
import sweetviz as sv
from IPython.display import display,HTML,IFrame

# Analyze `df` and write the report to an HTML file in the working directory.
sweet_report = sv.analyze(df)
sweet_report.show_html('sweet_report.html')

# Render the saved report inline; the `with` block closes the file after reading.
with open('sweet_report.html') as report_file:
    display(HTML(report_file.read()))

In [None]:
ls

In [None]:
!pip install pivottablejs

In [None]:
#hide_input
import dexplot as dxp
import pandas as pd 

# https://github.com/JasonKessler/scattertext
# https://colab.research.google.com/drive/1VIMd9jhpNgB9siYlTSFqjV62mMQQFx4K

from pivottablejs import pivot_ui
from IPython.display import HTML
#if using locally you can just use the following to display the output: pivot_ui(df)
# As we are using colab, we will just download the output - this can then be opened in a new tab in the browser


# Write the interactive pivot table for `df` to an HTML file.
x = pivot_ui(df ,outfile_path='pivottablejs.html')

# pivot_ui( dxp.load_dataset('airbnb') ,outfile_path='pivottablejs.html')
# filename= makes HTML load the file's contents; a bare positional string is
# treated as raw markup and would only render the text 'pivottablejs.html'.
HTML(filename='pivottablejs.html')

# if you want to download to open in a new tab in the browser - use the below:
# from google.colab import files
# files.download('pivottablejs.html') 

In [None]:
# List the column labels available to the plotting cells below.
df.columns

In [None]:
# https://github.com/AutoViML/AutoViz
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
# NOTE(review): this rebinds `df` to whatever AutoViz returns, and every later
# cell that uses `df` sees that object — confirm it is still the full DataFrame.
df = AV.AutoViz('vizthis.csv') # , depVar='CSA2010'

Reference: [Exploratory Data Analysis: DataPrep.eda vs Pandas-Profiling](https://towardsdatascience.com/exploratory-data-analysis-dataprep-eda-vs-pandas-profiling-7137683fe47f)

In [None]:
!pip install dataprep

In [None]:
# dataprep's EDA helpers; called with only the frame, plot() covers the whole of `df`.
from dataprep.eda import plot, plot_correlation, plot_missing, create_report
plot(df)

In [None]:
# Single-column plot for '18_Desc__ARSON'.
plot(df, "18_Desc__ARSON") #, bins=2)

In [None]:
# Two-column plots: each 18_ indicator paired with its 19_ counterpart.
plot(df, "18_crime18", "19_crime19") #, bins=2)

In [None]:
plot(df, "18_viol18", "19_viol19") #, bins=2)

In [None]:
plot(df, "18_prop18", "19_prop19") #, bins=2)

In [None]:
# Plot whichever gunhom columns are present: '19_gunhom19' is not among the
# columns written to vizthis.csv (gunhom can't be calculated for the 19 data),
# so naming it unconditionally would fail.
gunhom_columns = [name for name in ("18_gunhom18", "19_gunhom19") if name in df.columns]
plot(df, *gunhom_columns) #, bins=2)

In [None]:
# Correlations involving '18_prop18', with value_range set to the full [-1, 1] span.
plot_correlation(df, "18_prop18", value_range=[-1, 1])

In [None]:
# Columns in the extract carry an 18_/19_ prefix; the unprefixed names used
# before do not exist in `df`. Arson is only available in the 18 data.
plot_correlation(df, x="18_Desc__ARSON", y="18_Desc__AUTO THEFT", k=5) # top k attributes

In [None]:
# Keep the missing-value report object so it can be saved and displayed below.
missing = plot_missing(df) #, "Desc__ARSON")

In [None]:
# Save the report under the given name.
missing.save('My missing Report')

In [None]:
# Display the report inline.
missing

In [None]:
# Build dataprep's full report for `df`.
report = create_report(df)

In [None]:
# Save the report under the given name.
report.save('My Fantastic Report')

In [None]:
# Display the report inline.
report

In [None]:
ls

#### Scratch experiments (`dft18`, `csa`, and `df18` are not defined in this notebook)

In [None]:
# NOTE(review): neither `dft18` nor `csa` is defined in the cells visible here;
# this cell needs both to already exist in the kernel — confirm where they come from.
# Attach the 'geometry' column from `csa` to `dft18`, matching rows on 'CSA2010'.
dft18 = dft18.merge( csa[ ['CSA2010', 'geometry'] ], left_on='CSA2010', right_on='CSA2010' ) 

In [None]:
# Wrap the merged frame as a GeoDataFrame that uses the 'geometry' column.
dft18 = gpd.GeoDataFrame( dft18, geometry='geometry')

In [None]:
dft18.head(1)

In [None]:
# Plot the geometries, coloured by the 'Desc__AGG. ASSAULT' column.
dft18.plot(column='Desc__AGG. ASSAULT')

In [None]:
dft18.head(1)

In [None]:
!pip install mpld3

In [None]:
#hide 
! pip install dcpy

In [None]:
#hide 
from dcpy import widget
import pandas as pd

In [None]:
# CrimeCode	Location	Desc_	InOut	Weapon
#hide 
# Small hand-written sample frame (one row per placeholder CSA, "A" through "K").
# NOTE(review): this rebinds `df`, replacing the frame used by the cells above.
df = pd.DataFrame({"CSA2010":["A","B","C","D","E","F","G","H","I","J","K"],"Desc__BURGLARY":[45,34,54,27,70,25,92,22,40,10,40]})
# Serialize as a JSON array of row objects.
data = df.to_json(orient="records")

In [None]:
df.head()

In [None]:
#hide 
# NOTE(review): `df18` is not defined in the cells visible here — confirm where it comes from.
import json
# Sum the numeric columns per 'CSA2010', then turn the group key back into a column.
# This rebinds `df2` (previously the indicators CSV) and `data` (previously the sample JSON).
df2 = df18.groupby('CSA2010').sum(numeric_only=True)
df2 = df2.reset_index()
# Serialize as a JSON array of row objects for the chart cell below.
data = df2.to_json(orient="records")

In [None]:
df2.head()

In [None]:
data

In [None]:
#hide_input
# https://github.com/washim/dcpy/blob/master/dcpy/widget.py
from IPython.display import display, HTML

# Source: the JSON records string built in an earlier cell.
chart = widget.initiate(data)

# identifiers, crossfilter 

# var ndx=crossfilter(data);

# Declare the two chart identifiers and the JavaScript that defines the
# crossfilter dimensions/groups they use.
chart.prepare(["pie","bar"], [
  "var xDimension=ndx.dimension(function(d){return d.CSA2010;});",
  "var yValues=xDimension.group().reduceSum(function(d){return d.Desc__BURGLARY;});",
  "var coreCount=ndx.dimension(function(d){ if (d.Desc__BURGLARY  > 100) { return 'High'; } else{ return 'Low'; } });",
  "var coreCountGroup=coreCount.group();",
  "var all = ndx.groupAll();"
])

# SET coreCount coreCountGroup
# name,identifier,options 
# Pie chart: share of records labelled 'High' vs 'Low' by the coreCount dimension.
chart.chart("pieChart","pie", [
  "width(600)", "height(600)",
  "radius(300)", "innerRadius(100)",
  "dimension(coreCount)", "group(coreCountGroup)",
  "label(function(d){ var label = d.key;\
      if(all.value()){ label += '(' + Math.floor(d.value/all.value()*100)+'%)'; }\
      return label; })"
] )

# SET Labels
# Bar chart: summed Desc__BURGLARY per CSA2010.
chart.chart("barChart","bar", [
  "width(600)", "height(600)",
  "xAxisLabel('community')", "x(d3.scale.ordinal())", "xUnits(dc.units.ordinal)", "dimension(xDimension)",
  "yAxisLabel('count')", "group(yValues)",
  "colors(['orange'])"
] )

# filename
chart.save('test.html')
# Read the generated page back and render it inline; the `with` block closes
# the file handle once it has been read.
with open("test.html", "r") as f:
    display(HTML(f.read()))

In [None]:
!pip install mpld3

In [None]:
#hide 
# Demo: a 3x3 grid of scatter plots rendered through mpld3 with a linked-brush plugin.
import mpld3 
mpld3.enable_notebook()

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from mpld3 import plugins

fig, ax = plt.subplots(3, 3, figsize=(6, 6))
fig.subplots_adjust(hspace=0.1, wspace=0.1)
# Reverse the row order of the axes array, so ax[0] is the bottom row of the grid.
ax = ax[::-1]

# Three rows of 100 normally distributed samples.
# NOTE(review): no seed is set, so the points differ on every run.
X = np.random.normal(size=(3, 100))
for i in range(3):
    for j in range(3):
        # Hide the tick labels on both axes.
        ax[i, j].xaxis.set_major_formatter(plt.NullFormatter())
        ax[i, j].yaxis.set_major_formatter(plt.NullFormatter())
        # Panel (i, j) plots row j of X against row i.
        points = ax[i, j].scatter(X[j], X[i])
        
# `points` here is the collection from the last panel drawn in the loop.
plugins.connect(fig, plugins.LinkedBrush(points))

mpld3.display(fig)