## Explore Dataset

##### Importing libraries

In [2]:
import numpy as np
import pandas as pd
from IPython.core.display import display, HTML
import pandas_profiling
import seaborn as sns 
from matplotlib import pyplot 
import seaborn as sns
import base64
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
# Global seaborn theme: "white" style, with the axes face colour set to the
# RGBA tuple (0, 0, 0, 0), i.e. alpha 0.
sns.set(rc={"axes.facecolor": (0, 0, 0, 0)}, style="white")

#Facets functions
def dive(df):
    """Render a Facets Dive widget for ``df`` in the notebook output.

    The frame is serialized with ``df.to_json(orient='records')`` and embedded
    in an HTML snippet that references the Facets web components by remote
    URL; the snippet is then shown with ``display(HTML(...))``.

    Args:
        df: object exposing ``to_json(orient='records')`` (a pandas DataFrame
            in this notebook).

    Returns:
        None. The widget is emitted as a display side effect.
    """
    # Function-scope import so the helper does not depend on the import cell
    # for this stdlib module.
    import uuid

    jsonstr = df.to_json(orient='records')
    # Use a fresh element id per call. A fixed id would be duplicated in the
    # page when several Facets widgets are rendered, and querySelector would
    # then attach the data to the first matching element instead of this one.
    # The alphabetic prefix keeps "#<id>" a valid CSS selector.
    elem_id = "facets-dive-" + uuid.uuid4().hex
    HTML_TEMPLATE = """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html">
        <facets-dive id="{elem_id}" height="600"></facets-dive>
        <script>
          var data = {jsonstr};
          document.querySelector("#{elem_id}").data = data;
        </script>"""
    html = HTML_TEMPLATE.format(jsonstr=jsonstr, elem_id=elem_id)
    display(HTML(html))

def overview(train_data,test_data):
    """Render a Facets Overview widget comparing two tables.

    Feature statistics are computed by
    ``GenericFeatureStatisticsGenerator.ProtoFromDataFrames`` for the two
    inputs (labelled ``'train'`` and ``'test'``). The resulting proto is
    serialized, base64-encoded and embedded in an HTML snippet that references
    the Facets web components by remote URL; the snippet is then shown with
    ``display(HTML(...))``.

    Args:
        train_data: table passed to the generator under the name ``'train'``.
        test_data: table passed to the generator under the name ``'test'``.

    Returns:
        None. The widget is emitted as a display side effect.
    """
    # Function-scope import so the helper does not depend on the import cell
    # for this stdlib module.
    import uuid

    gfsg = GenericFeatureStatisticsGenerator()
    proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},
                                      {'name': 'test', 'table': test_data}])
    protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
    # Use a fresh element id per call. A fixed id would be duplicated in the
    # page when several Facets widgets are rendered, and querySelector would
    # then feed the proto to the first matching element instead of this one.
    # The alphabetic prefix keeps "#<id>" a valid CSS selector.
    elem_id = "facets-overview-" + uuid.uuid4().hex
    HTML_TEMPLATE = """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
        <facets-overview id="{elem_id}"></facets-overview>
        <script>
          document.querySelector("#{elem_id}").protoInput = "{protostr}";
        </script>"""
    html = HTML_TEMPLATE.format(protostr=protostr, elem_id=elem_id)
    display(HTML(html))


##### Import Data

In [3]:
# Location of the queried crimes snapshot.
# NOTE(review): absolute, machine-specific path; consider resolving it from a
# configurable data directory so the notebook runs on other machines.
QUERIED_CSV_PATH = '/Users/italosayan/Code/super-dan/super_dan_app/dataset/queried_data/2019-11-19|09:59:41.089914_crimes.csv'

# Columns parsed as epoch timestamps in milliseconds (unit="ms" below).
TIMESTAMP_COLUMNS = ['OccurredFrom_Timestamp', 'OccurredThrough_Timestamp', 'Reported_Timestamp']

# `queried` is only ever bound to the DataFrame; the path has its own name so
# the variable never changes type within the cell.
queried = pd.read_csv(QUERIED_CSV_PATH)

# Convert every timestamp column in one step; assign() keeps the column order.
queried = queried.assign(
    **{column: pd.to_datetime(queried[column], unit="ms") for column in TIMESTAMP_COLUMNS}
)

# Last expression of the cell: the profiling report is rendered as its output.
pandas_profiling.ProfileReport(queried)



##### Find something to model quickly

In [4]:
# Keep only the rows whose Statute_Text is "Larceny" or "Burglary".
did_they_enter = queried[queried['Statute_Text'].isin(["Larceny","Burglary"])]

# Narrow the frame to the timestamp, location and statute columns.
# NOTE(review): presumably Statute_Text is the modelling target; confirm.
did_they_enter = did_they_enter[[
    'OccurredFrom_Timestamp',
    'OccurredThrough_Timestamp',
    'Reported_Timestamp',
    'Location_Type',
    'x',
    'y',
    'Statute_Text',
]]

Unnamed: 0,Unnamed_0,OBJECTID,OccurredFrom_Timestamp,OccurredThrough_Timestamp,Reported_Timestamp,Statute_Text,Statute_Description,Weapon_Description,Larceny_Type,Location_Type,x,y
0,0,1,2019-11-14 21:00:00,2019-11-15 16:00:00,2019-11-15 21:10:00,Larceny,Petit Larceny ...,* No Weapon Specified *,Theft from Motor Vehicle,Street,-77.576100,43.160845
1,1,2,2019-11-14 13:10:00,2019-11-14 14:32:00,2019-11-14 18:10:00,Larceny,Petit Larceny ...,* No Weapon Specified *,Theft from Building,College,-77.632693,43.128811
2,2,3,2019-11-14 13:10:00,2019-11-14 14:32:00,2019-11-14 18:10:00,Larceny,Petit Larceny ...,* No Weapon Specified *,Theft from Building,College,-77.632693,43.128811
3,3,4,2019-09-26 02:00:00,2019-09-26 09:15:00,2019-09-26 17:03:00,Larceny,Petit Larceny ...,* No Weapon Specified *,All Other Larcenies,College,-77.626701,43.129819
4,4,5,2019-09-25 06:00:00,2019-09-25 16:00:00,2019-09-25 21:40:00,Larceny,Petit Larceny ...,* No Weapon Specified *,All Other Larcenies,Multiple Dwelling,-77.617892,43.147925
...,...,...,...,...,...,...,...,...,...,...,...,...
1217,1217,90527,2019-10-13 20:00:00,2019-10-14 09:00:00,2019-10-14 13:00:00,Larceny,Petit Larceny ...,* No Weapon Specified *,Theft from Motor Vehicle,Parking Lot,-77.639707,43.178221
1218,1218,90559,2019-10-22 18:25:00,2019-10-22 18:25:00,2019-10-22 22:25:00,Larceny,Petit Larceny ...,* No Weapon Specified *,All Other Larcenies,Multiple Dwelling,-77.645410,43.149668
1220,1220,90775,2019-10-28 17:44:00,2019-10-28 18:00:00,2019-10-28 21:44:00,Larceny,Petit Larceny ...,* No Weapon Specified *,All Other Larcenies,Multiple Dwelling,-77.610958,43.185883
1221,1221,90962,2019-10-31 00:10:00,2019-10-31 12:00:00,2019-10-31 18:10:00,Larceny,Petit Larceny ...,* No Weapon Specified *,Theft from Motor Vehicle,Street,-77.593784,43.151135


In [11]:
# Count the distinct Statute_Text values in the filtered frame; dropna=False
# makes a missing value count as its own category.
did_they_enter['Statute_Text'].nunique(dropna=False)

2