## Load 311 Data Locally

In [67]:
import pyspark as ps
from pyspark.sql.types import *
from pyspark.sql import functions as SparkFunc
from pyspark.mllib.tree import RandomForest, RandomForestModel ,GradientBoostedTrees, GradientBoostedTreesModel
from pyspark.mllib.util import MLUtils
from pyspark.sql.functions import unix_timestamp

In [68]:
spark = ps.sql.SparkSession.builder \
        .appName("df_311") \
        .getOrCreate()


In [69]:
sc = spark.sparkContext

In [70]:
df = spark.read.format("csv").option("header", "true").load("../data/311_Service_Requests_from_2010_to_Present.csv")

In [71]:
df.printSchema()

root
 |-- Unique Key: string (nullable = true)
 |-- Created Date: string (nullable = true)
 |-- Closed Date: string (nullable = true)
 |-- Agency: string (nullable = true)
 |-- Agency Name: string (nullable = true)
 |-- Complaint Type: string (nullable = true)
 |-- Descriptor: string (nullable = true)
 |-- Location Type: string (nullable = true)
 |-- Incident Zip: string (nullable = true)
 |-- Incident Address: string (nullable = true)
 |-- Street Name: string (nullable = true)
 |-- Cross Street 1: string (nullable = true)
 |-- Cross Street 2: string (nullable = true)
 |-- Intersection Street 1: string (nullable = true)
 |-- Intersection Street 2: string (nullable = true)
 |-- Address Type: string (nullable = true)
 |-- City: string (nullable = true)
 |-- Landmark: string (nullable = true)
 |-- Facility Type: string (nullable = true)
 |-- Status: string (nullable = true)
 |-- Due Date: string (nullable = true)
 |-- Resolution Description: string (nullable = true)
 |-- Resolution Action

In [72]:
#Select only needed cols as found in Pandas EDA
df_rdd = df.select('Created Date','Agency','Closed Date','Complaint Type',\
                   'Descriptor','Borough','Community Board','Open Data Channel Type','Status','Latitude','Longitude')


In [73]:
df_rdd.printSchema()

root
 |-- Created Date: string (nullable = true)
 |-- Agency: string (nullable = true)
 |-- Closed Date: string (nullable = true)
 |-- Complaint Type: string (nullable = true)
 |-- Descriptor: string (nullable = true)
 |-- Borough: string (nullable = true)
 |-- Community Board: string (nullable = true)
 |-- Open Data Channel Type: string (nullable = true)
 |-- Status: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)



In [74]:
# Function takes col name as string, recasts it to a temporary col as the specified data type string, then drops origional col and renames the 
# tempoary one
def convert_rdd_type(orig_rdd,column_name_str,data_type_str):
    return (orig_rdd.withColumn("{}_temp".format(column_name_str), 
                                df_rdd[column_name_str].cast(data_type_str)).drop(
                                column_name_str)).withColumnRenamed("{}_temp".format(column_name_str),
                                 column_name_str)

In [52]:
# #Convert to proper data types
# # df_rdd= df_rdd.withColumn("Latitude_temp", df_rdd.Latitude.cast('Float')).drop("Latitude").withColumnRenamed("Latitude_temp", "Latitude")
# df_rdd = df_rdd.withColumn("Longitude_temp", df_rdd['Longitude'].cast('Float'))

In [75]:
df_rdd = convert_rdd_type(df_rdd,'Longitude','Float')
df_rdd = convert_rdd_type(df_rdd,'Latitude','Float')
# df_rdd = convert_rdd_type(df_rdd,'Created Date','Date')

In [110]:
#convert to unix timestamp for Created & Closed
timeFmt = "MM-dd-yyyy'T'HH:mm:ss.SSS"
time_test = SparkFunc.unix_timestamp(df_rdd['Closed Date'], format=timeFmt)
df_rdd = df_rdd.withColumn('test',time_test)
# timeDiff = (F.unix_timestamp('EndDateTime', format=timeFmt)
#             - F.unix_timestamp('StartDateTime', format=timeFmt))
# df = df.withColumn("Duration", timeDiff)


In [76]:
df_rdd.printSchema()

root
 |-- Created Date: string (nullable = true)
 |-- Agency: string (nullable = true)
 |-- Closed Date: string (nullable = true)
 |-- Complaint Type: string (nullable = true)
 |-- Descriptor: string (nullable = true)
 |-- Borough: string (nullable = true)
 |-- Community Board: string (nullable = true)
 |-- Open Data Channel Type: string (nullable = true)
 |-- Status: string (nullable = true)
 |-- Longitude: float (nullable = true)
 |-- Latitude: float (nullable = true)



In [87]:
from pyspark.sql.functions import col
from pyspark.sql.functions import to_timestamp, date_format
spark.conf.set('spark.sql.session.timeZone', 'UTC')

In [104]:
df_rdd.select(
        to_timestamp(df_rdd['Created Date'], "MM/dd/yyyy HH:mm:ss"), #SUPER SUPER SPECIFIC FORMATING
    ).alias('timestamp_value').show()

+---------------------------------------------------+
|to_timestamp(`Created Date`, 'MM/dd/yyyy HH:mm:ss')|
+---------------------------------------------------+
|                                2015-08-10 09:18:31|
|                                2015-08-10 12:20:43|
|                                2015-08-10 06:16:29|
|                                2015-08-10 10:50:00|
|                                2015-08-10 10:36:00|
|                                2015-08-10 10:12:00|
|                                2015-08-10 01:17:30|
|                                2015-08-10 06:41:33|
|                                2015-08-10 08:10:06|
|                                2015-08-10 01:17:46|
|                                2015-08-10 10:55:00|
|                                2015-08-10 04:12:00|
|                                2015-08-10 12:37:03|
|                                2015-08-10 06:51:18|
|                                2015-08-10 02:40:53|
|                           

In [84]:
df_rdd.take(1)

[Row(Created Date='08/10/2015 09:18:31 AM', Agency='HPD', Closed Date='08/13/2015 12:46:56 PM', Complaint Type='PLUMBING', Descriptor='STEAM PIPE/RISER', Borough='BRONX', Community Board='04 BRONX', Open Data Channel Type='PHONE', Status='Closed', Longitude=-73.91136169433594, Latitude=40.84086227416992)]

## Dask

In [1]:
import pandas as pd
import dask.dataframe as dd
from multiprocessing.pool import ThreadPool
import os
from sodapy import Socrata
import json

In [2]:
sodapy_token = 'tvPeTjPatFwjuelfpMNb0G8WH'
sodapy_domain = 'data.cityofnewyork.us'
database_311 = "fhrw-4uyv"
query = "created_date > '2018-01-01T00:00:00.000'"

In [3]:
select_sql = "agency,borough,closed_date,community_board,complaint_type,created_date,descriptor,open_data_channel_type,status,longitude,latitude" 

In [4]:
client = Socrata(sodapy_domain, sodapy_token)

In [5]:
results = client.get(database_311, select=select_sql, where=query, limit=500000)

In [31]:
df = pd.DataFrame.from_records(results)

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500000 entries, 0 to 499999
Data columns (total 11 columns):
agency                    500000 non-null object
borough                   500000 non-null object
closed_date               489429 non-null object
community_board           500000 non-null object
complaint_type            500000 non-null object
created_date              500000 non-null object
descriptor                498605 non-null object
latitude                  499878 non-null object
longitude                 499878 non-null object
open_data_channel_type    500000 non-null object
status                    500000 non-null object
dtypes: object(11)
memory usage: 42.0+ MB


In [33]:
#drop nans:
df.dropna(subset=['closed_date'],inplace=True)

In [34]:
df.dropna(subset=['descriptor'],inplace=True)

In [35]:
df.dropna(subset=['latitude','longitude'],inplace=True)

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 488018 entries, 0 to 499999
Data columns (total 11 columns):
agency                    488018 non-null object
borough                   488018 non-null object
closed_date               488018 non-null object
community_board           488018 non-null object
complaint_type            488018 non-null object
created_date              488018 non-null object
descriptor                488018 non-null object
latitude                  488018 non-null object
longitude                 488018 non-null object
open_data_channel_type    488018 non-null object
status                    488018 non-null object
dtypes: object(11)
memory usage: 44.7+ MB


In [37]:
community_board_list = set(['06 BRONX', '01 BRONX', '14 QUEENS', '13 QUEENS',
        '13 BROOKLYN', '09 BROOKLYN', '10 QUEENS',
       '08 BRONX', '10 BRONX', '01 QUEENS', '11 QUEENS', '01 BROOKLYN',
       '12 BRONX', '14 BROOKLYN', '07 MANHATTAN', '04 MANHATTAN',
       '05 MANHATTAN', '07 BRONX', '06 QUEENS', '18 BROOKLYN',
       '02 STATEN ISLAND', '15 BROOKLYN', '07 QUEENS', '03 STATEN ISLAND',
       '03 QUEENS', '08 QUEENS', '01 STATEN ISLAND', '07 BROOKLYN',
       '04 BRONX', '12 BROOKLYN', '12 QUEENS',
       '17 BROOKLYN', '04 QUEENS', '03 BROOKLYN', '05 QUEENS',
       '02 QUEENS', '03 MANHATTAN', '11 BRONX', '10 BROOKLYN',
       '06 BROOKLYN', '11 BROOKLYN', '05 BROOKLYN', '04 BROOKLYN',
       '12 MANHATTAN', '09 QUEENS', '09 MANHATTAN', '10 MANHATTAN',
       '16 BROOKLYN', '09 BRONX', '05 BRONX', '02 BROOKLYN',
       '06 MANHATTAN', '02 BRONX', '08 BROOKLYN', '11 MANHATTAN',
       '08 MANHATTAN', '02 MANHATTAN',  '03 BRONX',
       '01 MANHATTAN'])

In [38]:
df = df[df['community_board'].isin(community_board_list)]

In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 483694 entries, 0 to 499999
Data columns (total 11 columns):
agency                    483694 non-null object
borough                   483694 non-null object
closed_date               483694 non-null object
community_board           483694 non-null object
complaint_type            483694 non-null object
created_date              483694 non-null object
descriptor                483694 non-null object
latitude                  483694 non-null object
longitude                 483694 non-null object
open_data_channel_type    483694 non-null object
status                    483694 non-null object
dtypes: object(11)
memory usage: 44.3+ MB


In [40]:
df = df[df['status'] == 'Closed']

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 474537 entries, 0 to 499999
Data columns (total 11 columns):
agency                    474537 non-null object
borough                   474537 non-null object
closed_date               474537 non-null object
community_board           474537 non-null object
complaint_type            474537 non-null object
created_date              474537 non-null object
descriptor                474537 non-null object
latitude                  474537 non-null object
longitude                 474537 non-null object
open_data_channel_type    474537 non-null object
status                    474537 non-null object
dtypes: object(11)
memory usage: 43.4+ MB


In [42]:
created = pd.to_datetime(df['created_date'],utc=True)
closed = pd.to_datetime(df['closed_date'],utc=True)
df['time_till_resolution'] =created - closed
df['time_till_resolution'] =(-round(
    df['time_till_resolution'].astype('timedelta64[s]')/3600,
        2))

In [43]:
df.drop('closed_date', axis=1 , inplace=True)

In [44]:
df['created_date']= pd.to_datetime(df['created_date'])

In [45]:
df['created_date_year'] = df['created_date'].dt.year
df['created_date_month'] = df['created_date'].dt.month
df['created_date_day'] = df['created_date'].dt.day
df['created_date_hour'] = df['created_date'].dt.hour
df['created_date_day_of_week'] = df['created_date'].dt.dayofweek

In [95]:
df.head(10)

Unnamed: 0,agency,borough,community_board,complaint_type,descriptor,open_data_channel_type,time_till_resolution,created_date_year,created_date_month,created_date_day,created_date_hour,created_date_day_of_week
0,DEP,BRONX,03 BRONX,Water System,Hydrant Leaking (WC1),PHONE,2574.9,2018,1,7,17,6
1,DPR,QUEENS,05 QUEENS,Dead/Dying Tree,Planted More Than 2 Years Ago,PHONE,15.42,2018,1,8,15,0
2,HPD,QUEENS,05 QUEENS,UNSANITARY CONDITION,PESTS,PHONE,2729.6,2018,1,9,16,1
3,HPD,QUEENS,05 QUEENS,ELECTRIC,NO LIGHTING,PHONE,2729.6,2018,1,9,16,1
4,DOB,BROOKLYN,11 BROOKLYN,General Construction/Plumbing,Cons - Contrary/Beyond Approved Plans/Permits,UNKNOWN,2221.64,2018,1,30,10,1
5,HPD,QUEENS,05 QUEENS,UNSANITARY CONDITION,PESTS,PHONE,2729.6,2018,1,9,16,1
6,DOB,BRONX,08 BRONX,Building/Use,Illegal. Commercial Use In Resident Zone,UNKNOWN,2574.82,2018,1,15,17,0
7,DPR,QUEENS,05 QUEENS,Overgrown Tree/Branches,Dead Branches in Tree,PHONE,2530.14,2018,1,18,9,3
8,DOB,QUEENS,10 QUEENS,Building/Use,Illegal Conversion Of Residential Building/Space,UNKNOWN,2362.18,2018,1,24,13,2
9,HPD,BRONX,04 BRONX,UNSANITARY CONDITION,MOLD,PHONE,2209.58,2018,1,31,13,2


In [104]:
df.open_data_channel_type.value_counts()

PHONE      272883
ONLINE     109299
MOBILE      52341
UNKNOWN     36152
OTHER        3862
Name: open_data_channel_type, dtype: int64

In [102]:
df.agency.value_counts()

HPD      167672
NYPD     130942
DSNY      57350
DEP       40391
DOT       40213
DOB       12311
DPR        8901
DOHMH      6589
TLC        3871
DCA        3052
DHS        1360
DOF        1177
DOE         454
EDC         135
DOITT        71
DFTA         48
Name: agency, dtype: int64

In [103]:
df[df.agency=='DCA'].time_till_resolution.mean()

215.83469855832192

In [115]:
df.groupby('community_board')

Unnamed: 0,agency,borough,community_board,complaint_type,descriptor,open_data_channel_type,time_till_resolution,created_date_year,created_date_month,created_date_day,created_date_hour,created_date_day_of_week
0,DEP,BRONX,03 BRONX,Water System,Hydrant Leaking (WC1),PHONE,2574.90,2018,1,7,17,6
1,DPR,QUEENS,05 QUEENS,Dead/Dying Tree,Planted More Than 2 Years Ago,PHONE,15.42,2018,1,8,15,0
2,HPD,QUEENS,05 QUEENS,UNSANITARY CONDITION,PESTS,PHONE,2729.60,2018,1,9,16,1
3,HPD,QUEENS,05 QUEENS,ELECTRIC,NO LIGHTING,PHONE,2729.60,2018,1,9,16,1
4,DOB,BROOKLYN,11 BROOKLYN,General Construction/Plumbing,Cons - Contrary/Beyond Approved Plans/Permits,UNKNOWN,2221.64,2018,1,30,10,1
5,HPD,QUEENS,05 QUEENS,UNSANITARY CONDITION,PESTS,PHONE,2729.60,2018,1,9,16,1
6,DOB,BRONX,08 BRONX,Building/Use,Illegal. Commercial Use In Resident Zone,UNKNOWN,2574.82,2018,1,15,17,0
7,DPR,QUEENS,05 QUEENS,Overgrown Tree/Branches,Dead Branches in Tree,PHONE,2530.14,2018,1,18,9,3
8,DOB,QUEENS,10 QUEENS,Building/Use,Illegal Conversion Of Residential Building/Space,UNKNOWN,2362.18,2018,1,24,13,2
9,HPD,BRONX,04 BRONX,UNSANITARY CONDITION,MOLD,PHONE,2209.58,2018,1,31,13,2


In [99]:
HPD = 234/24

In [66]:
#Make copy
df_test = df

In [67]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 474537 entries, 0 to 499999
Data columns (total 13 columns):
agency                      474537 non-null object
borough                     474537 non-null object
community_board             474537 non-null object
complaint_type              474537 non-null object
created_date                474537 non-null datetime64[ns]
descriptor                  474537 non-null object
open_data_channel_type      474537 non-null object
time_till_resolution        474537 non-null float64
created_date_year           474537 non-null int64
created_date_month          474537 non-null int64
created_date_day            474537 non-null int64
created_date_hour           474537 non-null int64
created_date_day_of_week    474537 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(5), object(6)
memory usage: 50.7+ MB


In [68]:
#Drop status & lat/long
# df_test.drop(['status','latitude','longitude','created_date'], axis=1 , inplace=True)
df_test.drop('created_date', axis=1 , inplace=True)

In [93]:
df_test.head()

Unnamed: 0,time_till_resolution,created_date_year,created_date_month,created_date_day,created_date_hour,created_date_day_of_week,agency_DCA,agency_DEP,agency_DFTA,agency_DHS,...,descriptor_Wrong Amount Paid or Withdrawn,descriptor_Yield,descriptor_Zoning - Non-Conforming/Illegal Vehicle Storage,descriptor_installation of hydrant side post (WHFP),descriptor_unknown odor/taste in drinking water (QA6),open_data_channel_type_MOBILE,open_data_channel_type_ONLINE,open_data_channel_type_OTHER,open_data_channel_type_PHONE,open_data_channel_type_UNKNOWN
0,2574.9,2018,1,7,17,6,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
1,15.42,2018,1,8,15,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2729.6,2018,1,9,16,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2729.6,2018,1,9,16,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2221.64,2018,1,30,10,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [70]:
df_test= pd.get_dummies(df_test, columns=['agency','borough','community_board',
                                      'complaint_type','descriptor','open_data_channel_type'])

In [71]:
df_test.shape

(474537, 935)

In [72]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 474537 entries, 0 to 499999
Columns: 935 entries, time_till_resolution to open_data_channel_type_UNKNOWN
dtypes: float64(1), int64(5), uint8(929)
memory usage: 445.8 MB


In [135]:
df.to_csv("../data/cleaned_500k")

In [23]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor , GradientBoostingRegressor
from sklearn.model_selection import train_test_split

In [73]:
# y = df_test.pop('time_till_resolution')
X = df_test

In [74]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y)

In [75]:
random_forest = RandomForestRegressor(n_jobs = -1)
gradient_boost = GradientBoostingRegressor()

In [76]:
random_forest.fit(Xtrain,ytrain)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [77]:
random_forest.score(Xtest,ytest)

0.9999989999358163

In [78]:
from sklearn.externals import joblib

In [79]:
joblib.dump(random_forest, 'random_forest_model.pkl')

['random_forest_model.pkl']

In [80]:
gradient_boost = GradientBoostingRegressor()

In [82]:
gradient_boost.fit(Xtrain, ytrain)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)

In [87]:
predictions = gradient_boost.predict(Xtest)

In [88]:
predictions[:10]

array([9.97578163e-01, 5.80281648e+00, 7.54037873e+00, 9.97578163e-01,
       1.12450649e+03, 3.21011996e-01, 1.86189449e+02, 6.04520044e+01,
       8.08189636e-01, 6.98806956e+01])

In [86]:
ytest

367230       0.87
122449       6.02
382313       7.63
30525        0.87
208742    1145.95
84224       -0.00
66651      182.31
144042      63.69
27797        0.69
117725      69.61
15829      121.10
253025      39.85
16226        6.98
278716       0.94
459318      14.18
310701      -0.00
202947       1.23
149777     549.89
94559        2.64
83398       34.26
457807     241.66
435443     109.53
374444      -0.00
165923      57.15
121968      23.05
334404       1.02
149245       1.59
81386       20.63
372826     130.61
59986      628.20
           ...   
272039      -0.00
100322       2.74
402584      96.45
43772      179.22
422881      -0.00
212083       4.04
47823       57.16
158646     246.71
281796      89.47
175729      39.77
244309      48.63
108285      32.36
465723     157.91
261598       0.74
193943       3.09
166965      51.08
202164       1.96
466026     885.81
158558      26.93
40483        1.88
130957      75.65
407467       0.84
297894      43.32
227582     111.08
358256    

In [83]:
gradient_boost.score(Xtest,ytest)

0.999917658524057

In [84]:
joblib.dump(gradient_boost, 'gradient_boost_model.pkl')

['gradient_boost_model.pkl']

### Modeling:

#### Dask extension

In [136]:
keep_cols = ['Agency','Borough','Closed Date','Community Board','Complaint Type','Created Date','Descriptor',
             'Open Data Channel Type','Status','Longitude','Latitude']                    
# # Error rasied with type being verified on these specific col, no longer needed with selecti, fixed with setting dtypes
# types={'Incident Zip': 'object',
#        'Landmark': 'object',
#        'Vehicle Type': 'object'}
%time df = dd.read_csv("../data/311_Service_Requests_from_2010_to_Present.csv",  usecols=keep_cols)

CPU times: user 127 ms, sys: 0 ns, total: 127 ms
Wall time: 126 ms


In [None]:
df['Created Date'] = pd.to_datetime(df['Created Date'].compute(),utc=True)


In [152]:
df.head()

Unnamed: 0,Created Date,Closed Date,Agency,Complaint Type,Descriptor,Status,Community Board,Borough,Open Data Channel Type,Latitude,Longitude
0,08/10/2015 09:18:31 AM,08/13/2015 12:46:56 PM,HPD,PLUMBING,STEAM PIPE/RISER,Closed,04 BRONX,BRONX,PHONE,40.840863,-73.911364
1,08/10/2015 12:20:43 PM,08/20/2015 12:40:41 PM,HPD,PLUMBING,STEAM PIPE/RISER,Closed,14 BROOKLYN,BROOKLYN,PHONE,40.63667,-73.952617
2,08/10/2015 06:16:29 PM,08/11/2015 09:15:29 AM,NYPD,Illegal Parking,Blocked Hydrant,Closed,05 QUEENS,QUEENS,ONLINE,40.732584,-73.89223
3,08/10/2015 10:50:00 PM,08/19/2015 12:45:00 AM,DEP,Noise,Noise: Construction Before/After Hours (NM1),Closed,08 BRONX,BRONX,ONLINE,40.8751,-73.910534
4,08/10/2015 10:36:00 PM,08/16/2015 12:15:00 AM,DEP,Noise,Noise: Construction Before/After Hours (NM1),Closed,02 BROOKLYN,BROOKLYN,PHONE,40.697412,-73.968683


In [132]:
df['new_index'] = df['Created Date']

In [135]:
# #Following Dask suggestiong to set dat to index, but keeping date col intact, however do once and make copy to persist
# df.set_index('new_idex')

Unnamed: 0_level_0,Created Date,Closed Date,Agency,Complaint Type,Descriptor,Status,Community Board,Borough,Open Data Channel Type,Latitude,Longitude
npartitions=165,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
01/01/2010 01:00:00 AM,object,object,object,object,object,object,object,object,object,float64,float64
01/02/2013 11:27:00 AM,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...
12/30/2016 03:20:31 PM,...,...,...,...,...,...,...,...,...,...,...
12/31/2017 12:59:35 PM,...,...,...,...,...,...,...,...,...,...,...
