In [1]:
import featuretools as ft
import pandas as pd

# Load the mock customer demo data as an EntitySet, using a fixed random seed.
es = ft.demo.load_mock_customer(return_entityset=True, random_seed=0)

# Preview the first rows of the transactions entity's dataframe.
es['transactions'].df.head()

Unnamed: 0,transaction_id,session_id,transaction_time,amount,product_id
298,298,1,2014-01-01 00:00:00,127.64,5
2,2,1,2014-01-01 00:01:05,109.48,2
308,308,1,2014-01-01 00:02:10,95.06,3
116,116,1,2014-01-01 00:03:15,78.92,4
371,371,1,2014-01-01 00:04:20,31.54,3


In [2]:
# Show the customers dataframe; join_date and date_of_birth are its two time columns.
es['customers'].df

Unnamed: 0,customer_id,join_date,date_of_birth,zip_code
5,5,2010-07-17 05:27:50,1984-07-28,60091
4,4,2011-04-08 20:08:14,2006-08-15,60091
1,1,2011-04-17 10:48:33,1994-07-18,60091
3,3,2011-08-13 15:42:34,2003-11-21,13244
2,2,2012-04-15 23:31:04,1986-08-18,13244


Here, we have two time columns, join_date and date_of_birth. While either column might be useful for making features, the join_date should be used as the time index because it indicates when that customer first became available in the dataset.

The cutoff_time specifies the last point in time that a row’s data can be used for a feature calculation. Any data after this point in time will be filtered out before calculating features.

In [3]:
# Build features for customers 1-3 as of one shared cutoff time; data after
# the cutoff is filtered out before any feature is calculated.
fm, features = ft.dfs(
    entityset=es,
    target_entity='customers',
    cutoff_time=pd.Timestamp('2014-1-1 04:00'),
    instance_ids=[1, 2, 3],  # customer ids
    cutoff_time_in_index=True,
)
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,zip_code,COUNT(sessions),NUM_UNIQUE(sessions.device),MODE(sessions.device),SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),...,NUM_UNIQUE(sessions.WEEKDAY(session_start)),MODE(sessions.YEAR(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.MONTH(session_start)),MODE(sessions.DAY(session_start)),MODE(sessions.WEEKDAY(session_start)),NUM_UNIQUE(transactions.sessions.customer_id),NUM_UNIQUE(transactions.sessions.device),MODE(transactions.sessions.customer_id),MODE(transactions.sessions.device)
customer_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2014-01-01 04:00:00,60091,4,3,tablet,4958.19,42.309717,139.23,-0.006928,5.81,74.002836,...,1,2014,4,1,1,2,1,3,1,tablet
2,2014-01-01 04:00:00,13244,4,2,desktop,4150.3,39.289512,146.81,-0.134786,12.07,84.7,...,1,2014,2,1,1,2,1,2,2,desktop
3,2014-01-01 04:00:00,13244,1,1,tablet,941.87,47.264797,146.31,0.618455,8.19,62.791333,...,1,2014,1,1,1,2,1,1,3,tablet


In [4]:
# One row per (customer, cutoff time) pair, plus a label column.
# Customer 1 appears twice, so it gets features at two different cutoff times.
cutoff_times = pd.DataFrame(
    {
        'customer_id': [1, 2, 3, 1],
        'time': pd.to_datetime(['2014-1-1 04:00',
                                '2014-1-1 05:00',
                                '2014-1-1 06:00',
                                '2014-1-1 08:00']),
        'label': [True, True, False, True],
    },
    # Pin the column order explicitly rather than relying on dict ordering.
    columns=['customer_id', 'time', 'label'],
)
cutoff_times

Unnamed: 0,customer_id,time,label
0,1,2014-01-01 04:00:00,True
1,2,2014-01-01 05:00:00,True
2,3,2014-01-01 06:00:00,False
3,1,2014-01-01 08:00:00,True


In [5]:
# Pass the cutoff-time dataframe so each row is computed at its own cutoff
# time. No instance_ids are given here; the dataframe supplies customer_id.
fm, features = ft.dfs(
    entityset=es,
    target_entity='customers',
    cutoff_time=cutoff_times,
    cutoff_time_in_index=True,
)
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,zip_code,COUNT(sessions),NUM_UNIQUE(sessions.device),MODE(sessions.device),SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),...,MODE(sessions.YEAR(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.MONTH(session_start)),MODE(sessions.DAY(session_start)),MODE(sessions.WEEKDAY(session_start)),NUM_UNIQUE(transactions.sessions.customer_id),NUM_UNIQUE(transactions.sessions.device),MODE(transactions.sessions.customer_id),MODE(transactions.sessions.device),label
customer_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2014-01-01 04:00:00,60091,4,3,tablet,4958.19,42.309717,139.23,-0.006928,5.81,74.002836,...,2014,4,1,1,2,1,3,1,tablet,True
2,2014-01-01 05:00:00,13244,5,2,desktop,5155.26,38.047944,146.81,-0.121811,12.07,83.149355,...,2014,2,1,1,2,1,2,2,desktop,True
3,2014-01-01 06:00:00,13244,4,2,desktop,2867.69,40.349758,146.31,0.318315,6.65,65.174773,...,2014,1,1,1,2,1,2,3,desktop,False
1,2014-01-01 08:00:00,60091,8,3,mobile,9025.62,40.442059,139.43,0.019698,5.81,71.631905,...,2014,4,1,1,2,1,3,1,mobile,True


By default, all data up to and including the cutoff time is used. We can restrict the amount of historical data that is selected for calculations using a “training window.”

In [6]:
# Same cutoff times as the previous run, but with a training window that
# restricts how much historical data each row may use.
window_fm, window_features = ft.dfs(
    entityset=es,
    target_entity='customers',
    cutoff_time=cutoff_times,
    cutoff_time_in_index=True,
    training_window='2 hour',
)
window_fm

Unnamed: 0_level_0,Unnamed: 1_level_0,zip_code,COUNT(sessions),NUM_UNIQUE(sessions.device),MODE(sessions.device),SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),...,MODE(sessions.YEAR(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.MONTH(session_start)),MODE(sessions.DAY(session_start)),MODE(sessions.WEEKDAY(session_start)),NUM_UNIQUE(transactions.sessions.customer_id),NUM_UNIQUE(transactions.sessions.device),MODE(transactions.sessions.customer_id),MODE(transactions.sessions.device),label
customer_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2014-01-01 04:00:00,60091,2,2,desktop,2077.66,43.772157,139.09,-0.187686,5.81,76.95037,...,2014,1,1,1,2,1,2,1,desktop,True
2,2014-01-01 05:00:00,13244,3,2,desktop,2605.61,36.077146,146.81,-0.198611,12.07,84.051935,...,2014,1,1,1,2,1,2,2,desktop,True
3,2014-01-01 06:00:00,13244,3,1,desktop,1925.82,37.130891,128.26,0.110145,6.65,66.407586,...,2014,1,1,1,2,1,1,3,desktop,False
1,2014-01-01 08:00:00,60091,3,2,mobile,3124.15,38.952172,139.43,0.04712,5.91,66.471277,...,2014,1,1,1,2,1,2,1,mobile,True


In [7]:
# Transaction counts from the run without a training window.
fm[['COUNT(transactions)']]

Unnamed: 0_level_0,Unnamed: 1_level_0,COUNT(transactions)
customer_id,time,Unnamed: 2_level_1
1,2014-01-01 04:00:00,67
2,2014-01-01 05:00:00,62
3,2014-01-01 06:00:00,44
1,2014-01-01 08:00:00,126


In [8]:
# The same feature from the run with training_window='2 hour', for comparison.
window_fm[["COUNT(transactions)"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,COUNT(transactions)
customer_id,time,Unnamed: 2_level_1
1,2014-01-01 04:00:00,27
2,2014-01-01 05:00:00,31
3,2014-01-01 06:00:00,29
1,2014-01-01 08:00:00,47


# Setting a Last Time Index

The training window in Featuretools limits the amount of past data that can be used while calculating a particular feature vector. A row in the entity is filtered out if the value of its time index is either before or after the training window. This works for entities where a row occurs at a single point in time. However, a row can sometimes exist for a duration.

For example, a customer’s session has multiple transactions which can happen at different points in time. If we are trying to count the number of sessions a user has in a given time period, we often want to count all the sessions that had any transaction during the training window. To accomplish this, we need to not only know when a session starts, but also when it ends. The last time that an instance appears in the data is stored as the last_time_index of an Entity. We can compare the time index and the last time index of the sessions entity above:

In [9]:
# Time index of the sessions entity: the start time of the first few sessions.
es['sessions'].df['session_start'].head()

1   2014-01-01 00:00:00
2   2014-01-01 00:17:20
3   2014-01-01 00:28:10
4   2014-01-01 00:44:25
5   2014-01-01 01:11:30
Name: session_start, dtype: datetime64[ns]

In [10]:
# Last time index of the same sessions, to compare against session_start.
es['sessions'].last_time_index.head()

1   2014-01-01 00:16:15
2   2014-01-01 00:27:05
3   2014-01-01 00:43:20
4   2014-01-01 01:10:25
5   2014-01-01 01:22:20
Name: last_time, dtype: datetime64[ns]

Featuretools can automatically add last time indexes to every Entity in an Entityset by running EntitySet.add_last_time_indexes(). If a last_time_index has been set, Featuretools will check to see if the last_time_index is after the start of the training window. That, combined with the cutoff time, allows DFS to discover which data is relevant for a given training window.

# Approximating Features by Rounding Cutoff Times

For each unique cutoff time, Featuretools must perform operations to select the data that’s valid for computations. If there are a large number of unique cutoff times relative to the number of instances for which we are calculating features, the time spent filtering data can add up. By reducing the number of unique cutoff times, we minimize the overhead from searching for and extracting data for feature calculations.

One way to decrease the number of unique cutoff times is to round cutoff times to an earlier point in time. An earlier cutoff time is always valid for predictive modeling — it just means we’re not using some of the data we could potentially use while calculating that feature. So, we gain computational speed by losing a small amount of information.

To understand when an approximation is useful, consider calculating features for a model to predict fraudulent credit card transactions. In this case, an important feature might be, “the average transaction amount for this card in the past”. While this value can change every time there is a new transaction, updating it less frequently might not impact accuracy.

In [11]:
# Recompute the previously defined features with approximation enabled.
# `approximate='1 day'` is the rounding window described in the text above;
# the cutoff times come from the `cutoff_times` dataframe built earlier.
# The result is bound to a new name so the existing `fm` is left untouched.
approx_fm = ft.calculate_feature_matrix(features=features,
                                        entityset=es,
                                        cutoff_time=cutoff_times,
                                        approximate='1 day')

# Secondary Time Index

It is sometimes the case that information in a dataset is updated or added after a row has been created. This means that certain columns may actually become known after the time index for a row. Rather than drop those columns to avoid leaking information, we can create a secondary time index to indicate when those columns become known.

The Flights entityset is a good example of a dataset where column values in a row become known at different times. Each trip is recorded in the trip_logs entity, and has many times associated with it.

In [12]:
# Load the flight demo data as an EntitySet, limited via nrows=100.
es_flight = ft.demo.load_flight(nrows=100)

Downloading data ...


In [13]:
# Display the EntitySet summary: its entities and the relationships between them.
es_flight

Entityset: Flight Data
  Entities:
    trip_logs [Rows: 100, Columns: 21]
    flights [Rows: 13, Columns: 9]
    airlines [Rows: 1, Columns: 1]
    airports [Rows: 6, Columns: 3]
  Relationships:
    trip_logs.flight_id -> flights.flight_id
    flights.carrier -> airlines.carrier
    flights.dest -> airports.dest

In [14]:
# Preview the first three rows of the trip_logs entity.
es_flight['trip_logs'].df.head(3)

Unnamed: 0,trip_log_id,flight_id,date_scheduled,scheduled_dep_time,scheduled_arr_time,dep_time,arr_time,dep_delay,taxi_out,taxi_in,arr_delay,scheduled_elapsed_time,air_time,distance,carrier_delay,weather_delay,national_airspace_delay,security_delay,late_aircraft_delay,canceled,diverted
30,30,AA-494:RSW->CLT,2016-09-03,2017-01-01 13:14:00,2017-01-01 15:05:00,2017-01-01 13:03:00,2017-01-01 14:53:00,-11.0,12.0,10.0,-12.0,6660000000000,88.0,600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38,38,AA-495:ATL->PHX,2016-09-03,2017-01-01 11:30:00,2017-01-01 15:40:00,2017-01-01 11:24:00,2017-01-01 15:41:00,-6.0,28.0,5.0,1.0,15000000000000,224.0,1587.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
46,46,AA-495:CLT->ATL,2016-09-03,2017-01-01 09:25:00,2017-01-01 10:42:00,2017-01-01 09:23:00,2017-01-01 10:39:00,-2.0,18.0,8.0,-3.0,4620000000000,50.0,226.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


For every trip log, the time index is date_scheduled, which is when the airline decided on the scheduled departure and arrival times, as well as what route will be flown. We don’t know the rest of the information about the actual departure/arrival times and the details of any delay at this time. However, it is possible to know everything about how a trip went after it has arrived, so we can use that information at any time after the flight lands.

Using a secondary time index, we can indicate to Featuretools which columns in our flight logs are known at the time the flight is scheduled, plus which are known at the time the flight lands.



In [15]:
# Rebuild the trip_logs entity by hand to show how a secondary time index is
# declared. The raw rows are taken from the flight EntitySet loaded above.
# NOTE: this rebinds `es`; the mock-customer EntitySet is reloaded further
# down before it is used again.
data = es_flight['trip_logs'].df.copy()

es = ft.EntitySet('Flight Data')

# Columns that only become known once the flight has arrived.
arr_time_columns = ['arr_delay', 'dep_delay', 'carrier_delay', 'weather_delay',
                    'national_airspace_delay', 'security_delay',
                    'late_aircraft_delay', 'canceled', 'diverted',
                    'taxi_in', 'taxi_out', 'air_time', 'dep_time']

# `trip_log_id` already exists in the dataframe, so no index is generated
# (make_index is left at its default). The time index is the
# `date_scheduled` column; the columns listed above become available at
# `arr_time` instead.
es.entity_from_dataframe('trip_logs',
                         data,
                         index='trip_log_id',
                         time_index='date_scheduled',
                         secondary_time_index={'arr_time': arr_time_columns})

Let’s make some features at varying times using the flight example described above. Trip 14 is a flight from CLT to PHX on January 31, 2017 and trip 92 is a flight from PIT to DFW on January 1. We can set any cutoff time before the flight is scheduled to depart, emulating how we would make the prediction at that point in time.

We set two cutoff times for trip 14: one which is more than a month before the flight and another which is less than a week before. For trip 92, we’ll only set one cutoff time, a few days before it is scheduled to leave.

In [17]:
# Cutoff times for the flight example: two for trip 14 and one for trip 92,
# each with a label.
ct_flight = pd.DataFrame(
    {
        'trip_log_id': [14, 14, 92],
        'time': pd.to_datetime(['2016-12-28',
                                '2017-1-25',
                                '2016-12-28']),
        'label': [True, True, False],
    },
    # Pin the column order explicitly rather than relying on dict ordering.
    columns=['trip_log_id', 'time', 'label'],
)
ct_flight

Unnamed: 0,trip_log_id,time,label
0,14,2016-12-28,True
1,14,2017-01-25,True
2,92,2016-12-28,False


In [18]:
# Run DFS on the trip logs at the flight cutoff times, restricted to one
# aggregation primitive and one transform primitive.
fm, features = ft.dfs(
    entityset=es_flight,
    target_entity='trip_logs',
    cutoff_time=ct_flight,
    cutoff_time_in_index=True,
    agg_primitives=['max'],
    trans_primitives=['month'],
)

In [19]:
# Select a few columns to inspect how feature values differ across cutoff times.
fm[['flight_id', 'label', 'flights.MAX(trip_logs.arr_delay)', 'MONTH(scheduled_dep_time)']]

Unnamed: 0_level_0,Unnamed: 1_level_0,flight_id,label,flights.MAX(trip_logs.arr_delay),MONTH(scheduled_dep_time)
trip_log_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14,2016-12-28,AA-494:CLT->PHX,True,,1
14,2017-01-25,AA-494:CLT->PHX,True,33.0,1
92,2016-12-28,AA-496:PIT->DFW,False,,1


# Creating and Flattening a Feature Tensor
The make_temporal_cutoffs() function generates a series of equally spaced cutoff times from a given set of cutoff times and instance ids.

This function can be paired with DFS to create and flatten a feature tensor rather than making multiple feature matrices at different delays.

The function takes in the following parameters:

instance_ids (list, pd.Series, or np.ndarray): A list of instances.

cutoffs (list, pd.Series, or np.ndarray): An associated list of cutoff times.

window_size (str or pandas.DateOffset): The amount of time between each cutoff time in the created time series.

start (datetime.datetime or pd.Timestamp): The first cutoff time in the created time series.

num_windows (int): The number of cutoff times to create in the created time series.

Only two of the three options window_size, start, and num_windows need to be specified to uniquely determine an equally-spaced set of cutoff times at which to compute each instance.

In [20]:
# Recall the cutoff times built earlier; they are the input to make_temporal_cutoffs below.
cutoff_times


Unnamed: 0,customer_id,time,label
0,1,2014-01-01 04:00:00,True
1,2,2014-01-01 05:00:00,True
2,3,2014-01-01 06:00:00,False
3,1,2014-01-01 08:00:00,True


Given the cutoff times above, passing in window_size='1h' and num_windows=2 creates one row per hour over the two hours ending at each original cutoff time, producing the following new dataframe. The result can be passed directly into DFS to make features at the different time points.

In [21]:
# Expand each (customer, cutoff time) pair into a series of cutoff times:
# two per pair (num_windows=2), spaced one hour apart (window_size='1h').
temporal_cutoffs = ft.make_temporal_cutoffs(
    instance_ids=cutoff_times['customer_id'],
    cutoffs=cutoff_times['time'],
    window_size='1h',
    num_windows=2,
)

temporal_cutoffs

Unnamed: 0,time,instance_id
0,2014-01-01 03:00:00,1
1,2014-01-01 04:00:00,1
2,2014-01-01 04:00:00,2
3,2014-01-01 05:00:00,2
4,2014-01-01 05:00:00,3
5,2014-01-01 06:00:00,3
6,2014-01-01 07:00:00,1
7,2014-01-01 08:00:00,1


In [23]:
# Reload the mock customer EntitySet (`es` was rebound in the flight section
# above) with the same seed as at the start of the notebook.
es = ft.demo.load_mock_customer(return_entityset=True, random_seed=0)

# Compute features at every temporal cutoff in one DFS call.
fm, features = ft.dfs(
    entityset=es,
    target_entity='customers',
    cutoff_time=temporal_cutoffs,
    cutoff_time_in_index=True,
)

In [24]:
# Display the feature matrix: one row per (customer_id, time) temporal cutoff.
fm

Unnamed: 0_level_0,Unnamed: 1_level_0,zip_code,COUNT(sessions),NUM_UNIQUE(sessions.device),MODE(sessions.device),SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),COUNT(transactions),NUM_UNIQUE(transactions.product_id),MODE(transactions.product_id),DAY(join_date),DAY(date_of_birth),YEAR(join_date),YEAR(date_of_birth),MONTH(join_date),MONTH(date_of_birth),WEEKDAY(join_date),WEEKDAY(date_of_birth),SUM(sessions.MAX(transactions.amount)),SUM(sessions.MIN(transactions.amount)),SUM(sessions.STD(transactions.amount)),SUM(sessions.NUM_UNIQUE(transactions.product_id)),SUM(sessions.SKEW(transactions.amount)),SUM(sessions.MEAN(transactions.amount)),STD(sessions.MAX(transactions.amount)),STD(sessions.MIN(transactions.amount)),STD(sessions.NUM_UNIQUE(transactions.product_id)),STD(sessions.COUNT(transactions)),STD(sessions.SKEW(transactions.amount)),STD(sessions.SUM(transactions.amount)),STD(sessions.MEAN(transactions.amount)),MAX(sessions.MIN(transactions.amount)),MAX(sessions.STD(transactions.amount)),MAX(sessions.NUM_UNIQUE(transactions.product_id)),MAX(sessions.COUNT(transactions)),MAX(sessions.SKEW(transactions.amount)),MAX(sessions.SUM(transactions.amount)),MAX(sessions.MEAN(transactions.amount)),SKEW(sessions.MAX(transactions.amount)),SKEW(sessions.MIN(transactions.amount)),SKEW(sessions.STD(transactions.amount)),SKEW(sessions.NUM_UNIQUE(transactions.product_id)),SKEW(sessions.COUNT(transactions)),SKEW(sessions.SUM(transactions.amount)),SKEW(sessions.MEAN(transactions.amount)),MIN(sessions.MAX(transactions.amount)),MIN(sessions.STD(transactions.amount)),MIN(sessions.NUM_UNIQUE(transactions.product_id)),MIN(sessions.COUNT(transactions)),MIN(sessions.SKEW(transactions.amount)),MIN(sessions.SUM(transactions.amount)),MIN(sessions.MEAN(transactions.amount)),MEAN(sessions.MAX(transactions.amount)),MEAN(sessions.MIN(transactions.amount)),MEAN(sessions.STD(transactions.amount)),ME
AN(sessions.NUM_UNIQUE(transactions.product_id)),MEAN(sessions.COUNT(transactions)),MEAN(sessions.SKEW(transactions.amount)),MEAN(sessions.SUM(transactions.amount)),MEAN(sessions.MEAN(transactions.amount)),NUM_UNIQUE(sessions.YEAR(session_start)),NUM_UNIQUE(sessions.MODE(transactions.product_id)),NUM_UNIQUE(sessions.MONTH(session_start)),NUM_UNIQUE(sessions.DAY(session_start)),NUM_UNIQUE(sessions.WEEKDAY(session_start)),MODE(sessions.YEAR(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.MONTH(session_start)),MODE(sessions.DAY(session_start)),MODE(sessions.WEEKDAY(session_start)),NUM_UNIQUE(transactions.sessions.customer_id),NUM_UNIQUE(transactions.sessions.device),MODE(transactions.sessions.customer_id),MODE(transactions.sessions.device)
customer_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1
1,2014-01-01 03:00:00,60091,3,3,desktop,3932.56,42.769602,139.23,0.140387,5.81,71.501091,55,5,1,17,18,2011,1994,4,7,6,0,400.95,20.84,129.747625,15,0.325932,219.132533,5.178021,1.571507,0.0,5.773503,0.210827,283.551883,10.255607,8.74,46.905665,5,25,0.234349,1613.93,84.44,0.782152,1.55204,0.763052,0.0,1.732051,0.685199,1.173675,129.0,40.187205,5,15,-0.134754,1052.03,64.5572,133.65,6.946667,43.249208,5,18.333333,0.108644,1310.853333,73.044178,1,3,1,1,1,2014,1,1,1,2,1,3,1,mobile
1,2014-01-01 04:00:00,60091,4,3,tablet,4958.19,42.309717,139.23,-0.006928,5.81,74.002836,67,5,4,17,18,2011,1994,4,7,6,0,540.04,27.62,169.572874,20,-0.505043,304.6017,5.027226,1.285833,0.0,5.678908,0.500353,271.917637,10.426572,8.74,46.905665,5,25,0.234349,1613.93,85.469167,-0.451371,1.452325,1.235445,0.0,1.614843,1.197406,-0.233453,129.0,39.825249,5,12,-0.830975,1025.63,64.5572,135.01,6.905,42.393218,5,16.75,-0.126261,1239.5475,76.150425,1,3,1,1,1,2014,4,1,1,2,1,3,1,tablet
2,2014-01-01 04:00:00,13244,4,2,desktop,4150.3,39.289512,146.81,-0.134786,12.07,84.7,49,5,4,15,18,2012,1986,4,8,6,0,569.29,105.24,157.262738,20,0.045171,340.791792,3.470527,20.424007,0.0,3.86221,0.324809,307.743859,8.983533,56.46,47.93592,5,16,0.295458,1320.64,96.581,0.459305,1.815491,-0.966834,0.0,-0.169238,-0.823347,0.651941,138.38,27.839228,5,8,-0.455197,634.84,76.813125,142.3225,26.31,39.315685,5,12.25,0.011293,1037.575,85.197948,1,3,1,1,1,2014,2,1,1,2,1,2,2,desktop
2,2014-01-01 05:00:00,13244,5,2,desktop,5155.26,38.047944,146.81,-0.121811,12.07,83.149355,62,5,4,15,18,2012,1986,4,8,6,0,688.14,127.06,190.987775,25,-0.269747,418.096407,10.919023,17.801322,0.0,3.361547,0.316873,266.912832,8.543351,56.46,47.93592,5,16,0.295458,1320.64,96.581,-1.814717,1.959531,-0.213518,0.0,-0.379092,-0.667256,1.082192,118.85,27.839228,5,8,-0.455197,634.84,76.813125,137.628,25.412,38.197555,5,12.4,-0.053949,1031.052,83.619281,1,4,1,1,1,2014,2,1,1,2,1,2,2,desktop
3,2014-01-01 05:00:00,13244,2,2,desktop,1886.72,41.199361,146.31,0.637074,6.65,58.96,32,5,1,13,21,2011,2003,8,11,5,4,273.05,14.84,83.432017,10,1.150043,118.370745,13.83808,1.088944,0.0,1.414214,0.061424,2.107178,5.099599,8.19,47.264797,5,17,0.618455,944.85,62.791333,,,,,,,,126.74,36.16722,5,15,0.531588,941.87,55.579412,136.525,7.42,41.716008,5,16.0,0.575022,943.36,59.185373,1,1,1,1,1,2014,1,1,1,2,1,2,3,desktop
3,2014-01-01 06:00:00,13244,4,2,desktop,2867.69,40.349758,146.31,0.318315,6.65,65.174773,44,5,1,13,21,2011,2003,8,11,5,4,493.07,126.66,119.136697,16,0.860577,290.968018,22.808351,40.508892,2.0,7.118052,0.500999,417.557763,16.540737,91.76,47.264797,5,17,0.618455,944.85,91.76,-1.060639,1.87417,1.722323,-2.0,-1.330938,-1.977878,0.201588,91.76,35.70468,1,1,-0.289466,91.76,55.579412,123.2675,31.665,39.712232,4,11.0,0.286859,716.9225,72.742004,1,2,1,1,1,2014,1,1,1,2,1,2,3,desktop
1,2014-01-01 07:00:00,60091,7,3,tablet,7605.53,41.018896,139.43,0.149908,5.81,69.141182,110,5,4,17,18,2011,1994,4,7,6,0,931.86,66.97,280.421418,35,0.562312,493.437492,7.441648,7.470707,0.0,4.386125,0.471955,273.713405,13.123365,26.36,46.905665,5,25,0.640252,1613.93,85.469167,-1.277394,2.552328,-0.755846,0.0,1.927658,1.377768,-0.282093,118.9,30.450261,5,12,-0.830975,809.97,50.623125,133.122857,9.567143,40.060203,5,15.714286,0.08033,1086.504286,70.49107,1,4,1,1,1,2014,1,1,1,2,1,3,1,tablet
1,2014-01-01 08:00:00,60091,8,3,mobile,9025.62,40.442059,139.43,0.019698,5.81,71.631905,126,5,4,17,18,2011,1994,4,7,6,0,1057.97,78.59,312.745952,40,-0.476122,582.193117,7.322191,6.954507,0.0,4.062019,0.589386,279.510713,13.759314,26.36,46.905665,5,25,0.640252,1613.93,88.755625,-0.780493,2.440005,-0.312355,0.0,1.946018,0.77817,-0.424949,118.9,30.450261,5,12,-1.038434,809.97,50.623125,132.24625,9.82375,39.093244,5,15.75,-0.059515,1128.2025,72.77414,1,4,1,1,1,2014,4,1,1,2,1,3,1,mobile
