In [54]:
# Install step, kept commented out; run it once if featuretools is missing from the environment.
# NOTE(review): the cells below use es.entity_from_dataframe(...) and ft.dfs(target_entity=...),
# which look like the pre-1.0 featuretools API — consider pinning the version, e.g.
# `%pip install "featuretools<1.0"`. TODO confirm the exact version this notebook was run with.
#!pip install featuretools

In [2]:
import pandas as pd
import featuretools as ft

In [3]:
# Load the cleaned hourly bike-rental table (path is relative to the notebook's directory).
# The 'Unnamed: 0' column that comes with it is reused further down as the entity index.
df_seoul = pd.read_csv('../data/bike_clean.csv')
df_seoul.head()

Unnamed: 0.1,Unnamed: 0,datetime,date,hour,season,holiday,open,rent_count,temperature,humidity,wind_speed,visibility,dewpoint_temp,solar_radiation,rainfall,snowfall
0,1,2017-12-01 00:00:00,2017-12-01,0,Winter,No Holiday,Yes,254,-5.2,37,2.2,20000,-17.6,0.0,0.0,0
1,2,2017-12-01 01:00:00,2017-12-01,1,Winter,No Holiday,Yes,204,-5.5,38,0.8,20000,-17.6,0.0,0.0,0
2,3,2017-12-01 02:00:00,2017-12-01,2,Winter,No Holiday,Yes,173,-6.0,39,1.0,20000,-17.7,0.0,0.0,0
3,4,2017-12-01 03:00:00,2017-12-01,3,Winter,No Holiday,Yes,107,-6.2,40,0.9,20000,-17.6,0.0,0.0,0
4,5,2017-12-01 04:00:00,2017-12-01,4,Winter,No Holiday,Yes,78,-6.0,36,2.3,20000,-18.6,0.0,0.0,0


In [4]:
# Inspect column dtypes; 'datetime' and 'date' are still plain strings (object) at this point.
df_seoul.dtypes

Unnamed: 0           int64
datetime            object
date                object
hour                 int64
season              object
holiday             object
open                object
rent_count           int64
temperature        float64
humidity             int64
wind_speed         float64
visibility           int64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

### Datetime Feature Engineering

In [6]:
# Parse the timestamp strings first so the .dt accessor becomes available,
# then derive every calendar feature from the parsed column in a single chained step.
df_seoul['datetime'] = pd.to_datetime(df_seoul['datetime'])
df_seoul = df_seoul.assign(
    month=lambda frame: frame['datetime'].dt.month,              # calendar month number
    week=lambda frame: frame['datetime'].dt.isocalendar().week,  # ISO week number
    day=lambda frame: frame['datetime'].dt.day,                  # day of the month
    dayofyear=lambda frame: frame['datetime'].dt.dayofyear,      # ordinal day within the year
    dayofweek=lambda frame: frame['datetime'].dt.dayofweek,      # integer weekday code, Monday=0 ... Sunday=6
)

In [15]:
# Rows per calendar month — with hourly data each count should equal 24 x (days in that month).
df_seoul.month.value_counts(dropna=False)

12    744
1     744
3     744
5     744
7     744
8     744
10    744
4     720
6     720
9     720
11    720
2     672
Name: month, dtype: int64

In [16]:
# Rows per ISO week number (168 = 24 x 7 for a complete week).
# NOTE(review): the week number alone does not carry the year, so week 48 (192 rows) appears to
# pool the partial weeks at the start (Dec 2017) and at the end (Nov 2018) of the data —
# confirm this is acceptable before using 'week' as a feature.
df_seoul.week.value_counts(dropna=False)

48     192
21     168
50     168
51     168
52     168
1      168
2      168
3      168
4      168
5      168
6      168
7      168
9      168
20     168
10     168
11     168
12     168
13     168
14     168
15     168
16     168
17     168
18     168
19     168
47     168
8      168
22     168
49     168
23     168
24     168
25     168
26     168
27     168
28     168
29     168
30     168
31     168
32     168
33     168
34     168
35     168
36     168
37     168
38     168
39     168
40     168
41     168
42     168
43     168
44     168
45     168
46     168
NaN      0
Name: week, dtype: Int64

In [45]:
# Rows per day-of-month; days 29-31 have fewer rows because not every month contains them.
df_seoul.day.value_counts(dropna=False)

1     288
2     288
28    288
27    288
26    288
25    288
24    288
23    288
22    288
21    288
20    288
19    288
18    288
17    288
16    288
15    288
14    288
13    288
12    288
11    288
10    288
9     288
8     288
7     288
6     288
5     288
4     288
3     288
29    264
30    264
31    168
Name: day, dtype: int64

In [46]:
# Rows per weekday; dayofweek is an integer code (pandas convention: Monday=0 ... Sunday=6).
df_seoul.dayofweek.value_counts(dropna=False)
# TODO: decide whether this integer weekday code should be converted to a string / categorical label.

4    1272
5    1248
6    1248
0    1248
1    1248
2    1248
3    1248
Name: dayofweek, dtype: int64

### Automated Feature Engineering

In [7]:
# Build the input table for automated feature engineering: leave out rent_count
# (presumably the prediction target), the raw timestamp columns and the hand-made
# calendar features created above.
df_x = df_seoul.drop(
    columns=[
        'rent_count',
        'datetime',
        'date',
        'month',
        'week',
        'day',
        'dayofyear',
        'dayofweek',
    ]
)

In [8]:
# Start an empty EntitySet named 'bikes' ...
es = ft.EntitySet(id='bikes')

# ... and register the predictor table as the entity 'seoul_bike',
# using the 'Unnamed: 0' column as its index.
es.entity_from_dataframe(
    entity_id='seoul_bike',
    dataframe=df_x,
    index='Unnamed: 0',
)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 8760, Columns: 13]
  Relationships:
    No relationships

In [9]:
# Split three columns of 'seoul_bike' out into their own parent entities so that
# DFS can aggregate the remaining columns per hour, per season and per holiday flag.
# Each pair is (name of the new parent entity, column that becomes its index).
for parent_entity_id, key_column in [
    ("hour_of_day", "hour"),
    ("Season", "season"),
    ("Holiday", "holiday"),
]:
    es.normalize_entity(
        base_entity_id="seoul_bike",
        new_entity_id=parent_entity_id,
        index=key_column,
    )

print(es)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 8760, Columns: 13]
    hour_of_day [Rows: 24, Columns: 1]
    Season [Rows: 4, Columns: 1]
    Holiday [Rows: 2, Columns: 1]
  Relationships:
    seoul_bike.hour -> hour_of_day.hour
    seoul_bike.season -> Season.season
    seoul_bike.holiday -> Holiday.holiday


In [10]:
# Run Deep Feature Synthesis on the 'seoul_bike' entity.
# max_depth=2 bounds how many primitives may be stacked; n_jobs=3 computes the matrix
# on three parallel workers (see the "scattered to 3 workers" line in the output).
feature_matrix, feature_names = ft.dfs(
    entityset=es,
    target_entity='seoul_bike',
    max_depth=2,
    verbose=1,
    n_jobs=3,
)

Built 177 features
EntitySet scattered to 3 workers in 10 seconds                                                                         
Elapsed: 00:02 | Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████


In [11]:
# List the columns of the generated feature matrix (original predictors followed by the aggregates).
feature_matrix.columns

Index(['hour', 'season', 'holiday', 'open', 'temperature', 'humidity',
       'wind_speed', 'visibility', 'dewpoint_temp', 'solar_radiation',
       ...
       'Holiday.STD(seoul_bike.visibility)',
       'Holiday.STD(seoul_bike.wind_speed)',
       'Holiday.SUM(seoul_bike.dewpoint_temp)',
       'Holiday.SUM(seoul_bike.humidity)', 'Holiday.SUM(seoul_bike.rainfall)',
       'Holiday.SUM(seoul_bike.snowfall)',
       'Holiday.SUM(seoul_bike.solar_radiation)',
       'Holiday.SUM(seoul_bike.temperature)',
       'Holiday.SUM(seoul_bike.visibility)',
       'Holiday.SUM(seoul_bike.wind_speed)'],
      dtype='object', length=177)

In [12]:
# Preview the first rows. The parent-entity aggregates (e.g. Holiday.SUM(...)) carry the same
# value for every row that shares the parent key, which is why they repeat in the preview.
feature_matrix.head()

Unnamed: 0_level_0,hour,season,holiday,open,temperature,humidity,wind_speed,visibility,dewpoint_temp,solar_radiation,...,Holiday.STD(seoul_bike.visibility),Holiday.STD(seoul_bike.wind_speed),Holiday.SUM(seoul_bike.dewpoint_temp),Holiday.SUM(seoul_bike.humidity),Holiday.SUM(seoul_bike.rainfall),Holiday.SUM(seoul_bike.snowfall),Holiday.SUM(seoul_bike.solar_radiation),Holiday.SUM(seoul_bike.temperature),Holiday.SUM(seoul_bike.visibility),Holiday.SUM(seoul_bike.wind_speed)
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,Winter,No Holiday,Yes,-5.2,37,2.2,20000,-17.6,0.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
2,1,Winter,No Holiday,Yes,-5.5,38,0.8,20000,-17.6,0.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
3,2,Winter,No Holiday,Yes,-6.0,39,1.0,20000,-17.7,0.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
4,3,Winter,No Holiday,Yes,-6.2,40,0.9,20000,-17.6,0.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
5,4,Winter,No Holiday,Yes,-6.0,36,2.3,20000,-18.6,0.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8


In [31]:
# Turn the entity index ('Unnamed: 0') back into an ordinary column so it can serve as
# the merge key further down. The guard keeps this cell idempotent: re-running an
# unconditional reset_index would push the fresh RangeIndex into an extra 'index' column.
if 'Unnamed: 0' not in feature_matrix.columns:
    feature_matrix = feature_matrix.reset_index()

In [50]:
# Full column list after the index reset: 'Unnamed: 0' now appears as a regular column.
list(feature_matrix.columns)

['Unnamed: 0',
 'hour',
 'season',
 'holiday',
 'open',
 'temperature',
 'humidity',
 'wind_speed',
 'visibility',
 'dewpoint_temp',
 'solar_radiation',
 'rainfall',
 'snowfall',
 'hour_of_day.COUNT(seoul_bike)',
 'hour_of_day.MAX(seoul_bike.dewpoint_temp)',
 'hour_of_day.MAX(seoul_bike.humidity)',
 'hour_of_day.MAX(seoul_bike.rainfall)',
 'hour_of_day.MAX(seoul_bike.snowfall)',
 'hour_of_day.MAX(seoul_bike.solar_radiation)',
 'hour_of_day.MAX(seoul_bike.temperature)',
 'hour_of_day.MAX(seoul_bike.visibility)',
 'hour_of_day.MAX(seoul_bike.wind_speed)',
 'hour_of_day.MEAN(seoul_bike.dewpoint_temp)',
 'hour_of_day.MEAN(seoul_bike.humidity)',
 'hour_of_day.MEAN(seoul_bike.rainfall)',
 'hour_of_day.MEAN(seoul_bike.snowfall)',
 'hour_of_day.MEAN(seoul_bike.solar_radiation)',
 'hour_of_day.MEAN(seoul_bike.temperature)',
 'hour_of_day.MEAN(seoul_bike.visibility)',
 'hour_of_day.MEAN(seoul_bike.wind_speed)',
 'hour_of_day.MIN(seoul_bike.dewpoint_temp)',
 'hour_of_day.MIN(seoul_bike.humidity)'

In [42]:
# Spot check one aggregate: it takes one distinct value per hour of day,
# each shared by the 365 rows recorded at that hour.
feature_matrix['hour_of_day.SUM(seoul_bike.temperature)'].value_counts()

4119.5    365
3987.0    365
4455.8    365
4652.0    365
4887.2    365
5212.4    365
5584.8    365
5933.0    365
6167.2    365
6218.2    365
6138.1    365
5933.3    365
5643.7    365
5223.2    365
4712.2    365
4152.2    365
3714.6    365
3497.1    365
3489.6    365
3565.6    365
3659.6    365
3757.2    365
3865.9    365
4285.0    365
Name: hour_of_day.SUM(seoul_bike.temperature), dtype: int64

In [25]:
# Load the table that already contains the hand-engineered date features.
# NOTE(review): this file is not written anywhere in this notebook, and its columns
# (day_of_month, dayName_of_week, month_name, ...) differ from the ones created above —
# presumably it comes from a separate notebook; confirm and document where it is produced.
df = pd.read_csv('seoulDate_dateEngin')
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,datetime,date,hour,season,holiday,open,rent_count,temperature,...,solar_radiation,rainfall,snowfall,month,week,day_of_month,day_of_year,day_of_week,dayName_of_week,month_name
0,0,1,2017-12-01 00:00:00,2017-12-01,0,Winter,No Holiday,Yes,254,-5.2,...,0.0,0.0,0,12,48,1,335,4,Friday,Dec
1,1,2,2017-12-01 01:00:00,2017-12-01,1,Winter,No Holiday,Yes,204,-5.5,...,0.0,0.0,0,12,48,1,335,4,Friday,Dec
2,2,3,2017-12-01 02:00:00,2017-12-01,2,Winter,No Holiday,Yes,173,-6.0,...,0.0,0.0,0,12,48,1,335,4,Friday,Dec
3,3,4,2017-12-01 03:00:00,2017-12-01,3,Winter,No Holiday,Yes,107,-6.2,...,0.0,0.0,0,12,48,1,335,4,Friday,Dec
4,4,5,2017-12-01 04:00:00,2017-12-01,4,Winter,No Holiday,Yes,78,-6.0,...,0.0,0.0,0,12,48,1,335,4,Friday,Dec


In [54]:
def _aggregated_feature_names():
    """Spell out the aggregate feature names for the three parent entities.

    For each parent entity (hour_of_day, Season, Holiday) the names follow one
    pattern: a COUNT of child rows, then MAX/MEAN/MIN over the numeric weather
    columns, MODE/NUM_UNIQUE over the categorical columns other than the
    parent's own key, and finally SKEW/STD/SUM over the numeric columns again.

    Returns:
        list[str]: 165 column names (55 per parent entity), in the order
        hour_of_day, Season, Holiday.
    """
    numeric_columns = [
        'dewpoint_temp', 'humidity', 'rainfall', 'snowfall',
        'solar_radiation', 'temperature', 'visibility', 'wind_speed',
    ]
    categorical_columns = ['holiday', 'hour', 'open', 'season']
    # (parent entity id, key column that was split out of seoul_bike)
    parents = [('hour_of_day', 'hour'), ('Season', 'season'), ('Holiday', 'holiday')]

    names = []
    for parent, key in parents:
        # A parent never aggregates over its own key column.
        other_categoricals = [column for column in categorical_columns if column != key]

        names.append(f'{parent}.COUNT(seoul_bike)')
        for primitive in ('MAX', 'MEAN', 'MIN'):
            names.extend(
                f'{parent}.{primitive}(seoul_bike.{column})' for column in numeric_columns
            )
        for primitive in ('MODE', 'NUM_UNIQUE'):
            names.extend(
                f'{parent}.{primitive}(seoul_bike.{column})' for column in other_categoricals
            )
        for primitive in ('SKEW', 'STD', 'SUM'):
            names.extend(
                f'{parent}.{primitive}(seoul_bike.{column})' for column in numeric_columns
            )
    return names


# Names of the aggregated feature columns to carry over into the final table.
col_name = _aggregated_feature_names()

In [55]:
# Keep only the row identifier plus the aggregated feature columns named in col_name.
filtered_feature_matrix = feature_matrix.loc[:, ['Unnamed: 0', *col_name]]

In [56]:
# Display the filtered matrix (the rendered output elides the middle rows and columns).
filtered_feature_matrix

Unnamed: 0.1,Unnamed: 0,hour_of_day.COUNT(seoul_bike),hour_of_day.MAX(seoul_bike.dewpoint_temp),hour_of_day.MAX(seoul_bike.humidity),hour_of_day.MAX(seoul_bike.rainfall),hour_of_day.MAX(seoul_bike.snowfall),hour_of_day.MAX(seoul_bike.solar_radiation),hour_of_day.MAX(seoul_bike.temperature),hour_of_day.MAX(seoul_bike.visibility),hour_of_day.MAX(seoul_bike.wind_speed),...,Holiday.STD(seoul_bike.visibility),Holiday.STD(seoul_bike.wind_speed),Holiday.SUM(seoul_bike.dewpoint_temp),Holiday.SUM(seoul_bike.humidity),Holiday.SUM(seoul_bike.rainfall),Holiday.SUM(seoul_bike.snowfall),Holiday.SUM(seoul_bike.solar_radiation),Holiday.SUM(seoul_bike.temperature),Holiday.SUM(seoul_bike.visibility),Holiday.SUM(seoul_bike.wind_speed)
0,1,365,25.6,98,18.0,35,0.00,32.1,20000,4.6,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
1,2,365,26.0,98,8.5,40,0.00,31.7,20000,5.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
2,3,365,25.3,98,6.5,41,0.00,31.5,20000,4.3,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
3,4,365,25.5,98,7.0,41,0.00,31.2,20000,4.5,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
4,5,365,25.5,98,19.0,39,0.00,31.1,20000,6.9,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,8756,365,27.2,98,7.0,33,0.81,36.1,20000,5.8,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8756,8757,365,26.8,98,24.0,33,0.12,34.5,20000,6.1,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8757,8758,365,25.7,98,29.5,35,0.00,33.3,20000,7.3,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8758,8759,365,25.1,98,6.0,35,0.00,32.9,20000,5.4,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8


In [58]:
# Attach the aggregated features to the date-engineered table.
# The join keys are df's 'Unnamed: 0.1' and the feature matrix's 'Unnamed: 0'; in the
# previews both start at 1 on the first row, i.e. they look like the same row identifier.
# validate='one_to_one' makes pandas raise a MergeError if either key contains duplicates,
# instead of silently multiplying rows in the result.
df_1 = df.merge(
    filtered_feature_matrix,
    left_on='Unnamed: 0.1',
    right_on='Unnamed: 0',
    how='left',
    validate='one_to_one',
)
df_1

Unnamed: 0,Unnamed: 0_x,Unnamed: 0.1,datetime,date,hour,season,holiday,open,rent_count,temperature,...,Holiday.STD(seoul_bike.visibility),Holiday.STD(seoul_bike.wind_speed),Holiday.SUM(seoul_bike.dewpoint_temp),Holiday.SUM(seoul_bike.humidity),Holiday.SUM(seoul_bike.rainfall),Holiday.SUM(seoul_bike.snowfall),Holiday.SUM(seoul_bike.solar_radiation),Holiday.SUM(seoul_bike.temperature),Holiday.SUM(seoul_bike.visibility),Holiday.SUM(seoul_bike.wind_speed)
0,0,1,2017-12-01 00:00:00,2017-12-01,0,Winter,No Holiday,Yes,254,-5.2,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
1,1,2,2017-12-01 01:00:00,2017-12-01,1,Winter,No Holiday,Yes,204,-5.5,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
2,2,3,2017-12-01 02:00:00,2017-12-01,2,Winter,No Holiday,Yes,173,-6.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
3,3,4,2017-12-01 03:00:00,2017-12-01,3,Winter,No Holiday,Yes,107,-6.2,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
4,4,5,2017-12-01 04:00:00,2017-12-01,4,Winter,No Holiday,Yes,78,-6.0,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,8755,8756,2018-11-30 19:00:00,2018-11-30,19,Autumn,No Holiday,Yes,1003,4.2,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8756,8756,8757,2018-11-30 20:00:00,2018-11-30,20,Autumn,No Holiday,Yes,764,3.4,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8757,8757,8758,2018-11-30 21:00:00,2018-11-30,21,Autumn,No Holiday,Yes,694,2.6,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8
8758,8758,8759,2018-11-30 22:00:00,2018-11-30,22,Autumn,No Holiday,Yes,712,2.1,...,6064.993949,1.029499,35580.4,486850,1268.8,6356,4747.92,108556.1,119292280,14319.8


In [60]:
# Persist the merged table for the next stage.
# index=True is the pandas default and is spelled out here on purpose: the row index is
# written as an unnamed first column, which reappears as 'Unnamed: 0' when the file is read back.
# NOTE(review): switch to index=False only after checking that no downstream reader relies on that column.
df_1.to_csv('seoulDate_automatedEngin', index=True)