In [1]:
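#%pip install "featuretools<1.0"  # this notebook uses the pre-1.0 EntitySet API (entity_from_dataframe / normalize_entity / target_entity)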

In [2]:
#%pip install --upgrade numpy

In [40]:
import pandas as pd
import featuretools as ft

In [41]:
# Load the aggregated Seoul bike-rental table from the local data directory.
df_seoul = pd.read_csv('../data/seoul_bike_agg_mean.csv')
# Remote alternative (kept disabled). SECURITY: the `?token=...` query
# parameter that used to be embedded in this URL was an access credential and
# has been removed -- revoke it if it is still valid, and supply credentials
# via an environment variable rather than hardcoding them in the notebook.
#df_seoul = pd.read_csv('https://raw.githubusercontent.com/Jiian/seoulbike/main/data/seoul_bike_agg_mean.csv')
# Preview the first rows as the cell output.
df_seoul.head()

Unnamed: 0.1,Unnamed: 0,rent_count,year,month,day,day_of_week,season,temperature,humudity,holiday,wind_speed,visibility,dewpoint_temp,solar_radiation,rainfall,snowfall
0,1,9539,2017,12,1,4,Winter,-2.454167,37.5,No Holiday,1.5375,18707.5,-13.545833,0.24875,0.0,0
1,2,8523,2017,12,2,5,Winter,1.325,55.5,No Holiday,1.7125,14710.833333,-5.716667,0.26375,0.0,0
2,3,7222,2017,12,3,6,Winter,4.875,84.5,No Holiday,1.6125,4557.5,1.883333,0.125417,4.0,0
3,4,8729,2017,12,4,0,Winter,-0.304167,43.5,No Holiday,3.45,13628.333333,-9.925,0.282917,0.1,0
4,5,8307,2017,12,5,1,Winter,-4.458333,34.5,No Holiday,1.108333,19594.583333,-17.425,0.035833,0.0,0


In [42]:
# Show the dtype pandas inferred for each column when reading the CSV.
df_seoul.dtypes

Unnamed: 0           int64
rent_count           int64
year                 int64
month                int64
day                  int64
day_of_week          int64
season              object
temperature        float64
humudity           float64
holiday             object
wind_speed         float64
visibility         float64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

### Automated Feature Engineering

In [43]:
# Start from an empty EntitySet named 'bikes'.
es = ft.EntitySet(id='bikes')

# Register the bike DataFrame as the 'seoul_bike' entity, keyed on the
# 'Unnamed: 0' column. The call is deliberately left as the cell's final
# expression so the notebook renders the resulting EntitySet summary.
es.entity_from_dataframe(
    entity_id='seoul_bike',
    dataframe=df_seoul,
    index='Unnamed: 0',
)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 353, Columns: 16]
  Relationships:
    No relationships

In [44]:
# Show the column dtypes again now that the frame has been registered in the
# EntitySet (presumably to check whether featuretools altered them -- confirm).
df_seoul.dtypes

Unnamed: 0           int64
rent_count           int64
year                 int64
month                int64
day                  int64
day_of_week          int64
season              object
temperature        float64
humudity           float64
holiday             object
wind_speed         float64
visibility         float64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

In [45]:
# Split each calendar / categorical column of 'seoul_bike' out into its own
# parent entity, in this order. Every pair is (new entity id, index column).
for new_entity, key_column in [
    ("Year", "year"),
    ("Month", "month"),
    ("Day", "day"),
    ("Day_of_week", "day_of_week"),
    ("Season", "season"),
    ("Holiday", "holiday"),
]:
    es.normalize_entity(
        base_entity_id="seoul_bike",
        new_entity_id=new_entity,
        index=key_column,
    )

# Print the entity set so the new entities and relationships can be checked.
print(es)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 353, Columns: 16]
    Year [Rows: 2, Columns: 1]
    Month [Rows: 12, Columns: 1]
    Day [Rows: 31, Columns: 1]
    Day_of_week [Rows: 7, Columns: 1]
    Season [Rows: 4, Columns: 1]
    Holiday [Rows: 2, Columns: 1]
  Relationships:
    seoul_bike.year -> Year.year
    seoul_bike.month -> Month.month
    seoul_bike.day -> Day.day
    seoul_bike.day_of_week -> Day_of_week.day_of_week
    seoul_bike.season -> Season.season
    seoul_bike.holiday -> Holiday.holiday


In [46]:
# Run Deep Feature Synthesis with 'seoul_bike' as the target entity.
# The call returns the computed feature matrix together with a second value
# bound here to `feature_names` (per the featuretools docs this is the list of
# feature definitions, not plain strings).
# NOTE(review): the generated columns include aggregates of rent_count
# (e.g. Year.MEAN(seoul_bike.rent_count)); if rent_count is the modelling
# target these would leak it -- confirm before training on this matrix.
feature_matrix, feature_names = ft.dfs(
    entityset=es,
    target_entity='seoul_bike',
    max_depth=2,   # cap on how deeply feature primitives may be stacked
    verbose=1,     # print the feature count and a progress bar
    n_jobs=3,      # spread the computation across 3 workers
)

Built 405 features
EntitySet scattered to 3 workers in 3 seconds                                                                          
Elapsed: 00:01 | Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████


In [47]:
# Show every generated column name as a plain Python list.
feature_matrix.columns.tolist()

['rent_count',
 'year',
 'month',
 'day',
 'day_of_week',
 'season',
 'temperature',
 'humudity',
 'holiday',
 'wind_speed',
 'visibility',
 'dewpoint_temp',
 'solar_radiation',
 'rainfall',
 'snowfall',
 'Year.COUNT(seoul_bike)',
 'Year.MAX(seoul_bike.dewpoint_temp)',
 'Year.MAX(seoul_bike.humudity)',
 'Year.MAX(seoul_bike.rainfall)',
 'Year.MAX(seoul_bike.rent_count)',
 'Year.MAX(seoul_bike.snowfall)',
 'Year.MAX(seoul_bike.solar_radiation)',
 'Year.MAX(seoul_bike.temperature)',
 'Year.MAX(seoul_bike.visibility)',
 'Year.MAX(seoul_bike.wind_speed)',
 'Year.MEAN(seoul_bike.dewpoint_temp)',
 'Year.MEAN(seoul_bike.humudity)',
 'Year.MEAN(seoul_bike.rainfall)',
 'Year.MEAN(seoul_bike.rent_count)',
 'Year.MEAN(seoul_bike.snowfall)',
 'Year.MEAN(seoul_bike.solar_radiation)',
 'Year.MEAN(seoul_bike.temperature)',
 'Year.MEAN(seoul_bike.visibility)',
 'Year.MEAN(seoul_bike.wind_speed)',
 'Year.MIN(seoul_bike.dewpoint_temp)',
 'Year.MIN(seoul_bike.humudity)',
 'Year.MIN(seoul_bike.rainfall)',

In [48]:
# Display the feature matrix; the notebook renders a truncated preview of its
# rows and columns.
feature_matrix

Unnamed: 0_level_0,rent_count,year,month,day,day_of_week,season,temperature,humudity,holiday,wind_speed,...,Holiday.STD(seoul_bike.wind_speed),Holiday.SUM(seoul_bike.dewpoint_temp),Holiday.SUM(seoul_bike.humudity),Holiday.SUM(seoul_bike.rainfall),Holiday.SUM(seoul_bike.rent_count),Holiday.SUM(seoul_bike.snowfall),Holiday.SUM(seoul_bike.solar_radiation),Holiday.SUM(seoul_bike.temperature),Holiday.SUM(seoul_bike.visibility),Holiday.SUM(seoul_bike.wind_speed)
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,9539,2017,12,1,4,Winter,-2.454167,37.5,No Holiday,1.537500,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
2,8523,2017,12,2,5,Winter,1.325000,55.5,No Holiday,1.712500,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
3,7222,2017,12,3,6,Winter,4.875000,84.5,No Holiday,1.612500,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
4,8729,2017,12,4,0,Winter,-0.304167,43.5,No Holiday,3.450000,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
5,8307,2017,12,5,1,Winter,-4.458333,34.5,No Holiday,1.108333,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349,17162,2018,11,26,0,Autumn,6.320833,70.5,No Holiday,1.029167,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
350,16282,2018,11,27,1,Autumn,7.066667,73.0,No Holiday,1.350000,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
351,16524,2018,11,28,2,Autumn,5.304167,26.0,No Holiday,1.695833,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167
352,16423,2018,11,29,3,Autumn,3.304167,35.5,No Holiday,1.212500,...,0.590867,1396.45,19546.0,1249.6,5956419,6356,190.766667,4345.233333,4.807378e+06,577.679167


In [49]:
# Write the generated feature matrix next to the notebook as CSV.
# Earlier revisions of this cell carried disabled variants that wrote
# "seoul_bike_agg_mean_FG.csv" and called files.download(...) on the result;
# they were commented out and are not needed for this export.
output_csv = "seoul_bike_agg_mean_FG_new.csv"
feature_matrix.to_csv(output_csv)