In [89]:
#!pip install featuretools

In [90]:
#!pip install numpy --upgrade

In [129]:
import pandas as pd
import featuretools as ft

In [130]:
df_seoul = pd.read_csv('../data/seoul_bike_agg_mean.csv')
#df_seoul = pd.read_csv('../data/7days_demo.csv')
df_seoul.head()

Unnamed: 0.1,Unnamed: 0,rent_count,year,month,day,dayofweek,season,temperature,humidity,holiday,wind_speed,visibility,dewpoint_temp,solar_radiation,rainfall,snowfall
0,1,9539,2017,12,1,4,Winter,-2.454167,37.5,No Holiday,1.5375,18707.5,-13.545833,0.24875,0.0,0
1,2,8523,2017,12,2,5,Winter,1.325,55.5,No Holiday,1.7125,14710.833333,-5.716667,0.26375,0.0,0
2,3,7222,2017,12,3,6,Winter,4.875,84.5,No Holiday,1.6125,4557.5,1.883333,0.125417,4.0,0
3,4,8729,2017,12,4,0,Winter,-0.304167,43.5,No Holiday,3.45,13628.333333,-9.925,0.282917,0.1,0
4,5,8307,2017,12,5,1,Winter,-4.458333,34.5,No Holiday,1.108333,19594.583333,-17.425,0.035833,0.0,0


In [131]:
df_seoul.dtypes

Unnamed: 0           int64
rent_count           int64
year                 int64
month                int64
day                  int64
dayofweek            int64
season              object
temperature        float64
humidity           float64
holiday             object
wind_speed         float64
visibility         float64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

In [132]:
df_seoul = df_seoul.drop(['rent_count', 'year'], axis =1)

In [94]:
df_seoul['datetime'] = pd.to_datetime(df_seoul['date'])
df_seoul['month'] = df_seoul.datetime.dt.month
df_seoul['day_of_week'] = df_seoul.datetime.dt.dayofweek # mon, tue ... 
df_seoul['day'] = df_seoul.datetime.dt.day

In [95]:
df_seoul_1 = df_seoul.drop(['Unnamed: 0.1','date','rep_count', 'predicted_rep_count', 'predicted_rent_count', 'rent_count'], axis=1)
df_seoul_1.columns

Index(['Unnamed: 0', 'season', 'holiday', 'snowfall', 'rainfall', 'rent_count',
       'temperature', 'humidity', 'wind_speed', 'visibility', 'dewpoint_temp',
       'solar_radiation', 'datetime', 'month', 'day_of_week', 'day'],
      dtype='object')

### Automated Feature Engineering

In [133]:
es = ft.EntitySet(id = 'bikes')

# adding a dataframe 
es.entity_from_dataframe(entity_id = 'seoul_bike', dataframe = df_seoul, index = 'Unnamed: 0')

Entityset: bikes
  Entities:
    seoul_bike [Rows: 353, Columns: 14]
  Relationships:
    No relationships

In [134]:
df_seoul.dtypes

Unnamed: 0           int64
month                int64
day                  int64
dayofweek            int64
season              object
temperature        float64
humidity           float64
holiday             object
wind_speed         float64
visibility         float64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

In [135]:
es.normalize_entity(base_entity_id="seoul_bike",
                    new_entity_id="Month",
                    index="month")

es.normalize_entity(base_entity_id="seoul_bike",
                    new_entity_id="Day",
                    index="day")

es.normalize_entity(base_entity_id="seoul_bike",
                    new_entity_id="Day_of_week",
                    index="dayofweek")

es.normalize_entity(base_entity_id="seoul_bike",
                    new_entity_id="Season",
                    index="season")

es.normalize_entity(base_entity_id="seoul_bike",
                    new_entity_id="Holiday",
                    index="holiday")

print(es)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 353, Columns: 14]
    Month [Rows: 12, Columns: 1]
    Day [Rows: 31, Columns: 1]
    Day_of_week [Rows: 7, Columns: 1]
    Season [Rows: 4, Columns: 1]
    Holiday [Rows: 2, Columns: 1]
  Relationships:
    seoul_bike.month -> Month.month
    seoul_bike.day -> Day.day
    seoul_bike.dayofweek -> Day_of_week.dayofweek
    seoul_bike.season -> Season.season
    seoul_bike.holiday -> Holiday.holiday


In [136]:
feature_matrix, feature_names = ft.dfs(entityset=es, 
target_entity = 'seoul_bike', 
max_depth = 2, 
verbose = 1, 
n_jobs = 3)

Built 298 features
EntitySet scattered to 3 workers in 3 seconds                                                                          
Elapsed: 00:00 | Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████


In [137]:
list(feature_matrix.columns)

['month',
 'day',
 'dayofweek',
 'season',
 'temperature',
 'humidity',
 'holiday',
 'wind_speed',
 'visibility',
 'dewpoint_temp',
 'solar_radiation',
 'rainfall',
 'snowfall',
 'Month.COUNT(seoul_bike)',
 'Month.MAX(seoul_bike.dewpoint_temp)',
 'Month.MAX(seoul_bike.humidity)',
 'Month.MAX(seoul_bike.rainfall)',
 'Month.MAX(seoul_bike.snowfall)',
 'Month.MAX(seoul_bike.solar_radiation)',
 'Month.MAX(seoul_bike.temperature)',
 'Month.MAX(seoul_bike.visibility)',
 'Month.MAX(seoul_bike.wind_speed)',
 'Month.MEAN(seoul_bike.dewpoint_temp)',
 'Month.MEAN(seoul_bike.humidity)',
 'Month.MEAN(seoul_bike.rainfall)',
 'Month.MEAN(seoul_bike.snowfall)',
 'Month.MEAN(seoul_bike.solar_radiation)',
 'Month.MEAN(seoul_bike.temperature)',
 'Month.MEAN(seoul_bike.visibility)',
 'Month.MEAN(seoul_bike.wind_speed)',
 'Month.MIN(seoul_bike.dewpoint_temp)',
 'Month.MIN(seoul_bike.humidity)',
 'Month.MIN(seoul_bike.rainfall)',
 'Month.MIN(seoul_bike.snowfall)',
 'Month.MIN(seoul_bike.solar_radiation)',
 

In [138]:
ori =  pd.read_csv('../data/seoul_bike_agg_mean.csv')
ori[['Unnamed: 0', 'rent_count', 'year']]

Unnamed: 0.1,Unnamed: 0,rent_count,year
0,1,9539,2017
1,2,8523,2017
2,3,7222,2017
3,4,8729,2017
4,5,8307,2017
...,...,...,...
348,349,17162,2018
349,350,16282,2018
350,351,16524,2018
351,352,16423,2018


In [140]:
pd.concat([feature_matrix.reset_index(), ori[['Unnamed: 0', 'rent_count', 'year']]], axis =1).to_csv("seoul_bike_agg_mean_FG_new.csv")

In [77]:
feature_matrix.to_csv("seoul_bike_agg_median_FG_new.csv")
#files.download("seoul_bike_agg_median_FG.csv")
#feature_matrix.to_csv("seoul_bike_agg_mean_FG.csv")
#files.download("seoul_bike_agg_mean_FG.csv")