In [89]:
#!pip install featuretools

In [3]:
#!pip install numpy --upgrade

In [6]:
import pandas as pd
import featuretools as ft

In [7]:
# Load the two input tables from the shared data directory.
df_seoul = pd.read_csv(filepath_or_buffer='../data/seoul_bike_agg_median.csv')
df_seoul_ori = pd.read_csv(filepath_or_buffer='../data/7days_demo.csv')

In [8]:
# Inspect the column dtypes of the frame read from seoul_bike_agg_median.csv.
df_seoul.dtypes

Unnamed: 0           int64
rent_count           int64
year                 int64
month                int64
day                  int64
day_of_week          int64
season              object
temperature        float64
humudity           float64
holiday             object
wind_speed         float64
visibility           int64
dewpoint_temp      float64
solar_radiation    float64
rainfall           float64
snowfall             int64
dtype: object

In [9]:
# Remove 'rent_count' and 'year' from df_seoul.
# errors='ignore' keeps this self-reassigning cell re-runnable: on a second run
# the columns are already gone and a plain drop would raise KeyError.
df_seoul = df_seoul.drop(columns=['rent_count', 'year'], errors='ignore')

In [10]:
# Parse the 'date' column once, then derive the calendar parts from the parsed values.
df_seoul_ori['datetime'] = pd.to_datetime(df_seoul_ori['date'])
date_parts = df_seoul_ori['datetime'].dt
df_seoul_ori['month'] = date_parts.month
df_seoul_ori['day_of_week'] = date_parts.dayofweek  # pandas convention: 0 = Monday ... 6 = Sunday
df_seoul_ori['day'] = date_parts.day
df_seoul_ori['year'] = date_parts.year

In [11]:
# List df_seoul_ori's dtypes, including the datetime/month/day_of_week/day/year columns derived from 'date'.
df_seoul_ori.dtypes

Unnamed: 0                       int64
Unnamed: 0.1                     int64
date                            object
season                          object
holiday                         object
snowfall                         int64
rainfall                       float64
rent_count                       int64
temperature                    float64
humidity                       float64
wind_speed                     float64
visibility                       int64
dewpoint_temp                  float64
solar_radiation                  int64
rep_count                        int64
predicted_rep_count            float64
predicted_rent_count           float64
datetime                datetime64[ns]
month                            int64
day_of_week                      int64
day                              int64
year                             int64
dtype: object

In [12]:
# Rebind df_seoul to a trimmed copy of df_seoul_ori: every column named below is
# removed, everything else is kept in its original order.
excluded_columns = [
    'Unnamed: 0.1',
    'date',
    'datetime',
    'rep_count',
    'predicted_rep_count',
    'predicted_rent_count',
    'rent_count',
    'year',
]
df_seoul = df_seoul_ori.drop(columns=excluded_columns)

# Show which columns remain.
df_seoul.columns

Index(['Unnamed: 0', 'season', 'holiday', 'snowfall', 'rainfall',
       'temperature', 'humidity', 'wind_speed', 'visibility', 'dewpoint_temp',
       'solar_radiation', 'month', 'day_of_week', 'day'],
      dtype='object')

### Automated Feature Engineering

In [13]:
# Create the entity set that holds the tables and relationships passed to ft.dfs.
es = ft.EntitySet(id='bikes')

# Register df_seoul as the 'seoul_bike' entity, using its 'Unnamed: 0' column as the index.
es.entity_from_dataframe(
    dataframe=df_seoul,
    entity_id='seoul_bike',
    index='Unnamed: 0',
)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 7, Columns: 14]
  Relationships:
    No relationships

In [14]:
# Column dtypes of the frame that was registered as the 'seoul_bike' entity.
df_seoul.dtypes

Unnamed: 0           int64
season              object
holiday             object
snowfall             int64
rainfall           float64
temperature        float64
humidity           float64
wind_speed         float64
visibility           int64
dewpoint_temp      float64
solar_radiation      int64
month                int64
day_of_week          int64
day                  int64
dtype: object

In [15]:
# Split each of these 'seoul_bike' columns out into its own entity keyed by that
# column. Pairs are (new entity id, index column) and are processed in this order.
normalized_entities = (
    ("Month", "month"),
    ("Day", "day"),
    ("Day_of_week", "day_of_week"),
    ("Season", "season"),
    ("Holiday", "holiday"),
)

for parent_entity_id, key_column in normalized_entities:
    es.normalize_entity(
        base_entity_id="seoul_bike",
        new_entity_id=parent_entity_id,
        index=key_column,
    )

# Print the resulting entities and relationships.
print(es)

Entityset: bikes
  Entities:
    seoul_bike [Rows: 7, Columns: 14]
    Month [Rows: 1, Columns: 1]
    Day [Rows: 7, Columns: 1]
    Day_of_week [Rows: 7, Columns: 1]
    Season [Rows: 1, Columns: 1]
    Holiday [Rows: 1, Columns: 1]
  Relationships:
    seoul_bike.month -> Month.month
    seoul_bike.day -> Day.day
    seoul_bike.day_of_week -> Day_of_week.day_of_week
    seoul_bike.season -> Season.season
    seoul_bike.holiday -> Holiday.holiday


In [16]:
# Run Deep Feature Synthesis with 'seoul_bike' as the target entity.
# max_depth=2 caps how deeply features may be stacked; n_jobs=3 runs the
# computation on three workers; verbose=1 prints progress.
feature_matrix, feature_names = ft.dfs(
    target_entity='seoul_bike',
    entityset=es,
    max_depth=2,
    n_jobs=3,
    verbose=1,
)

Built 298 features
EntitySet scattered to 3 workers in 3 seconds                                                                          
Elapsed: 00:01 | Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████


In [17]:
# Show every generated feature name as a plain Python list.
feature_matrix.columns.tolist()

['season',
 'holiday',
 'snowfall',
 'rainfall',
 'temperature',
 'humidity',
 'wind_speed',
 'visibility',
 'dewpoint_temp',
 'solar_radiation',
 'month',
 'day_of_week',
 'day',
 'Month.COUNT(seoul_bike)',
 'Month.MAX(seoul_bike.dewpoint_temp)',
 'Month.MAX(seoul_bike.humidity)',
 'Month.MAX(seoul_bike.rainfall)',
 'Month.MAX(seoul_bike.snowfall)',
 'Month.MAX(seoul_bike.solar_radiation)',
 'Month.MAX(seoul_bike.temperature)',
 'Month.MAX(seoul_bike.visibility)',
 'Month.MAX(seoul_bike.wind_speed)',
 'Month.MEAN(seoul_bike.dewpoint_temp)',
 'Month.MEAN(seoul_bike.humidity)',
 'Month.MEAN(seoul_bike.rainfall)',
 'Month.MEAN(seoul_bike.snowfall)',
 'Month.MEAN(seoul_bike.solar_radiation)',
 'Month.MEAN(seoul_bike.temperature)',
 'Month.MEAN(seoul_bike.visibility)',
 'Month.MEAN(seoul_bike.wind_speed)',
 'Month.MIN(seoul_bike.dewpoint_temp)',
 'Month.MIN(seoul_bike.humidity)',
 'Month.MIN(seoul_bike.rainfall)',
 'Month.MIN(seoul_bike.snowfall)',
 'Month.MIN(seoul_bike.solar_radiation)',

In [37]:
# Display the 'Month.COUNT(seoul_bike)' feature column for every row of the feature matrix.
feature_matrix['Month.COUNT(seoul_bike)']

Unnamed: 0
0    7
1    7
2    7
3    7
4    7
5    7
6    7
Name: Month.COUNT(seoul_bike), dtype: int64

In [19]:
# Select the columns from df_seoul_ori that are later placed next to the generated features.
# Each label appears exactly once: listing 'rent_count' twice in the selector
# would emit that column twice in df_merge and in any file written from it.
df_merge = df_seoul_ori[[
    'rent_count',
    'year',
    'date',
    'datetime',
    'rep_count',
    'predicted_rep_count',
    'predicted_rent_count',
]]

In [26]:
#df_merge = pd.read_csv('../data/seoul_bike_agg_median.csv')[['rent_count', 'year']]

In [27]:
# Turn the feature matrix's index back into a regular column, place df_merge's
# columns to its right, and write the combined table to disk.
# NOTE(review): concat(axis=1) pairs rows by index label, so this assumes the
# reset 0..n-1 index lines up with df_merge's index row for row - TODO confirm.
features_flat = feature_matrix.reset_index()
features_with_extras = pd.concat([features_flat, df_merge], axis=1)
features_with_extras.to_csv("../data/7days_demo_FG.csv")

In [77]:
# Write the generated feature matrix (index included) to a CSV in the working directory.
feature_matrix.to_csv(path_or_buf="seoul_bike_agg_median_FG_new.csv")
#files.download("seoul_bike_agg_median_FG.csv")
#feature_matrix.to_csv("seoul_bike_agg_mean_FG.csv")
#files.download("seoul_bike_agg_mean_FG.csv")