# Testing separate predictions of casual and registered counts

In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from lightgbm import LGBMRegressor
from sklearn.base import clone
from sklearn.compose import make_column_transformer
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, MinMaxScaler, RobustScaler, OneHotEncoder
from xgboost import XGBRegressor, plot_importance

  from pandas import MultiIndex, Int64Index


In [2]:
# Load the feature table. Later cells read its month/year columns for the
# train/test split, its "count" column as a target, and drop "datetime" and
# "weather" before modelling.
df = pd.read_csv("../Datas/data.csv")

In [5]:
# Load the table whose "casual" and "registered" columns are used as the two
# separate targets in the cells below.
df_casual_register = pd.read_csv("../Datas/train.csv")

In [6]:
# Preview the first rows of the casual/registered table.
df_casual_register.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


In [7]:
# Copy the month/year columns from `df` so this frame can be split with the
# same date conditions as `df`.
# Column assignment aligns on the index: if the two frames did not share the
# same index, the new columns would silently contain NaN and the feature rows
# taken from `df` would no longer correspond to the targets taken from this
# frame. Fail loudly instead.
assert df_casual_register.index.equals(df.index), (
    "df_casual_register and df must share the same row index"
)
df_casual_register['month'] = df['month']
df_casual_register['year'] = df['year']
df_casual_register.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count,month,year
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16,1,2011
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40,1,2011
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32,1,2011
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13,1,2011
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1,1,2011


In [8]:
# Row selectors on df_casual_register: test rows are those with month > 8 in
# 2012, training rows are those with month <= 8 or year 2011.
casual_register_train_rows = (df_casual_register.month <= 8) | (df_casual_register.year == 2011)
casual_register_test_rows = (df_casual_register.month > 8) & (df_casual_register.year == 2012)

# Both targets are modelled as log(1 + value).
y_train_casual = np.log1p(df_casual_register.loc[casual_register_train_rows, "casual"])
y_train_registered = np.log1p(df_casual_register.loc[casual_register_train_rows, "registered"])

y_test_casual = np.log1p(df_casual_register.loc[casual_register_test_rows, "casual"])
y_test_registered = np.log1p(df_casual_register.loc[casual_register_test_rows, "registered"])

In [3]:
# Same date-based split applied to `df`: month > 8 in 2012 is held out,
# everything with month <= 8 or year 2011 is used for training.
in_train_period = (df.month <= 8) | (df.year == 2011)
in_test_period = (df.month > 8) & (df.year == 2012)
# Columns removed from the feature matrices ("count" is the target).
dropped_columns = ["datetime", "weather", "count"]

X_train = df.loc[in_train_period].drop(columns=dropped_columns)
X_test = df.loc[in_test_period].drop(columns=dropped_columns)
# The total count is modelled as log(1 + count).
y_train = np.log1p(df.loc[in_train_period, "count"])
y_test = np.log1p(df.loc[in_test_period, "count"])

In [9]:
# Un-split data, used further down to fit the final models on every row.
df_final_X = df.drop(columns=["datetime", "weather", "count"])
df_final_y = np.log1p(df["count"])

# log(1 + value) targets for the two rider groups, taken from the
# casual/registered table.
df_final_y_casual, df_final_y_registered = (
    np.log1p(df_casual_register[target]) for target in ("casual", "registered")
)


In [10]:
# Columns that are one-hot encoded.
one_hot_features = [
    "holiday",
    "workingday",
    "season",
    "month",
    "day",
    "year",
]
# Columns that are standardised.
standard_feature = [
    "temp",
    "humidity",
    "windspeed",
    "day_number",
    "hour",
]

# handle_unknown="ignore": a category not seen during fit does not raise at
# transform time.
one_hot_pipeline = make_pipeline(OneHotEncoder(handle_unknown="ignore"))
standard_pipeline = make_pipeline(StandardScaler())

# Route each column group through its own sub-pipeline.
processor = make_column_transformer(
    (one_hot_pipeline, one_hot_features),
    (standard_pipeline, standard_feature),
)

In [None]:
# One pipeline per candidate regressor, all seeded with random_state=1.
# A Pipeline stores the step objects it is given without copying them, so
# passing `processor` itself would make all four pipelines share (and re-fit)
# a single ColumnTransformer. clone() gives each one its own unfitted copy.
rfr = make_pipeline(clone(processor), RandomForestRegressor(random_state=1))
etr = make_pipeline(clone(processor), ExtraTreesRegressor(random_state=1))
lgbm = make_pipeline(clone(processor), LGBMRegressor(random_state=1))
xgbr = make_pipeline(clone(processor), XGBRegressor(random_state=1))

## Casual

In [13]:
# compare machine learning models for regression
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from matplotlib import pyplot


# get the candidate models to evaluate
def get_models():
    """Build the candidate regressors, each wrapped in its own pipeline.

    Returns:
        dict: short model name ('rfr', 'etr', 'lgbm', 'xgbr') mapped to an
        unfitted pipeline made of a copy of the notebook-level `processor`
        followed by the regressor (all created with random_state=1).
    """
    regressors = {
        'rfr': RandomForestRegressor(random_state=1),
        'etr': ExtraTreesRegressor(random_state=1),
        'lgbm': LGBMRegressor(random_state=1),
        'xgbr': XGBRegressor(random_state=1),
    }
    # A Pipeline keeps the step objects it is given without copying them, so
    # reusing `processor` directly would make every pipeline share (and
    # re-fit) the same ColumnTransformer. Clone it per pipeline instead.
    return {
        name: make_pipeline(clone(processor), regressor)
        for name, regressor in regressors.items()
    }

# evaluate a given model on a single train/test split (no cross-validation)
def evaluate_model(model, X_train, y_train_casual, X_test, y_test_casual):
    """Fit `model` on the training data and score it on the test data.

    Args:
        model: object exposing fit(X, y) and score(X, y).
        X_train, y_train_casual: features and target used for fitting.
        X_test, y_test_casual: features and target used for scoring.

    Returns:
        Whatever model.score(X_test, y_test_casual) returns.
    """
    model.fit(X_train, y_train_casual)
    return model.score(X_test, y_test_casual)

# Build the candidate pipelines for this comparison.
models = get_models()
# Scores and model names, collected in matching order.
results = []
names = []
for banner_line in (
    '####################################',
    '##     Prediction for Casual      ##',
    '####################################',
):
    print(banner_line)
# Fit each candidate on the training rows and score it on the held-out rows,
# using the log1p-transformed casual target.
for name, model in models.items():
    scores = evaluate_model(model, X_train, y_train_casual, X_test, y_test_casual)
    names.append(name)
    results.append(scores)
    # evaluate_model returns the value of a single score() call
    print('>%s %.3f' % (name, mean(scores)))


####################################
##     Prediction for Casual      ##
####################################
>rfr 0.830
>etr 0.854
>lgbm 0.865
>xgbr 0.842


## Registered

In [14]:
# compare machine learning models for regression
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from matplotlib import pyplot


# get the candidate models to evaluate
def get_models():
    """Build the candidate regressors, each wrapped in its own pipeline.

    Returns:
        dict: short model name ('rfr', 'etr', 'lgbm', 'xgbr') mapped to an
        unfitted pipeline made of a copy of the notebook-level `processor`
        followed by the regressor (all created with random_state=1).
    """
    regressors = {
        'rfr': RandomForestRegressor(random_state=1),
        'etr': ExtraTreesRegressor(random_state=1),
        'lgbm': LGBMRegressor(random_state=1),
        'xgbr': XGBRegressor(random_state=1),
    }
    # A Pipeline keeps the step objects it is given without copying them, so
    # reusing `processor` directly would make every pipeline share (and
    # re-fit) the same ColumnTransformer. Clone it per pipeline instead.
    return {
        name: make_pipeline(clone(processor), regressor)
        for name, regressor in regressors.items()
    }

# evaluate a given model on a single train/test split (no cross-validation)
def evaluate_model(model, X_train, y_train_registered, X_test, y_test_registered):
    """Fit `model` on the training data and score it on the test data.

    Args:
        model: object exposing fit(X, y) and score(X, y).
        X_train, y_train_registered: features and target used for fitting.
        X_test, y_test_registered: features and target used for scoring.

    Returns:
        Whatever model.score(X_test, y_test_registered) returns.
    """
    model.fit(X_train, y_train_registered)
    return model.score(X_test, y_test_registered)

# Build the candidate pipelines for this comparison.
models = get_models()
# Scores and model names, collected in matching order.
results = []
names = []
for banner_line in (
    '####################################',
    '##     Prediction for Registered      ##',
    '####################################',
):
    print(banner_line)
# Fit each candidate on the training rows and score it on the held-out rows,
# using the log1p-transformed registered target.
for name, model in models.items():
    scores = evaluate_model(model, X_train, y_train_registered, X_test, y_test_registered)
    names.append(name)
    results.append(scores)
    # evaluate_model returns the value of a single score() call
    print('>%s %.3f' % (name, mean(scores)))


####################################
##     Prediction for Registered      ##
####################################
>rfr 0.901
>etr 0.935
>lgbm 0.910
>xgbr 0.901


In [16]:
# Display the dict of pipelines left in `models` by the most recently run
# comparison cell.
models

{'rfr': Pipeline(steps=[('columntransformer',
                  ColumnTransformer(transformers=[('pipeline-1',
                                                   Pipeline(steps=[('onehotencoder',
                                                                    OneHotEncoder(handle_unknown='ignore'))]),
                                                   ['holiday', 'workingday',
                                                    'season', 'month', 'day',
                                                    'year']),
                                                  ('pipeline-2',
                                                   Pipeline(steps=[('standardscaler',
                                                                    StandardScaler())]),
                                                   ['temp', 'humidity',
                                                    'windspeed', 'day_number',
                                                    'hour'])])),
                 ('r

## Create Models for Casual and Registered

In [18]:

# Final pipelines, one per (regressor, target) pair.
# A Pipeline stores the step objects it is given without copying them, so
# passing `processor` itself would make all six pipelines (and anything built
# from it earlier) share one ColumnTransformer that is re-fitted by every
# fit() call. clone() gives each pipeline its own unfitted copy.
models_etr_casual = make_pipeline(clone(processor), ExtraTreesRegressor(random_state=1))
models_lgbm_casual = make_pipeline(clone(processor), LGBMRegressor(random_state=1))
models_xgbr_casual = make_pipeline(clone(processor), XGBRegressor(random_state=1))

models_etr_registered = make_pipeline(clone(processor), ExtraTreesRegressor(random_state=1))
models_lgbm_registered = make_pipeline(clone(processor), LGBMRegressor(random_state=1))
models_xgbr_registered = make_pipeline(clone(processor), XGBRegressor(random_state=1))


In [19]:

# Fit the final pipelines on every row: first the casual target, then the
# registered target, in the same order as they were created.
for casual_pipeline in (models_etr_casual, models_lgbm_casual, models_xgbr_casual):
    casual_pipeline.fit(df_final_X, df_final_y_casual)

for registered_pipeline in (models_etr_registered, models_lgbm_registered, models_xgbr_registered):
    registered_pipeline.fit(df_final_X, df_final_y_registered)

# Keep the last-fitted pipeline as the cell's displayed value.
models_xgbr_registered

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('pipeline-1',
                                                  Pipeline(steps=[('onehotencoder',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  ['holiday', 'workingday',
                                                   'season', 'month', 'day',
                                                   'year']),
                                                 ('pipeline-2',
                                                  Pipeline(steps=[('standardscaler',
                                                                   StandardScaler())]),
                                                  ['temp', 'humidity',
                                                   'windspeed', 'day_number',
                                                   'hour'])])),
                ('xgbregress...
        

In [20]:
# pickle.dump(models_etr_casual, open('models_etr_casual.sav', 'wb'))
# pickle.dump(models_lgbm_casual, open('models_lgbm_casual.sav', 'wb'))
# pickle.dump(models_xgbr_casual, open('models_xgbr_casual.sav', 'wb'))

# pickle.dump(models_etr_registered, open('models_etr_registered.sav', 'wb'))
# pickle.dump(models_lgbm_registered, open('models_lgbm_registered.sav', 'wb'))
# pickle.dump(models_xgbr_registered, open('models_xgbr_registered.sav', 'wb'))
