# Using different algorithms

#### Importing required libraries

In [1]:
#General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

#Regression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor,AdaBoostRegressor

#Model selection
from sklearn.model_selection import train_test_split,cross_validate

#Evaluation
from sklearn.metrics import mean_squared_error

#### Loading Dataset

In [2]:
df_train = pd.read_csv("C:\\Users\\rkrish95\\Desktop\\DeveloperDen\\DataSet\\RentalDemand\\train.csv")
df_test = pd.read_csv("C:\\Users\\rkrish95\\Desktop\\DeveloperDen\\DataSet\\RentalDemand\\test.csv")

#### date column type conversion

In [3]:
df_train['date'] = pd.to_datetime(df_train['date'], errors='coerce')
df_test['date'] = pd.to_datetime(df_test['date'], errors='coerce')

In [4]:
pd.date_range(start='1/1/2018', end='1/08/2018')

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
              dtype='datetime64[ns]', freq='D')

In [5]:
df_train

Unnamed: 0,date,hour,demand
0,2018-08-18,9,91
1,2018-08-18,10,21
2,2018-08-18,13,23
3,2018-08-18,14,104
4,2018-08-18,15,81
...,...,...,...
18242,2021-02-28,19,95
18243,2021-02-28,20,88
18244,2021-02-28,21,39
18245,2021-02-28,22,104


In [6]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18247 entries, 0 to 18246
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    18247 non-null  datetime64[ns]
 1   hour    18247 non-null  int64         
 2   demand  18247 non-null  int64         
dtypes: datetime64[ns](1), int64(2)
memory usage: 427.8 KB


In [7]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7650 entries, 0 to 7649
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    7650 non-null   datetime64[ns]
 1   hour    7650 non-null   int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 119.7 KB


prophet -  trend and seas

np.sin - feat

np.cos - feat

spline - 

#### Extracting details from date column

In [8]:
#Extracting Day, month, year, hour, day of week
df_train['day'] = df_train['date'].dt.day
df_train['month'] = df_train['date'].dt.month
df_train['year'] = df_train['date'].dt.year
df_train['weekday'] = df_train['date'].dt.weekday

In [9]:
#Extracting Day, month, year, hour, day of week
df_test['day'] = df_test['date'].dt.day
df_test['month'] = df_test['date'].dt.month
df_test['year'] = df_test['date'].dt.year
df_test['weekday'] = df_test['date'].dt.weekday

In [10]:
X_train = df_train.drop(['demand','date'], axis=1)
y_train = df_train['demand']
X_test = df_train.drop(['demand','date'], axis=1)
y_test = df_train['demand']

In [11]:
X_train.head(1)

Unnamed: 0,hour,day,month,year,weekday
0,9,18,8,2018,5


#### Adding all the models to the dictonary 

In [12]:
tree_models = {'RandomForestRegressor' : RandomForestRegressor(n_estimators=100, max_depth=5, random_state=0),
          'AdaBoostRegressor' : AdaBoostRegressor(n_estimators=100, random_state=0),
          'GradientBoostingRegressor' : GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=0)
              }
print([tree_models.values()])

[dict_values([RandomForestRegressor(max_depth=5, random_state=0), AdaBoostRegressor(n_estimators=100, random_state=0), GradientBoostingRegressor(max_depth=5, random_state=0)])]


#### Validate all model score 

In [13]:
tree_models = {'RandomForestRegressor' : RandomForestRegressor(n_estimators=100, max_depth=5, random_state=0),
          'AdaBoostRegressor' : AdaBoostRegressor(n_estimators=100, random_state=0),
          'GradientBoostingRegressor' : GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=0)}
rmse = []
dict_m = {}
list_of_dict_values = list(tree_models.values())
print(list_of_dict_values)
for iterator in list_of_dict_values:
    print(iterator)
    cv_results = cross_validate(iterator, X_train, y_train, cv=5,scoring='neg_mean_squared_error', return_train_score=True)
    rmse.append(np.sqrt(-cv_results['test_score']).mean())
    dict_m[iterator.__class__.__name__] = np.sqrt(-cv_results['test_score']).mean()
    print(rmse)

[RandomForestRegressor(max_depth=5, random_state=0), AdaBoostRegressor(n_estimators=100, random_state=0), GradientBoostingRegressor(max_depth=5, random_state=0)]
RandomForestRegressor(max_depth=5, random_state=0)
[37.22169594007861]
AdaBoostRegressor(n_estimators=100, random_state=0)
[37.22169594007861, 38.598487891867805]
GradientBoostingRegressor(max_depth=5, random_state=0)
[37.22169594007861, 38.598487891867805, 36.06746669040064]


In [14]:
# RMSE values for each model
rmse

[37.22169594007861, 38.598487891867805, 36.06746669040064]

In [15]:
print(dict_m)

{'RandomForestRegressor': 37.22169594007861, 'AdaBoostRegressor': 38.598487891867805, 'GradientBoostingRegressor': 36.06746669040064}


In [16]:
min(dict_m, key=dict_m.get)

'GradientBoostingRegressor'

In [17]:
print(tree_models[min(dict_m, key=dict_m.get)])

GradientBoostingRegressor(max_depth=5, random_state=0)


#### choosing the model

In [18]:
gbr = tree_models[min(dict_m, key=dict_m.get)]
gbr.fit(X_train, y_train)
gbr_pred = gbr.predict(X_test)

In [19]:
gbr_rmse = np.sqrt(mean_squared_error(y_test, gbr_pred))
gbr_rmse

31.844043025223083

In [2]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-1.7.1-py3-none-win_amd64.whl (89.1 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.7.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
from xgboost import XGBRegressor

In [5]:
xgb = XGBRegressor(n_estimators=100,max_depth=7,eta=0.05,subsample=0.6,colsample_bytree=1)

In [None]:
X_train = 

In [None]:
xgb_cv = cross_validate(xgb,X_train,y_train,cv=5,scoring='neg_mean_squared_error',return_train_score=True)