# Daily Bike Share Regression

## Importing Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Importing Dataset

In [2]:
dataset = pd.read_csv('daily-bike-share.csv')
dataset.head()

Unnamed: 0,day,mnth,year,season,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,rentals
0,1,1,2011,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331
1,2,1,2011,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131
2,3,1,2011,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120
3,4,1,2011,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108
4,5,1,2011,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82


In [3]:
dataset.isnull().sum() # Missing values in Dataset

day           0
mnth          0
year          0
season        0
holiday       0
weekday       0
workingday    0
weathersit    0
temp          0
atemp         0
hum           0
windspeed     0
rentals       0
dtype: int64

In [4]:
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2, random_state=0)

## Model 1 - Training the Multiple Linear Regression model on the Training set

In [6]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the Test set results

In [7]:
y_pred = regressor.predict(X_test)

## Using R2_Score For Prediction

In [8]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

0.6661127587466706

## Using K-Fold For Prediction

In [9]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=regressor, X=X_train, y=Y_train, cv=10)
print("Accuracy : {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation : {:.2f} %".format(accuracies.std()*100))

Accuracy : 66.30 %
Standard Deviation : 7.08 %


## Model 2 - Training the Polynomial Linear Regression model on the Training set

In [17]:
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 2)
X_poly = poly_reg.fit_transform(X_train)
new_regressor = LinearRegression() 
new_regressor.fit(X_poly,Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the Test set results

In [18]:
X_poly_test = poly_reg.fit_transform(X_test)
y_pred = new_regressor.predict(X_poly_test)

## Using R2_Score For Prediction

In [19]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

0.7642466906870424

## Using K-Fold For Prediction

In [20]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=new_regressor, X=X_poly, y=Y_train, cv=10)
print("Accuracy : {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation : {:.2f} %".format(accuracies.std()*100))

Accuracy : 44.57 %
Standard Deviation : 82.62 %


## Model 3 - Training the Support vector model on the Training set

In [21]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train,Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

## Predicting the Test set results

In [22]:
y_pred = regressor.predict(X_test)

## Using R2_Score For Prediction

In [23]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

-0.050009376252283744

## Using K-Fold For Prediction

In [25]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=regressor,X=X_train,y=Y_train,cv=10)
print("Accuracy : {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation : {:.2f} %".format(accuracies.std()*100))

Accuracy : -3.86 %
Standard Deviation : 2.35 %


## Model 4 - Training the Decision Tree model on the Training set

In [26]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train,Y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

## Predicting the Test set results

In [27]:
y_pred = regressor.predict(X_test)

## Using R2_Score For Prediction

In [28]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

0.7532703911437246

## Using K-Fold For Prediction

In [29]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=regressor,X=X_train,y=Y_train,cv=10)
print("Accuracy : {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation : {:.2f} %".format(accuracies.std()*100))

Accuracy : 66.03 %
Standard Deviation : 10.75 %


## Model 4 - Training the Random Forest  model on the Training set

In [30]:
from sklearn.ensemble import RandomForestClassifier
regressor = RandomForestClassifier(n_estimators=10,random_state=0)
regressor.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

## Predicting the Test set results

In [31]:
y_pred = regressor.predict(X_test)

## Using R2_Score For Prediction

In [32]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

0.04775763041069436

## Model 4 - Training the XGBoost model on the Training set **(Best Performance)**

In [34]:
from xgboost import XGBRFRegressor
regressor = XGBRFRegressor()
regressor.fit(X_train,Y_train)



XGBRFRegressor(base_score=0.5, colsample_bylevel=1, colsample_bynode=0.8,
               colsample_bytree=1, gamma=0, learning_rate=1, max_delta_step=0,
               max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
               n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
               silent=None, subsample=0.8, verbosity=1)

## Predicting the Test set results

In [35]:
y_pred = regressor.predict(X_test)

## Using R2_Score For Prediction

In [36]:
from sklearn.metrics import r2_score
r2_score(Y_test,y_pred)

0.7103474331870159

## Using K-Fold For Prediction

In [37]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=regressor,X=X_train,y=Y_train,cv=10)
print("Accuracy : {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation : {:.2f} %".format(accuracies.std()*100))

Accuracy : 73.99 %
Standard Deviation : 6.22 %
