In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sea
from sklearn.preprocessing import StandardScaler
from prettytable import PrettyTable

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv('/Users/ankusmanish/Desktop/Training/Datasets/Week9/bike_sharing.csv')

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17379 entries, 0 to 17378
Data columns (total 17 columns):
instant       17379 non-null int64
dteday        17379 non-null object
season        17379 non-null int64
yr            17379 non-null int64
mnth          17379 non-null int64
hr            17379 non-null int64
holiday       17379 non-null int64
weekday       17379 non-null int64
workingday    17379 non-null int64
weathersit    17379 non-null int64
temp          17379 non-null float64
atemp         17379 non-null float64
hum           17379 non-null float64
windspeed     17379 non-null float64
casual        17379 non-null int64
registered    17379 non-null int64
cnt           17379 non-null int64
dtypes: float64(4), int64(12), object(1)
memory usage: 2.3+ MB


In [4]:
data.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [5]:
data['weathersit'].value_counts()

1    11413
2     4544
3     1419
4        3
Name: weathersit, dtype: int64

In [6]:
data.drop(['instant','dteday'], axis = 1, inplace = True)

In [7]:
data.head()

Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [8]:
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
y = np.reshape(y,(len(y),1))

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
algos = {'DecisionTreeRegressor':DecisionTreeRegressor(), 'RandomForestRegressor':RandomForestRegressor(), 
         'Support Vector Regressor':SVR(), 'Linear Regression':LinearRegression()}

In [11]:
def models(X_train,y_train, X_test, y_test):
    t = PrettyTable(['Classifier', 'RMSE'])
    
    for key, value in algos.items():
        model = value
        
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        print()

        print(key.upper(), '\n')
        t.add_row([key, np.sqrt(mean_squared_error(y_test, y_pred))])
        print('MAE : {}'.format(mean_absolute_error(y_test, y_pred)))
        print('MSE : {}'.format(mean_squared_error(y_test, y_pred)))
        print('RMSE : {}'.format(np.sqrt(mean_squared_error(y_test, y_pred))))
        print('\n')
        print('-' * 100)
    print(t)


In [12]:
models(X_train, y_train, X_test, y_test)


DECISIONTREEREGRESSOR 

MAE : 2.288454161871883
MSE : 21.485615650172612
RMSE : 4.635257883890886


----------------------------------------------------------------------------------------------------

RANDOMFORESTREGRESSOR 

MAE : 1.261411584196394
MSE : 8.179842731108554
RMSE : 2.8600424351936726


----------------------------------------------------------------------------------------------------

SUPPORT VECTOR REGRESSOR 

MAE : 116.83387248536164
MSE : 29572.262700464336
RMSE : 171.9658765582996


----------------------------------------------------------------------------------------------------

LINEAR REGRESSION 

MAE : 9.650816990803984e-14
MSE : 2.0202774133867358e-26
RMSE : 1.4213646306935935e-13


----------------------------------------------------------------------------------------------------
+--------------------------+------------------------+
|        Classifier        |          RMSE          |
+--------------------------+------------------------+
|  DecisionTreeRe