# Estimating the number of bikes to be rented using Linear Least Squares (LLS) Regression



In [1]:
import pandas as pd 
import numpy as np 
from numpy.linalg import inv
from numpy.linalg import norm
from numpy.linalg import qr
from numpy.linalg import svd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 
from sklearn.metrics.pairwise import manhattan_distances
from sklearn import linear_model

In [2]:
# Load the raw CSV. With header=None pandas assigns integer column labels
# and treats every line of the file, including the first one, as data.
data = pd.read_csv(
    "../DATASETS/Bike-sharing-Dataset/day.csv",
    sep=",",
    header=None,
)
# Preview the loaded frame
data

In [3]:
# Clean data & preview.
# The value to estimate is column 15, which is column 13 + column 14, so
# columns 13 and 14 are dropped; columns 0 and 1 are dropped as well.
# Row 0 is removed too (presumably the CSV header line, since the file was
# read with header=None -- verify against the raw file).
data = data.drop(columns=[0, 1, 13, 14]).drop(index=[0])
data

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,12,15
1,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,985
2,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,801
3,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,1349
4,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,1562
5,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,1600
...,...,...,...,...,...,...,...,...,...,...,...,...
727,1,1,12,0,4,1,2,0.254167,0.226642,0.652917,0.350133,2114
728,1,1,12,0,5,1,2,0.253333,0.255046,0.59,0.155471,3095
729,1,1,12,0,6,0,2,0.253333,0.2424,0.752917,0.124383,1341
730,1,1,12,0,0,0,1,0.255833,0.2317,0.483333,0.350754,1796


In [4]:
# Convert to a NumPy matrix.
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
data_matrix = data.to_numpy()

# Convert to float, shuffle and split.
data_matrix = data_matrix.astype(float)
# Shuffle the rows with a fixed seed so that the split, and every result
# computed from it, is reproducible after a kernel restart. A private
# RandomState is used so NumPy's global random state is left untouched.
np.random.RandomState(42).shuffle(data_matrix)
y = data_matrix[:, 11]   # last remaining column (original column 15): the target
A = data_matrix[:, :11]  # the 11 columns before it: the features


A_train, A_test, y_train, y_test = train_test_split(A, y, test_size=0.2, random_state=42)
# Sanity check of types and shapes
print(type(A[1]))
print("Feature space:", A_train.shape)
print("Labels space:", y_train.shape)


<class 'numpy.ndarray'>
Feature space: (584, 11)
Labels space: (584,)


  


# Linear Least Squares: Backslash method

For a linear model: 
<br>$y=Ax$ where: 
<br>$y \in \mathbb{R}^{m}$ is the label vector and $A \in \mathbb{R}^{m \times n}$ is the feature matrix.  
<br> The optimal solution for  $x$ is  $\hat{x} = (A^{T}A)^{-1}A^{T}y$ 

In [5]:
# Normal equations: (A^T A) x = A^T y
AtA = np.matmul(A_train.transpose(), A_train)
# A^T y
Ay = np.matmul(A_train.transpose(), y_train)
# Our linear model weights. The system is solved directly instead of
# forming the explicit inverse of A^T A, which is slower and less accurate.
x = np.linalg.solve(AtA, Ay)

# Predictions are computed once and reused for residuals and RMS
pred_train = np.dot(A_train, x)
pred_test = np.dot(A_test, x)

# train residual error
res_train = pred_train - y_train
# test residual error
res_test = pred_test - y_test

rms_train = np.sqrt(mean_squared_error(pred_train, y_train))
rms_test  = np.sqrt(mean_squared_error(pred_test, y_test))


print('Average test  RMS: ',  rms_test )
print('Average train RMS: ', rms_train)

print('Value of weights: ', x)





Average test  RMS:  861.1879988228834
Average train RMS:  895.86815945173
Value of weights:  [ 531.12553283 2153.64085211  -31.52781555 -307.66989055   85.83574508
  249.8353336  -696.547066    882.52052837 5155.27579969  159.41204802
 -895.56597293]


#  Linear Least Squares: QR Decomposition

When $A$ is decomposed into $A=QR$
<br> The optimal solution for  $x$ is   $\hat{x} = R^{-1}Q^{T}y$ 

In [6]:
# Factor A = QR, then obtain x from R x = Q^T y.
Q, R = qr(A_train)
# The system is solved directly rather than multiplying by an explicit
# inverse of R, which is slower and less accurate.
x = np.linalg.solve(R, np.matmul(Q.transpose(), y_train))

# Predictions are computed once and reused for residuals and RMS
pred_train = np.dot(A_train, x)
pred_test = np.dot(A_test, x)

# train residual error
res_train = pred_train - y_train
# test residual error
res_test = pred_test - y_test

rms_train = np.sqrt(mean_squared_error(pred_train, y_train))
rms_test  = np.sqrt(mean_squared_error(pred_test, y_test))


print('Average test  RMS: ',  rms_test )
print('Average train RMS: ', rms_train)

print('Value of weights: ', x)

Average test  RMS:  861.1879988228422
Average train RMS:  895.86815945173
Value of weights:  [ 531.12553283 2153.64085211  -31.52781555 -307.66989055   85.83574508
  249.8353336  -696.547066    882.52052836 5155.27579969  159.41204802
 -895.56597293]


# Linear Least Squares: SVD Decomposition

When $A$ is decomposed into $A=U\Sigma V^{T}$
<br> The optimal solution for  $x$ is   $\hat{x} = V\Sigma^{-1}U^{T}y$ 

In [7]:
# Thin SVD of the training matrix. numpy's svd returns the right singular
# vectors already transposed, so the third value (named V here) holds V^T
# and S is the 1-D array of singular values.
U, S, V = svd(A_train, full_matrices=False)

# x = V Sigma^{-1} U^T y. Sigma is diagonal, so its inverse is applied as an
# element-wise division by S instead of building and inverting a dense matrix.
x = np.matmul(V.transpose(), np.matmul(U.transpose(), y_train) / S)

# Predictions are computed once and reused for residuals and RMS
pred_train = np.dot(A_train, x)
pred_test = np.dot(A_test, x)

# train residual error
res_train = pred_train - y_train
# test residual error
res_test = pred_test - y_test

rms_train = np.sqrt(mean_squared_error(pred_train, y_train))
rms_test  = np.sqrt(mean_squared_error(pred_test, y_test))


print('Average test  RMS: ',  rms_test )
print('Average train RMS: ', rms_train)

print('Value of weights: ', x)

Average test  RMS:  861.1879988228417
Average train RMS:  895.86815945173
Value of weights:  [ 531.12553283 2153.64085211  -31.52781555 -307.66989055   85.83574508
  249.8353336  -696.547066    882.52052836 5155.27579969  159.41204802
 -895.56597293]


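# Tikhonov Regularized Linear Least Squares

With $A=U\Sigma V^{T}$ and regularization parameter $\lambda$:
<br> The optimal solution for  $x$ is   $\hat{x} = V(\Sigma^{2}+\lambda^{2}I)^{-1}\Sigma U^{T}y$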


In [8]:
# Thin SVD of the training matrix; the third return value (named V) is
# transposed again below before use.
U, S, V = svd(A_train, full_matrices=False)
λ = 0.1
# One weight per singular value: s / (s^2 + λ^2)
inner = S / ((S**2) + λ**2)
projected = U.T @ y_train
x = (V.T @ np.diag(inner)) @ projected

# Model outputs on both splits
pred_train = np.dot(A_train, x)
pred_test = np.dot(A_test, x)

# train residual error
res_train = pred_train - y_train
# test residual error
res_test = pred_test - y_test

rms_train = np.sqrt(mean_squared_error(pred_train, y_train))
rms_test  = np.sqrt(mean_squared_error(pred_test, y_test))


print('Test  RMS: ',  rms_test )
print('Train RMS: ', rms_train)

print('Value of weights: ', x)





Test  RMS:  861.6241366568261
Train RMS:  895.8735500796483
Value of weights:  [ 531.65991594 2153.82892787  -31.60594536 -307.72162757   85.85383107
  250.21521558 -697.25198702 1002.45279161 5018.86105648  166.54507608
 -893.18522544]


# Lasso Regularized Linear Least Squares



In [9]:
# Lasso through scikit-learn (coordinate descent is used).
# NOTE(review): apart from alpha the estimator keeps its defaults; sklearn
# presumably fits an intercept by default, unlike the closed-form cells
# above -- confirm that this difference is intended when comparing results.
lm = linear_model.Lasso(alpha=0.1).fit(A_train, y_train)

pred_train = lm.predict(A_train)
pred_test = lm.predict(A_test)
rms_train = np.sqrt(mean_squared_error(pred_train, y_train))
rms_test  = np.sqrt(mean_squared_error(pred_test, y_test))

print('Test  RMS: ',  rms_test )
print('Train RMS: ', rms_train)
print('Value of weights: ', lm.coef_)





Test  RMS:  849.4832360644964
Train RMS:  870.669997443745
Value of weights:  [  499.10193395  2050.42155592   -32.32493141  -487.17209674
    58.69409705   153.6852087   -650.35068256  3002.35910447
  2360.77948295 -1000.56263996 -2619.74546344]
