# Linear Regression

- R squared score 0.4546

Best params -  
Random state = 40164  
Split size = 0.2  

In [11]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('Seoul Bikes Clean')
data.head()

Unnamed: 0,Rented Bike Count,Temp,Humidity,Dew_temp,Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm)
0,254,-5.2,37,-17.6,0.0,0.0,0.0
1,204,-5.5,38,-17.6,0.0,0.0,0.0
2,173,-6.0,39,-17.7,0.0,0.0,0.0
3,107,-6.2,40,-17.6,0.0,0.0,0.0
4,78,-6.0,36,-18.6,0.0,0.0,0.0


In [3]:
def normalize(value, col):
    return (value - min(col))/(max(col) - min(col))

In [4]:
for column in data.columns.to_list()[1:]:
    data[column] = [normalize(sample, data[column]) for sample in data[column]]
    print('Normalized', column)

Temp
Humidity
Dew_temp
Solar Radiation (MJ/m2)
Rainfall(mm)
Snowfall (cm)


In [5]:
data.head()

Unnamed: 0,Rented Bike Count,Temp,Humidity,Dew_temp,Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm)
0,254,0.22028,0.377551,0.224913,0.0,0.0,0.0
1,204,0.215035,0.387755,0.224913,0.0,0.0,0.0
2,173,0.206294,0.397959,0.223183,0.0,0.0,0.0
3,107,0.202797,0.408163,0.224913,0.0,0.0,0.0
4,78,0.206294,0.367347,0.207612,0.0,0.0,0.0


In [33]:
y = data['Rented Bike Count'].values.reshape(-1, 1)
X = data.drop('Rented Bike Count', axis=1)

In [34]:
X.shape

(8760, 6)

In [35]:
y.shape

(8760, 1)

In [42]:
def train_eval(itern, X, y):
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=itern, test_size=0.2)
    lin_model = LinearRegression()
    lin_model.fit(X_train, y_train)
    
    y_pred = lin_model.predict(X_test)
    
    return r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

In [46]:
max_r2 = -999
min_mse = 999

for i in range(0, 50_000):
    r2, mse = train_eval(i, X, y)
    
    if r2 > max_r2:
        print(f'r2 {round(r2, 5)} MSE {round(mse, 8)} RandomState {i}')
        max_r2 = r2
        
    if mse < min_mse:
        print(f'r2 {round(r2, 5)} MSE {round(mse, 8)} RandomState {i}')
        min_mse = mse
        
    if i%5000 == 0:
        print(f'----------------------------------------------------------------------------------> RS {i}')

r2 0.37896 MSE 259917.54803779 RandomState 0
----------------------------------------------------------------------------------> RS 0
r2 0.38409 MSE 254551.12113191 RandomState 1
r2 0.40013 MSE 249231.23189104 RandomState 2
r2 0.40235 MSE 259537.60043742 RandomState 6
r2 0.40565 MSE 241340.21926802 RandomState 25
r2 0.40753 MSE 242119.45733643 RandomState 33
r2 0.42482 MSE 237774.52979986 RandomState 49
r2 0.43919 MSE 239374.51562949 RandomState 98
----------------------------------------------------------------------------------> RS 5000
r2 0.44003 MSE 231289.59569817 RandomState 6598
r2 0.44173 MSE 223796.45123463 RandomState 7381
----------------------------------------------------------------------------------> RS 10000
r2 0.44665 MSE 234698.05967291 RandomState 14770
----------------------------------------------------------------------------------> RS 15000
----------------------------------------------------------------------------------> RS 20000
-------------------------------

#### without normalization

In [52]:
data2 = pd.read_csv('Seoul Bikes Clean')
data2.head()

Unnamed: 0,Rented Bike Count,Temp,Humidity,Dew_temp,Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm)
0,254,-5.2,37,-17.6,0.0,0.0,0.0
1,204,-5.5,38,-17.6,0.0,0.0,0.0
2,173,-6.0,39,-17.7,0.0,0.0,0.0
3,107,-6.2,40,-17.6,0.0,0.0,0.0
4,78,-6.0,36,-18.6,0.0,0.0,0.0


In [53]:
y2 = data2['Rented Bike Count'].values.reshape(-1, 1)
X2 = data2.drop('Rented Bike Count', axis=1)

In [54]:
max_r2 = -999
min_mse = 999

for i in range(0, 50_000):
    r2, mse = train_eval(i, X2, y2)
    
    if r2 > max_r2:
        print(f'r2 {round(r2, 5)} MSE {round(mse, 8)} RandomState {i}')
        max_r2 = r2
        
    if mse < min_mse:
        print(f'r2 {round(r2, 5)} MSE {round(mse, 8)} RandomState {i}')
        min_mse = mse
        
    if i%5000 == 0:
        print(f'----------------------------------------------------------------------------------> RS {i}')

r2 0.37896 MSE 259917.54803779 RandomState 0
----------------------------------------------------------------------------------> RS 0
r2 0.38409 MSE 254551.12113191 RandomState 1
r2 0.40013 MSE 249231.23189104 RandomState 2
r2 0.40235 MSE 259537.60043742 RandomState 6
r2 0.40565 MSE 241340.21926802 RandomState 25
r2 0.40753 MSE 242119.45733643 RandomState 33
r2 0.42482 MSE 237774.52979986 RandomState 49
r2 0.43919 MSE 239374.51562949 RandomState 98
----------------------------------------------------------------------------------> RS 5000
r2 0.44003 MSE 231289.59569817 RandomState 6598
r2 0.44173 MSE 223796.45123463 RandomState 7381
----------------------------------------------------------------------------------> RS 10000
r2 0.44665 MSE 234698.05967291 RandomState 14770
----------------------------------------------------------------------------------> RS 15000


KeyboardInterrupt: 