In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns

In [2]:
# Read energydata_complete.csv into a DataFrame and preview its first three rows.
df = pd.read_csv("energydata_complete.csv")
df.head(n=3)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668


### Finding the value of R-squared (T6 regressed on T2)

In [3]:
# Pick the single attribute (T2) and the label (T6) out of the DataFrame.
x = df["T2"]
y = df["T6"]

In [4]:
# Copy the attribute values into a NumPy array.
X_Array = np.array(x, copy=True)

In [5]:
# Fit a linear regression of y on the single attribute.
# The attribute array is reshaped into one column because fit() takes a 2-D feature matrix.
from sklearn.linear_model import LinearRegression
Regressor = LinearRegression()
Model = Regressor.fit(X_Array.reshape(-1, 1), y)

In [6]:
# In-sample predictions: score the same single-column matrix the model was fitted on.
y_pred = Regressor.predict(X_Array.reshape(-1, 1))

In [7]:
# Coefficient of determination (R-squared) of the single-attribute fit, shown to two decimals.
from sklearn.metrics import r2_score
R_Square = r2_score(y_true=y, y_pred=y_pred)
round(R_Square, 2)

0.64

In [8]:
# Parse the 'date' column into pandas datetime values (this cell does not remove any column).
df["date"] = pd.to_datetime(df["date"])

In [9]:
# Remove the 'date' and 'lights' columns before modelling.
# errors='ignore' keeps this cell idempotent: df is rebound to the result, so running the
# cell a second time would otherwise raise KeyError because the columns are already gone.
df = df.drop(columns=['date', 'lights'], errors='ignore')

In [10]:
# Preview the first three rows of the current DataFrame.
df.head(n=3)

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,60,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,17.166667,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,60,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,17.166667,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,50,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,17.166667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668


In [11]:
# Bring every column onto a common scale with a min-max scaler.
# NOTE(review): the scaler is fitted on all rows here, before the train/test split that
# follows, so held-out rows contribute to the min/max used for scaling.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
Normalised_df = pd.DataFrame(
    data=scaler.fit_transform(df),
    columns=df.columns,
)

In [12]:
# The scaled 'Appliances' column is the target; every other scaled column is a feature.
Target_Var = Normalised_df['Appliances']
Features_df = Normalised_df.drop(columns='Appliances')

In [13]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    Features_df,
    Target_Var,
    test_size=0.3,
    random_state=42,
)

In [14]:
# Fit a linear regression on the training split.
# Note: this rebinds `Model`, which previously held the single-attribute fit.
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()
Model = regression_model.fit(X=x_train, y=y_train)

In [15]:
# Predict the target for the held-out test rows.
predicted_values = Model.predict(X=x_test)

In [16]:
# Mean absolute error (MAE) on the test split, shown to two decimals.
from sklearn.metrics import mean_absolute_error
MAE = mean_absolute_error(y_true=y_test, y_pred=predicted_values)
round(MAE, 2)

0.05

In [17]:
# Residual sum of squares (RSS) on the test split: the sum of the squared prediction errors.
RSS = np.square(y_test - predicted_values).sum()
round(RSS, 2)

45.35

In [18]:
# Root mean squared error (RMSE) on the test split: square root of the mean squared error.
from sklearn.metrics import mean_squared_error
RMSE = np.sqrt(mean_squared_error(y_true=y_test, y_pred=predicted_values))
round(RMSE, 3)

0.088

In [19]:
# Coefficient of determination (R-squared) on the test split, shown to two decimals.
# Note: this rebinds `R_Square`, which previously held the single-attribute score.
from sklearn.metrics import r2_score
R_Square = r2_score(y_true=y_test, y_pred=predicted_values)
round(R_Square, 2)

0.15

In [20]:
# Tabulate the fitted weights, one row per feature column.
coeff_df = pd.DataFrame({'Model_Coeff': Model.coef_}, index=Features_df.columns)
coeff_df

Unnamed: 0,Model_Coeff
T1,-0.003281
RH_1,0.553547
T2,-0.236178
RH_2,-0.456698
T3,0.290627
RH_3,0.096048
T4,0.028981
RH_4,0.026386
T5,-0.015657
RH_5,0.016006


In [21]:
# Smallest fitted weight in the coefficient table.
coeff_df.loc[:, 'Model_Coeff'].min()

-0.4566979483385004

In [22]:
# Largest fitted weight in the coefficient table.
coeff_df.loc[:, 'Model_Coeff'].max()

0.5535465998386391

In [23]:
# Fit a ridge (regularised) regression on the training split and print its coefficients.
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=0.4)
ridge.fit(X=x_train, y=y_train)
print("Ridge model:", ridge.coef_)

Ridge model: [-0.01840621  0.5195253  -0.20139673 -0.41107123  0.28808681  0.0951346
  0.02738389  0.02457853 -0.01985322  0.01615237  0.21729178  0.03551862
  0.01009781 -0.04597696  0.10102815 -0.15683005 -0.1889163  -0.04136654
 -0.26217227  0.00658387 -0.05472365  0.03026762  0.01207649  0.08312757
  0.00074817  0.00074817]


In [24]:
# Ridge predictions for the held-out test rows.
predRidge_values = ridge.predict(X=x_test)

In [25]:
# RMSE of the ridge predictions on the test split, shown to three decimals.
# mean_squared_error is the name imported by the earlier RMSE cell.
RMSE_Ridge = np.sqrt(mean_squared_error(y_true=y_test, y_pred=predRidge_values))
round(RMSE_Ridge, 3)

0.088

In [26]:
# Fit a lasso (regularised) regression on the training split and display its coefficients.
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.001)
lasso.fit(X=x_train, y=y_train)
lasso.coef_

array([ 0.        ,  0.01787993,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.00011004, -0.        , -0.        ,  0.        , -0.        ,
       -0.04955749,  0.00291176,  0.        ,  0.        , -0.        ,
       -0.        ])

In [27]:
# Lasso predictions for the held-out test rows.
predLasso_values = lasso.predict(X=x_test)

In [28]:
# RMSE of the lasso predictions on the test split, shown to three decimals.
# mean_squared_error is the name imported by the earlier RMSE cell.
RMSE_Lasso = np.sqrt(mean_squared_error(y_true=y_test, y_pred=predLasso_values))
round(RMSE_Lasso, 3)

0.094