In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
df_quiz2_data = pd.read_csv('energydata_complete.csv')
df = df_quiz2_data.drop(columns = ['date','lights'])

In [3]:
## Data normalising exercise using min max scaler

from sklearn.preprocessing import MinMaxScaler
minmaxscaler = MinMaxScaler()
normalised_df = pd.DataFrame(minmaxscaler.fit_transform(df), columns=df.columns)

#normalised_df = normalised_df.iloc[:, :-2]
#normalised_df_ts
features_df = normalised_df.drop(columns = ['Appliances'])
#features_df_ts
target_df = normalised_df['Appliances']

#### DATA SPLITTING, TRAINING and PREDICTION

In [4]:
## Splitting Data to test and train

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(features_df, target_df, test_size=0.3, random_state=42)


##Fitting Model to training dataset
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train,y_train)

LinearRegression()

In [5]:
### Predicted Values
predicted_values = regressor.predict(X_test)
predicted_values

array([0.03319646, 0.24412988, 0.03410198, ..., 0.06836907, 0.10029337,
       0.05730947])

In [6]:
## Print Coeffcient and Intercept
print(regressor.coef_)
print(regressor.intercept_)

[-3.28105119e-03  5.53703508e-01 -2.36423473e-01 -4.57053389e-01
  2.90777153e-01  9.60574341e-02  2.90314601e-02  2.64573922e-02
 -1.56380557e-02  1.60012115e-02  2.36394899e-01  3.80596667e-02
  1.02549869e-02 -4.45512512e-02  1.02005866e-01 -1.57710295e-01
 -1.89918324e-01 -3.97966436e-02 -3.21535054e-01  6.87241608e-03
 -7.74442149e-02  2.92338967e-02  1.22935616e-02  1.17597874e-01
 -3.02285590e+11  3.02285590e+11]
0.1529113259397064


#### MEAN ABSOLUTE ERROR - SUM OF ABSOLUTE ERRORS

In [7]:
## lOWER MAE (0)- The better the model estimation

## Evaluate Model
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, predicted_values)
mae.round(decimals=3)

0.05

#### SUM OF SQUARE RESIDUAL/ERROR (SSR/RSS)

In [8]:
## lOWER SSR- The better the model estimation

rss = np.sum(np.square(y_test - predicted_values))
round(rss,3)

45.344

#### ROOT MEAN SQUARE ERROR (RMSE)

In [9]:
## lOW RMSE - The better the model estimation

from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predicted_values))
round(rmse,3)

0.088

#### R-SQUARED (R2)

In [10]:
## Higher the R-square : The Better the model

from sklearn.metrics import r2_score
r2_score = r2_score(y_test, predicted_values)
round(r2_score, 3)

0.149

#### RIDGE REGRESSION

In [11]:
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha=0.5)
ridge_reg.fit(X_train,y_train)
print(ridge_reg.coef_)
print(ridge_reg.intercept_)

[-0.02154858  0.51193201 -0.19387955 -0.40113385  0.28740783  0.09497599
  0.02700573  0.02416813 -0.0207269   0.01617566  0.21331574  0.03502308
  0.0100213  -0.04629091  0.10075383 -0.15659589 -0.18858431 -0.04170062
 -0.25076497  0.00651632 -0.05054104  0.03046327  0.01203158  0.07666802
  0.00074348  0.00074348]
0.12268912308452232


#### LASSO REGRESSION

In [12]:
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.01)
lasso_reg.fit(X_train,y_train)
print(lasso_reg.coef_)
print(lasso_reg.intercept_)

[ 0.  0.  0. -0.  0.  0.  0.  0.  0.  0.  0. -0.  0. -0.  0. -0.  0. -0.
  0. -0. -0.  0.  0.  0. -0. -0.]
0.0819884743772064


In [13]:
## Obtain Features Weights

def get_weights_df(regressor, feat, col_name):
    #this function returns the weight of every feature
    weights = pd.Series(regressor.coef_, feat.columns).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

linear_model_weights = get_weights_df(regressor, X_train, 'Linear_Model_Weight')
ridge_weights_df = get_weights_df(ridge_reg, X_train, 'Ridge_Weight')
lasso_weights_df = get_weights_df(lasso_reg, X_train, 'Lasso_weight')
final_weights = pd.merge(linear_model_weights, ridge_weights_df, on='Features')
final_weights = pd.merge(final_weights, lasso_weights_df, on='Features')

In [14]:
final_weights

Unnamed: 0,Features,Linear_Model_Weight,Ridge_Weight,Lasso_weight
0,rv1,-302285600000.0,0.000743,-0.0
1,RH_2,-0.4570534,-0.401134,-0.0
2,T_out,-0.3215351,-0.250765,0.0
3,T2,-0.2364235,-0.19388,0.0
4,T9,-0.1899183,-0.188584,0.0
5,RH_8,-0.1577103,-0.156596,-0.0
6,RH_out,-0.07744421,-0.050541,-0.0
7,RH_7,-0.04455125,-0.046291,-0.0
8,RH_9,-0.03979664,-0.041701,-0.0
9,T5,-0.01563806,-0.020727,0.0


In [15]:
final_weights.describe()

Unnamed: 0,Linear_Model_Weight,Ridge_Weight,Lasso_weight
count,26.0,26.0,26.0
mean,0.0008709247,0.00293,0.0
std,85499280000.0,0.172954,0.0
min,-302285600000.0,-0.401134,-0.0
25%,-0.06922097,-0.045143,0.0
50%,0.01127427,0.008269,0.0
75%,0.08155799,0.033883,0.0
max,302285600000.0,0.511932,-0.0


#### QUIZ QUESTION

In [16]:
features_df_ts = np.array(normalised_df['T2'])
target_df_ts = np.array(normalised_df['T6'])
print(features_df_ts)
print(target_df_ts)

[0.22534529 0.22534529 0.22534529 ... 0.69265118 0.67705355 0.66617051]
[0.3810702  0.37544268 0.36748654 ... 0.86426042 0.82952506 0.78858002]


In [17]:
## Splitting Data to test and train

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(features_df_ts.reshape(-1, 1), target_df_ts, test_size=0.3, random_state=42)

In [18]:
##Fitting Model to training dataset
from sklearn.linear_model import LinearRegression

regressorb = LinearRegression()
regressorb.fit(X_train,y_train)

predicted_values = regressorb.predict(X_test)
predicted_values

array([0.23928945, 0.46794238, 0.23108472, ..., 0.3001772 , 0.4297256 ,
       0.3217686 ])

In [19]:
## Print Coeffcient and Intercept

print(regressorb.coef_)
print(regressorb.intercept_)

[0.8910771]
0.1317642776188842


In [20]:
### Calculating R2 score

from sklearn.metrics import r2_score
r2_score = r2_score(y_test, predicted_values)
round(r2_score, 2)

0.64

In [21]:
## Evaluate Model using MAE

from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, predicted_values)
mae.round(decimals=2)

0.08

In [22]:
## lOWER SSR- The better the model estimation

rss = np.sum(np.square(y_test - predicted_values))
round(rss,2)

66.12

In [23]:
## lOW RMSE - The better the model estimation

from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predicted_values))
round(rmse,2)

0.11

In [24]:
from sklearn.linear_model import Ridge
ridge_regb = Ridge(alpha=0.4)
ridge_regb.fit(X_train,y_train)
print(ridge_regb.coef_)
print(ridge_regb.intercept_)

[0.8900643]
0.13207647500100234


In [25]:
from sklearn.linear_model import Lasso
lasso_regb = Lasso(alpha=0.001)
lasso_regb.fit(X_train,y_train)
print(lasso_regb.coef_)
print(lasso_regb.intercept_)

[0.85177991]
0.1438777132765378


In [26]:
## Obtain Features Weights

def get_weights_df(regressorb, feat, col_name):
    #this function returns the weight of every feature
    #weights = pd.Series(regressor.coef_, feat.columns).sort_values()
    weights = pd.Series(regressorb.coef_).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

linear_model_weightsb = get_weights_df(regressorb, X_train, 'Linear_Model_Weight')
ridge_weights_dfb = get_weights_df(ridge_regb, X_train, 'Ridge_Weight')
lasso_weights_dfb = get_weights_df(lasso_regb, X_train, 'Lasso_weight')
final_weightsb = pd.merge(linear_model_weightsb, ridge_weights_dfb, on='Features')
final_weightsb = pd.merge(final_weightsb, lasso_weights_dfb, on='Features')

In [27]:
final_weightsb

Unnamed: 0,Features,Linear_Model_Weight,Ridge_Weight,Lasso_weight
0,0,0.891077,0.890064,0.85178


In [28]:
final_weightsb.describe()

Unnamed: 0,Features,Linear_Model_Weight,Ridge_Weight,Lasso_weight
count,1.0,1.0,1.0,1.0
mean,0.0,0.891077,0.890064,0.85178
std,,,,
min,0.0,0.891077,0.890064,0.85178
25%,0.0,0.891077,0.890064,0.85178
50%,0.0,0.891077,0.890064,0.85178
75%,0.0,0.891077,0.890064,0.85178
max,0.0,0.891077,0.890064,0.85178


In [29]:
## lOW RMSE - The better the model estimation

from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predicted_values))
round(rmse,3)

0.106