### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import warnings
warnings.filterwarnings('ignore')

### Reading Dataset

In [2]:
data = pd.read_csv('energydata_complete.csv')

In [3]:
data.head(3)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668


In [4]:
data.isnull().sum().sum()

0

# Question 12

In [5]:
x_Q12 = data['T2'].values.reshape(19735,1)
y_Q12 = data['T6']

In [6]:
model_Q12 = LinearRegression()
model_Q12.fit(x_Q12,y_Q12)
predictions_Q12 = model_Q12.predict(x_Q12)

print(f'R^2 : {round(r2_score(y_Q12,predictions_Q12), 2)}')

R^2 : 0.64


# Question 13

#### X and y data

In [7]:
x_Q13 = data.drop(['date', 'lights','Appliances'], axis = 1)
y_Q13 = data['Appliances']

#### Splitting dataset

In [8]:
x_train_Q13, x_test_Q13, y_train_Q13, y_test_Q13 = train_test_split(x_Q13, y_Q13, random_state= 42, test_size= 0.3)

#### MinMax Scaler

In [9]:
scaler = MinMaxScaler()
scaler.fit(x_train_Q13)
x_train_Q13 = scaler.transform(x_train_Q13)
x_test_Q13 = scaler.transform(x_test_Q13)

#### Model Building and Evaluation

In [10]:
model_Q13 = LinearRegression()
model_Q13.fit(x_train_Q13,y_train_Q13)
predictions_Q13 = model_Q13.predict(x_test_Q13)

print(f'MAE : {round(mean_absolute_error(y_test_Q13,predictions_Q13),2)}')

MAE : 53.65


# Question 14

In [12]:
##Residual_Sum_of_Squares = RSS
RSS = mean_squared_error(y_test_Q13,predictions_Q13)
round(RSS,2)

8767.63

# Question 15

In [13]:
## Root Mean Squared Error = RMSE
RMSE = np.sqrt(mean_squared_error(y_test_Q13,predictions_Q13))
round(RMSE,2)

93.64

# Question 16

In [14]:
## Coefficient of Determination = R_squared
R_squared = r2_score(y_test_Q13,predictions_Q13)
round(R_squared,2)

0.15

# Question 17

In [15]:
feat_weight = pd.DataFrame(model_Q13.coef_, index = x_Q13.columns, columns = ['Feature weights']).sort_values('Feature weights', ascending = False)

In [19]:
feat_weight.head(2)

Unnamed: 0,Feature weights
rv2,451047900000000.0
RH_1,495.4925


In [20]:
feat_weight.tail(2)

Unnamed: 0,Feature weights
RH_2,-469.7429
rv1,-451047900000000.0


# Question 18

#### Model Ridge

In [21]:
model_Q18 = Ridge(alpha = 0.4)
model_Q18.fit(x_train_Q13,y_train_Q13)
predictions_Q18 = model_Q18.predict(x_test_Q13)

print(f'RMSE : {round(np.sqrt(mean_squared_error(y_test_Q13,predictions_Q18)),2)}')

RMSE : 93.66


# Question 19

In [22]:
model_Q19 = Lasso(alpha = 0.001)
model_Q19.fit(x_train_Q13,y_train_Q13)
predictions_Q19 = model_Q19.predict(x_test_Q13)

In [23]:
feat_weight = pd.DataFrame(model_Q19.coef_, index = x_Q13.columns, columns = ['Feature weights']).sort_values('Feature weights', ascending = False)

In [24]:
len(feat_weight)

26

# Question 20

In [25]:
print(f'Lasso RMSE : {round(np.sqrt(mean_squared_error(y_test_Q13,predictions_Q19)),2)}')

Lasso RMSE : 93.64
