In [86]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge


Import data using pandas

In [62]:
data = pd.read_csv("data.csv")

Normalize the entire dataset using MinMaxScaler in sklearn excluding date and light columns

In [63]:
scaler = MinMaxScaler()
df = data.drop(columns=["date", "lights"] )
normalised_data = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
#features_data = normalised_data.drop(columns = ["date", "lights"])

### Question  12:

In this question, we need to build a simple linear regression using T2 as the independent variables with T6 as our target(dependent variable). Afterwards,the data set was split inot $70:30$ ratio for train and test respectively with a random state of $42$

In [35]:
X= normalised_data['T2'].values.reshape(-1, 1)
y= normalised_data["T6"].values.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size= 0.3 , random_state= 42)

Next step, call the linear regression model and fit on the train data (X_train and y_train)

In [37]:
regressor = LinearRegression()  
model = regressor.fit(X_train, y_train)

Use model now to make predictions on the test data and compare the values with its y_test values

In [41]:
predictions = model.predict(X_test)

### Question 12:
Calculate the R2 squared

In [47]:
print("The R2 squared value is {:.2f}".format(r2_score(y_test, predictions)))


The R2 squared value is 0.64


### Question 13:
Calculate the Mean Absolute Error

In [82]:
print("The mean absolute error value is {:.2f}".format(mean_absolute_error(y_test, predictions)))


The mean absolute error value is 0.33


### Question 14:
Calculate the Residual Sum of Squares

In [54]:
rss = np.sum(np.square(y_test - predictions))
print("The Residual Sum of Squares value is {:.2f}".format(rss))


The Residual Sum of Squares value is 66.12


### Question 15:
Calculate Root Mean Squared Error 


In [56]:
print("The Root Mean Squared Error value is {:.3f}".format(np.sqrt(mean_squared_error(y_test, predictions))))


The Residual Sum of Squares value is 0.106


### Question 16:
Calculate  Coefficient of Determination: This is another name for the $R^{2}$ and which we already calculated above


In [57]:
print("The Coefficient of Determination: value is {:.2f}".format(rss))


The Coefficient of Determination: value is 66.12


### Question 17

In [77]:
X= normalised_data.iloc[:, 1:]#.values#.reshape(-1, 1)
y= normalised_data["Appliances"]#.values.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size= 0.3 , random_state= 42)

In [78]:
regressor = LinearRegression()  
model = regressor.fit(X_train, y_train)

In [83]:
pred= model.predict(X_test)
print("The Root Mean Squared Error value is {:.3f}".format(np.sqrt(mean_squared_error(y_test, pred))))


The Root Mean Squared Error value is 0.088


In [80]:
def get_weights_df(model, feat, col_name) :
    weights = pd.Series(model.coef_, feat.columns).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = [ 'Features' , col_name]
    weights_df[col_name].round( 3 )
    return weights_df

linear_model_weights = get_weights_df(model, X_train, 'Linear_Model_Weight' )


In [81]:
print(linear_model_weights)

       Features  Linear_Model_Weight
0          RH_2            -0.456698
1         T_out            -0.321860
2            T2            -0.236178
3            T9            -0.189941
4          RH_8            -0.157595
5        RH_out            -0.077671
6          RH_7            -0.044614
7          RH_9            -0.039800
8            T5            -0.015657
9            T1            -0.003281
10          rv2             0.000770
11          rv1             0.000770
12  Press_mm_hg             0.006839
13           T7             0.010319
14   Visibility             0.012307
15         RH_5             0.016006
16         RH_4             0.026386
17           T4             0.028981
18    Windspeed             0.029183
19         RH_6             0.038049
20         RH_3             0.096048
21           T8             0.101995
22    Tdewpoint             0.117758
23           T6             0.236425
24           T3             0.290627
25         RH_1             0.553547


### Question 18

**Ridge Regression Model**

In [87]:
ridge_reg = Ridge(alpha= 0.4)
model_ridge= ridge_reg.fit(X_train, y_train)
pred_ridge= model_ridge.predict(X_test)


print("The Root Mean Squared Error value is {:.3f}".format(np.sqrt(mean_squared_error(y_test, pred_ridge))))


The Root Mean Squared Error value is 0.088


### Question  19

**Lasso Regression**


In [88]:
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.001 )
model_lasso= lasso_reg.fit(X_train, y_train)
linear_model_weights = get_weights_df(model_lasso, X_train, 'Model_lasso_Weight' )
print(linear_model_weights)

       Features  Model_lasso_Weight
0        RH_out           -0.049557
1          RH_8           -0.000110
2            T1            0.000000
3     Tdewpoint            0.000000
4    Visibility            0.000000
5   Press_mm_hg           -0.000000
6         T_out            0.000000
7          RH_9           -0.000000
8            T9           -0.000000
9            T8            0.000000
10         RH_7           -0.000000
11          rv1           -0.000000
12           T7           -0.000000
13           T6            0.000000
14         RH_5            0.000000
15           T5           -0.000000
16         RH_4            0.000000
17           T4           -0.000000
18         RH_3            0.000000
19           T3            0.000000
20         RH_2           -0.000000
21           T2            0.000000
22         RH_6           -0.000000
23          rv2           -0.000000
24    Windspeed            0.002912
25         RH_1            0.017880


### Question 20:

**RMSE**

In [89]:
predictions_lasso= model_lasso.predict(X_test)
print("The Root Mean Squared Error value is {:.3f}".format(np.sqrt(mean_squared_error(y_test, predictions_lasso))))


The Root Mean Squared Error value is 0.094
