<a href="https://colab.research.google.com/github/ProsperChuks/hamoye-stage-b-assessment/blob/main/ML_assessment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [86]:
# Location of the dataset CSV file.
# NOTE(review): the path sits under /content/drive, which presumably means
# Google Drive must already be mounted in Colab -- no mount call is visible
# in this notebook; confirm before running on a fresh kernel.
path = '/content/drive/MyDrive/Colab Notebooks/regression/linear/hamoye/energydata_complete.csv'

# Read the whole CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=path)

### Normalizing the dataset

In [87]:
# Drop the two columns that are excluded from the regression.
# `errors='ignore'` keeps this cell idempotent: re-running it after `df` has
# already been trimmed no longer raises a KeyError.
df = df.drop(columns=['date', 'lights'], errors='ignore')

# Rescale every remaining column (the 'Appliances' column included) with
# MinMaxScaler, whose default output range is [0, 1].
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test rows influence the scaling -- confirm this is
# intended.
scale = MinMaxScaler()
normalized_df = pd.DataFrame(scale.fit_transform(df), columns=df.columns)

# Feature matrix: every normalized column except 'Appliances'.
predictor = normalized_df.drop(columns=['Appliances'])

### splitting the dataset

In [88]:
# Response variable: the normalized 'Appliances' column.
target = normalized_df['Appliances']

# Hold out 33% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    predictor,
    target,
    test_size=0.33,
    random_state=42,
)

# Fit a linear regression on the training rows (fit() returns the estimator).
model = LinearRegression().fit(x_train, y_train)

# Predict the held-out rows and display the resulting array.
prediction = model.predict(x_test)
prediction

array([0.03348091, 0.24459154, 0.03247059, ..., 0.06149371, 0.03245236,
       0.06696082])

### evaluating the model

In [89]:
# Evaluate the linear model on the held-out test rows.
# (The sklearn.metrics functions are imported in the notebook's top import
# cell, so every later cell can rely on them after a fresh Run All.)

# Compute all four metrics first ...
mae = mean_absolute_error(y_test, prediction)           # mean |residual|
rss = np.sum(np.square(y_test - prediction))            # sum of squared residuals
rmse = np.sqrt(mean_squared_error(y_test, prediction))  # sqrt of mean squared residual
r2 = r2_score(y_test, prediction)                       # coefficient of determination

# ... then report them, each to 3 decimal places. The extra '\n' argument
# leaves a blank line between consecutive metrics.
print('Mean Absolute Error')
print('%.3f' % mae, '\n')

print('Sum of Square Residuals')
print('%.3f' % rss, '\n')

print('Root Mean Square Error')
print('%.3f' % rmse, '\n')

print('R-Squared')
print('%.3f' % r2)

Mean Absolute Error
0.050 

Sum of Square Residuals
50.908 

Root Mean Square Error
0.088 

R-Squared
0.146


# Question 12:

R-squared of a linear regression of `T6` on `T2`, to 2 decimal places

In [103]:
# Single predictor T2 as an (n_rows, 1) column array; response is T6.
x = normalized_df['T2'].to_numpy().reshape(-1, 1)
y = normalized_df['T6']

# Fit on all rows, then predict the same rows (no train/test split here).
lin_model = LinearRegression().fit(x, y)
pred = lin_model.predict(x)

# Coefficient of determination, printed to 2 decimal places.
r2 = r2_score(y_true=y, y_pred=pred)
print('%.2f' % r2)

0.64


# Question 13: 

Mean absolute error of the same model, to 2 decimal places

In [101]:
# Mean absolute error between T6 (`y`) and the T2-based predictions (`pred`),
# printed to 2 decimal places.
mae = mean_absolute_error(y_true=y, y_pred=pred)
print('%.2f' % mae)

0.08


# Question 14: 

Residual sum of squares, to 2 decimal places

In [102]:
# Residual sum of squares: square each residual, then add them up.
rss = np.square(y - pred).sum()
# Bare expression, so the notebook displays the formatted string itself.
'%.2f' % rss

'222.09'

# Question 15: 

Root mean squared error, to 3 decimal places

In [104]:
# Root mean squared error: square root of the mean squared residual.
rmse = np.sqrt(mean_squared_error(y_true=y, y_pred=pred))
# Bare expression, so the notebook displays the formatted string itself.
'%.3f' % rmse

'0.106'

# Question 17

In [111]:
def get_weights_df(model, feat, col_name):
    """Tabulate a fitted model's coefficients, smallest first.

    Parameters
    ----------
    model : object exposing ``coef_``
        Source of the coefficient values, one per feature.
    feat : object exposing ``columns``
        Supplies the feature labels, in the same order as ``model.coef_``.
    col_name : str
        Header to use for the coefficient column of the result.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'Features'`` and ``col_name``, ordered by ascending
        coefficient value, with a fresh 0..n-1 index.
    """
    # Label each coefficient with its feature name and order by value.
    sorted_coefs = pd.Series(data=model.coef_, index=feat.columns).sort_values()

    # Move the feature labels out of the index into an ordinary column.
    table = sorted_coefs.to_frame().reset_index()
    table.columns = ['Features', col_name]
    return table

# Coefficients of the fitted linear model, one row per training feature,
# sorted from smallest to largest by get_weights_df.
linear_model_weights = get_weights_df(model, x_train, 'Linear_Model_Weight')

# get_weights_df already returns a DataFrame, so no re-wrapping is needed.
final_weight = linear_model_weights
final_weight

Unnamed: 0,Features,Linear_Model_Weight
0,RH_2,-0.451845
1,T_out,-0.30395
2,T2,-0.228149
3,T9,-0.188451
4,RH_8,-0.15761
5,RH_out,-0.068746
6,RH_9,-0.042429
7,RH_7,-0.040881
8,T5,-0.019953
9,T1,-0.006409


# Question 18

In [112]:
# Ridge regression with alpha=0.4, fitted on the training split.
# (Ridge is imported in the notebook's top import cell.)
ridge_reg = Ridge(alpha=0.4)
ridge_reg.fit(x_train, y_train)

# Score the held-out rows.
predict = ridge_reg.predict(x_test)

# Root mean squared error on the test split, printed to 3 decimal places.
rmse = np.sqrt(mean_squared_error(y_test, predict))
print('%.3f' % rmse)

0.088


# Question 19

In [113]:
# Lasso regression with alpha=0.001, fitted on the training split.
# (Lasso is imported in the notebook's top import cell.)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(x_train, y_train)

# Reuse get_weights_df from the Question 17 cell instead of redefining an
# identical copy here: a duplicate definition silently shadows the earlier
# one and the two can drift apart.
lasso_reg_weights = get_weights_df(lasso_reg, x_train, 'Lasso_Weight')

# get_weights_df already returns a DataFrame, so no re-wrapping is needed.
final_weight = lasso_reg_weights
final_weight

Unnamed: 0,Features,Lasso_Weight
0,RH_out,-0.048942
1,RH_8,-0.000448
2,T1,0.0
3,Tdewpoint,0.0
4,Visibility,0.0
5,Press_mm_hg,-0.0
6,T_out,0.0
7,RH_9,-0.0
8,T9,-0.0
9,T8,0.0


# Question 20

In [114]:
# Score the Lasso model on the held-out rows.
predict = lasso_reg.predict(x_test)

# Root mean squared error on the test split, printed to 3 decimal places.
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=predict))
print('%.3f' % rmse)

0.094
