# Assessment 2 - Part 1: Machine Learning
<br>

## Task: train regression models that predict two indicators of energy efficiency based on 8 numerical inputs. 
<br>

### Packages needed:

- pandas
- sklearn
- numpy
- matplotlib

### 1.1 - Data Preparation

#### Importing libraries

In [138]:
## Imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

## Used for normalising the data
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

## Used for regression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

## Used for assessment of regression
from sklearn.metrics import mean_squared_error

## Used for cross validation
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

#### Loading the dataset

In [139]:
energy_data = pd.read_excel("data.xlsx")

# Pre-calculate the split point
data_split = math.floor( len(energy_data) * 0.7 ) # 70% train / 30% test

energy_data.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


#### Extracting and splitting data

In [140]:
## Extracting Data

#inputs
inputs = energy_data.values[:,:8].astype(float)

#normalise the inputs
scaler = MinMaxScaler()
scaled_inputs = scaler.fit_transform(inputs)

#targets
targets_Y1 = energy_data["Y1"]
targets_Y2 = energy_data["Y2"]
targets = []
targets.append(targets_Y1)
targets.append(targets_Y2)



#### Random Sample generator

In [141]:
sample = energy_data.sample(frac=1).reset_index(drop=True)

print( sample.head())

     X1     X2     X3     X4   X5  X6    X7  X8     Y1     Y2
0  0.79  637.0  343.0  147.0  7.0   2  0.25   3  39.04  45.13
1  0.62  808.5  367.5  220.5  3.5   3  0.25   3  14.34  14.96
2  0.76  661.5  416.5  122.5  7.0   5  0.00   0  23.93  29.40
3  0.76  661.5  416.5  122.5  7.0   2  0.25   2  36.95  36.87
4  0.74  686.0  245.0  220.5  3.5   4  0.25   1  12.41  15.28


#### Splitting the data into train and test (70/30)

In [142]:
def train_test_split(data, sp):
    train, test = data[:sp], data[sp + 1: ]
    return [train, test]


#### Setting up train and test data

In [143]:
#set up data
x_train, x_test = train_test_split(scaled_inputs, data_split)
y_joint_train, y_joint_test = train_test_split(energy_data.values[:,8:].astype(float), data_split)
y1_train, y1_test = train_test_split(targets_Y1, data_split)
y2_train, y2_test = train_test_split(targets_Y2, data_split)

### Task 1.2 - Regression

#### Regression using the "sklearn.neural_network.MLPRegressor"

In [144]:
MLP = MLPRegressor(max_iter=10000)
MLP.fit(x_train, y_joint_train)
MLP_Outputs = MLP.predict(x_train)

MLP_CV = cross_val_score(MLP, x_test, y_joint_test, cv=5)
print(MLP_CV)

MLP_Result = mean_squared_error(y_joint_test, MLP_Outputs)
print(MLP_Result)



[0.97791793 0.98204467 0.96209293 0.96524125 0.95712152]




ValueError: Found input variables with inconsistent numbers of samples: [230, 537]

#### Regression using the "sklearn.ensemble.RandomForestRegressor"

In [None]:
Forest = RandomForestRegressor()

Forest.fit(x_train, y_joint_train)
Forest_Outputs = Forest.predict(x_train)

Forest_CV = cross_val_score(Forest, x_test, y_joint_test, cv=5)
print(Forest_CV)

Forest_Result =  mean_squared_error(y_joint_test, Forest_Outputs)
print(Forest_Result)

#### Regression using the "sklearn.svm.SVR"

In [None]:
SVR_poly = SVR()
SVR_1 = SVR_poly.fit(x_train, y1_train)
SVR_2 = SVR_poly.fit(x_train, y2_train)

SVR_Outputs = SVR_poly.predict(x_train)

SVR_1_CV = cross_val_score(SVR_poly, x_test, y1_test, cv=5)
SVR_2_CV = cross_val_score(SVR_poly, x_test, y2_test, cv=5)
print(SVR_1_CV)
print(SVR_2_CV)

### Task 1.3 - Assessment of  Regression

#### MSE of all 3 regression methods