# Algorithmic Tuning: A Guide for Regression Tasks

## Which model (in Part 1) had the best outcome?
Replace the value of the variable `best_model` below with one of the following codes:


*   `LR` - for Linear Regression
*   `SVM` - for Support Vector Machine
*   `MLP` - for Artificial Neural Network - Multilayer Perceptron
*   `VOT` - for Voting Ensemble
*   `BAGLR` - for Bagging Ensemble with Linear Regression
*   `BAGSVM` - for Bagging Ensemble with Support Vector Machine
*   `BAGMLP` - for Bagging Ensemble with Multilayer Perceptron
*   `ADALR` - for Boosting Ensemble with Linear Regression
*   `ADASVM` - for Boosting Ensemble with Support Vector Machine





In [None]:
# Code of the model whose hyper-parameters will be tuned; it must be one of
# the codes listed above.
best_model = "LR"

## 1. Upload data from Local File System.

In [None]:
from google.colab import files

# Prompt for a file upload in Colab. The recorded output below shows the CSV
# being saved under its own name, which is the path the next section reads.
uploaded = files.upload()


Saving Energy_efficiency_DataSet.csv to Energy_efficiency_DataSet.csv


## 2. Take a peek at your raw data.

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 

# Short codes used as column labels; the file itself supplies no usable header.
names = [
    'RC', 'SA', 'WA', 'RA', 'OH',
    'O', 'GA', 'GAD', 'HL', 'CL',
]

# Load the uploaded CSV, ignoring anything that follows a '#' on a line.
data = pd.read_csv(
    'Energy_efficiency_DataSet.csv',
    names=names,
    comment='#',
)

# Show the first 20 rows as a quick sanity check of the parsed table.
peek = data.head(n=20)
print(peek)

      RC     SA     WA      RA   OH  O   GA  GAD     HL     CL
0   0.98  514.5  294.0  110.25  7.0  2  0.0    0  15.55  21.33
1   0.98  514.5  294.0  110.25  7.0  3  0.0    0  15.55  21.33
2   0.98  514.5  294.0  110.25  7.0  4  0.0    0  15.55  21.33
3   0.98  514.5  294.0  110.25  7.0  5  0.0    0  15.55  21.33
4   0.90  563.5  318.5  122.50  7.0  2  0.0    0  20.84  28.28
5   0.90  563.5  318.5  122.50  7.0  3  0.0    0  21.46  25.38
6   0.90  563.5  318.5  122.50  7.0  4  0.0    0  20.71  25.16
7   0.90  563.5  318.5  122.50  7.0  5  0.0    0  19.68  29.60
8   0.86  588.0  294.0  147.00  7.0  2  0.0    0  19.50  27.30
9   0.86  588.0  294.0  147.00  7.0  3  0.0    0  19.95  21.97
10  0.86  588.0  294.0  147.00  7.0  4  0.0    0  19.34  23.49
11  0.86  588.0  294.0  147.00  7.0  5  0.0    0  18.31  27.87
12  0.82  612.5  318.5  147.00  7.0  2  0.0    0  17.05  23.77
13  0.82  612.5  318.5  147.00  7.0  3  0.0    0  17.41  21.46
14  0.82  612.5  318.5  147.00  7.0  4  0.0    0  16.95

## 3. Retrieve `features` and `target`

In [None]:
# Split the table into model inputs and the value to predict:
# X is every column except the last two, Y is the second-to-last column
# ('HL' in the column list above); the final column is left unused.
array = data.values
X, Y = array[:, :-2], array[:, -2]

In [None]:
print(X)  # feature matrix: every column except the last two

[[9.800e-01 5.145e+02 2.940e+02 ... 2.000e+00 0.000e+00 0.000e+00]
 [9.800e-01 5.145e+02 2.940e+02 ... 3.000e+00 0.000e+00 0.000e+00]
 [9.800e-01 5.145e+02 2.940e+02 ... 4.000e+00 0.000e+00 0.000e+00]
 ...
 [6.200e-01 8.085e+02 3.675e+02 ... 3.000e+00 4.000e-01 5.000e+00]
 [6.200e-01 8.085e+02 3.675e+02 ... 4.000e+00 4.000e-01 5.000e+00]
 [6.200e-01 8.085e+02 3.675e+02 ... 5.000e+00 4.000e-01 5.000e+00]]


In [None]:
print(Y) # regression target: the second-to-last column (a continuous value, not a class label)

[15.55 15.55 15.55 15.55 20.84 21.46 20.71 19.68 19.5  19.95 19.34 18.31
 17.05 17.41 16.95 15.98 28.52 29.9  29.63 28.75 24.77 23.93 24.77 23.93
  6.07  6.05  6.01  6.04  6.37  6.4   6.37  6.4   6.85  6.79  6.77  6.81
  7.18  7.1   7.1   7.1  10.85 10.54 10.77 10.56  8.6   8.49  8.45  8.5
 24.58 24.63 24.63 24.59 29.03 29.87 29.14 28.09 26.28 26.91 26.37 25.27
 23.53 24.03 23.54 22.58 35.56 37.12 36.9  35.94 32.96 32.12 32.94 32.21
 10.36 10.43 10.36 10.39 10.71 10.8  10.7  10.75 11.11 11.13 11.09 11.16
 11.68 11.69 11.7  11.69 15.41 15.2  15.42 15.21 12.96 12.97 12.93 13.02
 24.29 24.31 24.13 24.25 28.88 29.68 28.83 27.9  26.48 27.02 26.33 25.36
 23.75 24.23 23.67 22.79 35.65 37.26 36.97 36.03 33.16 32.4  33.12 32.41
 10.42 10.46 10.32 10.45 10.64 10.72 10.55 10.68 11.45 11.46 11.32 11.49
 11.45 11.42 11.33 11.43 15.41 15.18 15.34 15.19 12.88 13.   12.97 13.04
 24.28 24.4  24.11 24.35 28.07 29.01 29.62 29.05 25.41 26.47 26.89 26.46
 22.93 23.84 24.17 23.87 35.78 35.48 36.97 36.7  32.

## 4. Prepare the `model` and the `parameters` to tune.

In [None]:
# Two parallel registries filled in by the following cells:
# `models` collects (code, estimator) pairs and `param_grids` collects
# (code, parameter grid) pairs, matched later by their shared code.
models, param_grids = [], []

### 4.1. Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Linear regression baseline and its search space, both registered as 'LR'.
# NOTE: `normalize` only exists in older scikit-learn releases (deprecated in
# 1.0, removed in 1.2); the recorded output of this notebook shows such a version.
lr = LinearRegression()
param_grid_lr = {'normalize': [True, False]}

models.append(('LR', lr))
param_grids.append(('LR', param_grid_lr))

### 4.3. [Support Vector Machines](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html)

In [None]:
from sklearn.svm import SVR

# Support vector regressor and its search space, both registered as 'SVM'.
svm = SVR()
param_grid_svm = {
    'kernel': [
        'linear',
        'poly',
        'rbf',
        'sigmoid',
    ],
    # TODO: you can change the values.
    'C': [100, 10, 1.0, 0.1, 0.01, 0.001],
}

models.append(('SVM', svm))
param_grids.append(('SVM', param_grid_svm))

### 4.4. [Artificial Neural Network - Multilayer Perceptron](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html)

In [None]:
from sklearn.neural_network import MLPRegressor

# Multilayer perceptron regressor and its search space, both registered as 'MLP'.
ann = MLPRegressor()
param_grid_mlp = {
    # One, two or three hidden layers of 10 units each.
    # TODO: you can change the values.
    'hidden_layer_sizes': [(10,), (10, 10,), (10, 10, 10,)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
}

models.append(('MLP', ann))
param_grids.append(('MLP', param_grid_mlp))

### 4.5. Voting Ensemble

In [None]:
from sklearn.ensemble import VotingClassifier  # kept in scope in case another cell uses it
from sklearn.ensemble import VotingRegressor

# create containers of all sub models
estimators = []

# Linear Regression
model0 = LinearRegression()
estimators.append(('lr', model0))

# Support Vector Machine
model2 = SVR()
estimators.append(('svm', model2))

# Artificial Neural Network - Multilayer Perceptron
model3 = MLPRegressor()
estimators.append(('mlp', model3))

# Create the ENSEMBLE model.
# All sub models are regressors and the target is continuous, so the ensemble
# must average their predictions with VotingRegressor. VotingClassifier
# rejects a continuous target when it is fitted.
vot = VotingRegressor(estimators)
models.append(('VOT', vot))


# Parameter Grid
# Keys use the '<estimator name>__<parameter>' form to reach each sub model.
# The grid below spans 2 * 4 * 6 * 3 * 4 * 3 = 1,728 combinations, so a full
# search over it is slow; trim the lists if needed.
param_grid_vot = {
    'lr__normalize' : [True, False],
    'svm__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svm__C': [100, 10, 1.0, 0.1, 0.01, 0.001],                                             # TODO: you can change the values.
    'mlp__hidden_layer_sizes': [(10,), (10,10,), (10,10,10,)],                              # TODO: you can change the values.
    'mlp__activation' : ['identity', 'logistic', 'tanh', 'relu'],
    'mlp__solver' : ['lbfgs', 'sgd', 'adam'],

}
param_grids.append(('VOT', param_grid_vot))

### 4.6. Bagging Ensembles

#### 4.6.1. Linear Regression as base estimator

In [None]:
from sklearn.ensemble import BaggingRegressor

# Bagging ensemble of 10 linear regressions, registered as 'BAGLR'.
baglr = BaggingRegressor(
    n_estimators=10,
    base_estimator=LinearRegression(),
)

# 'base_estimator__<parameter>' addresses a parameter of the wrapped model.
param_grid_baglr = {
    # TODO: you can change the values.
    'base_estimator__normalize': [True, False],
}

models.append(('BAGLR', baglr))
param_grids.append(('BAGLR', param_grid_baglr))

#### 4.6.3. Support Vector Machine as base estimator

In [None]:
# Bagging ensemble of 10 support vector regressors, registered as 'BAGSVM'.
bagsvm = BaggingRegressor(
    n_estimators=10,
    base_estimator=SVR(),
)

# 'base_estimator__<parameter>' addresses a parameter of the wrapped SVR.
param_grid_bagsvm = {
    'base_estimator__kernel': [
        'linear',
        'poly',
        'rbf',
        'sigmoid',
    ],
    # TODO: you can change the values.
    'base_estimator__C': [100, 10, 1.0, 0.1, 0.01, 0.001],
}

models.append(('BAGSVM', bagsvm))
param_grids.append(('BAGSVM', param_grid_bagsvm))

#### 4.6.4. Artificial Neural Network as base estimator

In [None]:
# Bagging ensemble of 10 multilayer perceptrons, registered as 'BAGMLP'.
bagann = BaggingRegressor(
    n_estimators=10,
    base_estimator=MLPRegressor(),
)

# 'base_estimator__<parameter>' addresses a parameter of the wrapped MLP.
param_grid_bagmlp = {
    # TODO: you can change the values.
    'base_estimator__hidden_layer_sizes': [(10,), (10, 10,), (10, 10, 10,)],
    'base_estimator__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'base_estimator__solver': ['lbfgs', 'sgd', 'adam'],
}

models.append(('BAGMLP', bagann))
param_grids.append(('BAGMLP', param_grid_bagmlp))

### 4.7. Boosting Ensembles

#### 4.7.1. Linear Regression as base estimator

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost ensemble of 10 linear regressions, registered as 'ADALR'.
adalr = AdaBoostRegressor(
    n_estimators=10,
    base_estimator=LinearRegression(),
)

# 'base_estimator__<parameter>' addresses a parameter of the wrapped model.
param_grid_adalr = {'base_estimator__normalize': [True, False]}

models.append(('ADALR', adalr))
param_grids.append(('ADALR', param_grid_adalr))

#### 4.7.3. Support Vector Machines as base estimator

## 5. Finding the best parameters

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Candidates are ranked by negated mean squared error (closer to 0 is better).
scoring = 'neg_mean_squared_error'

# Index both registries by model code so the selected entry is looked up
# directly. If a registration cell was run more than once, the latest entry
# for a code wins and the search still runs a single time.
models_by_code = dict(models)
grids_by_code = dict(param_grids)

# Fail loudly when the chosen code was never registered; otherwise the search
# would be skipped silently and `trained_model` would not exist afterwards.
if best_model not in models_by_code or best_model not in grids_by_code:
    available = sorted(set(models_by_code) & set(grids_by_code))
    raise ValueError(
        f"best_model={best_model!r} has no registered model and parameter grid; "
        f"choose one of {available}"
    )

model = models_by_code[best_model]
p_grid = grids_by_code[best_model]

# 10-fold cross-validation without shuffling: folds follow the row order of X.
kfold = KFold(n_splits=10)
grid = GridSearchCV(estimator=model, param_grid=p_grid, cv=kfold, scoring=scoring)

# List the tunable parameter names of the selected model for reference.
print(model.get_params().keys())

# Exhaustively evaluate every combination in the grid and keep the fitted search.
trained_model = grid.fit(X, Y)

dict_keys(['copy_X', 'fit_intercept', 'n_jobs', 'normalize'])


In [None]:
# Best mean cross-validated score found by the search. The search is scored
# with 'neg_mean_squared_error', so this is a negated MSE: closer to 0 is better.
print(trained_model.best_score_)
# The estimator configured with the winning parameter combination.
print(trained_model.best_estimator_)

-9.805131660705205
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
