# ML Lab 11 "solutions"


## Data prep

In [1]:
import pandas as pd
from scipy.io import arff
import numpy as np

# Read the ARFF file; element 0 of the loadarff result is what gets tabulated.
iris_data = arff.loadarff('data/iris.arff')
# Build the DataFrame and decode the byte-string labels in one chained step,
# so that values read 'Iris-versicolor' rather than b'Iris-versicolor'.
iris_df = pd.DataFrame(iris_data[0]).assign(
    **{'class': lambda frame: frame['class'].str.decode('utf-8')}
)
iris_df.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [2]:
from sklearn import preprocessing

# Split the frame into the feature matrix and the raw class-label column.
X = iris_df.iloc[:, :4]  # the first 4 columns (and all rows)
y = iris_df.iloc[:, 4]   # the last column (and all rows)

# Encode the string class labels as integers; y becomes an array of 0, 1, 2 values.
label_encoder = preprocessing.LabelEncoder()
y = label_encoder.fit_transform(y)

# DataFrame.info() writes its report itself and returns None, so it is called
# directly: wrapping it in print() would append a stray "None" line to the output.
X.info()
print()
print(y)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   sepallength  150 non-null    float64
 1   sepalwidth   150 non-null    float64
 2   petallength  150 non-null    float64
 3   petalwidth   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [3]:
from sklearn.preprocessing import Normalizer

# Fit the Normalizer on X and keep the transformed feature matrix as X_normalised.
normalizer = Normalizer()
X_normalised = normalizer.fit_transform(X)

In [4]:
from sklearn.preprocessing import StandardScaler

# Fit the StandardScaler on X and keep the transformed feature matrix as X_scaled.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

## MLP with hold-out validation

3-layer MLP with 10 neurons in the hidden layer. All other hyper-parameters as per default values.

Using the normalised feature values here.

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# Stratified 70/30 hold-out split of the normalised features.
# random_state=None leaves the split unseeded, so it differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_normalised, y, test_size=0.3, random_state=None, stratify=y
)

# One hidden layer of 10 neurons. The trailing comma matters: (10) is just the
# int 10, whereas (10,) is the one-element tuple of hidden-layer sizes.
model = MLPClassifier(hidden_layer_sizes=(10,))
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out evaluation on the test portion
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
print("\n", metrics.confusion_matrix(y_test, y_pred))
print("\n", metrics.classification_report(y_test, y_pred))

Accuracy 0.8222222222222222

 [[15  0  0]
 [ 3  7  5]
 [ 0  0 15]]

               precision    recall  f1-score   support

           0       0.83      1.00      0.91        15
           1       1.00      0.47      0.64        15
           2       0.75      1.00      0.86        15

    accuracy                           0.82        45
   macro avg       0.86      0.82      0.80        45
weighted avg       0.86      0.82      0.80        45





You should have seen warnings about the MLP not having converged above, and a rather sub-optimal performance!

So, let's see what the performance is like by just increasing the number of training iterations (epochs).

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# Deliberately no new train_test_split here: an unseeded split would give a different
# train/test partition from the previous hold-out cell, so the two results would differ
# in more than the training budget. X_train, X_test, y_train and y_test from that cell
# are reused instead (the network's weight initialisation is still unseeded).
epochs = 3000
# One hidden layer of 10 neurons; (10,) is the one-element tuple, (10) is only the int 10.
model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=epochs)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out evaluation on the same test portion as before
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
print("\n", metrics.confusion_matrix(y_test, y_pred))
print("\n", metrics.classification_report(y_test, y_pred))

Accuracy 1.0

 [[15  0  0]
 [ 0 15  0]
 [ 0  0 15]]

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00        15

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



## Normalised vs Scaled feature values

Since the Iris dataset is quite small and simple, let's use cross-validation on the wine dataset to compare the performance of the MLP with normalised vs scaled feature values.

### Loading and preparing the wine dataset

In [7]:
from sklearn.datasets import load_wine
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler

# Wine dataset: X is the feature matrix, y the class label vector
dataset = load_wine()
X = dataset.data
y = dataset.target

# Normalised copy of the feature matrix
normalizer = Normalizer()
X_normalised = normalizer.fit_transform(X)

# Scaled copy of the feature matrix
# NOTE(review): the scaler is fitted on every row of X and X_scaled is later passed to
# cross-validation, so each validation fold has already contributed to the scaling
# statistics - consider fitting the scaler inside each fold (e.g. via a Pipeline).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### Normalised feature matrix

In [8]:
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

epochs = 5000
# One hidden layer of 50 neurons; (50,) is the one-element tuple, (50) is only the int 50.
model = MLPClassifier(hidden_layer_sizes=(50,), max_iter=epochs)

# 5-fold cross-validation on the normalised feature matrix
scores = cross_validate(model, X_normalised, y, cv=5)

# Mean and two standard deviations across the folds for accuracy and timings
for template, key in (
    ("Accuracy:          %0.2f (+/- %0.2f)", 'test_score'),
    ("Training time (s): %0.2f (+/- %0.2f)", 'fit_time'),
    ("Testing time (s):  %0.2f (+/- %0.2f)", 'score_time'),
):
    fold_values = scores[key]
    print(template % (fold_values.mean(), fold_values.std() * 2))

Accuracy:          0.92 (+/- 0.11)
Training time (s): 1.22 (+/- 0.25)
Testing time (s):  0.00 (+/- 0.00)


### Scaled feature matrix

In [9]:
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

epochs = 5000
# One hidden layer of 30 neurons; (30,) is the one-element tuple, (30) is only the int 30.
# NOTE(review): the normalised-features run uses 50 hidden neurons while this one uses 30,
# so the two runs differ in more than the preprocessing - confirm this is intended.
model = MLPClassifier(hidden_layer_sizes=(30,), max_iter=epochs)

# 5-fold cross-validation on the scaled feature matrix
scores = cross_validate(model, X_scaled, y, cv=5)

# Mean and two standard deviations across the folds for accuracy and timings
for template, key in (
    ("Accuracy:          %0.2f (+/- %0.2f)", 'test_score'),
    ("Training time (s): %0.2f (+/- %0.2f)", 'fit_time'),
    ("Testing time (s):  %0.2f (+/- %0.2f)", 'score_time'),
):
    fold_values = scores[key]
    print(template % (fold_values.mean(), fold_values.std() * 2))

Accuracy:          0.98 (+/- 0.04)
Training time (s): 0.17 (+/- 0.03)
Testing time (s):  0.00 (+/- 0.00)


### Observations

Above, you should be able to make a few key observations regarding:

* Accuracy
* Training time

So, it seems using scaled feature values has worked the best! But, we haven't optimised other hyper-parameters. Therefore, at this point, you could still suspend conclusions in case normalisation works well with a different configuration of the MLP.

## Random Search

To be more confident about our observations above, this section conducts some hyper-parameter optimisation using Random Search.

In [10]:
# Helper that prints the top-ranked models of a hyper-parameter search
def report(results, rank_metric='score', n_top=3):
    """
    Print the best-ranked candidates of a hyper-parameter search.
    :param results: the cv_results_ data structure from the optimisation algorithm
    :param rank_metric: metric suffix used to look up the "rank_test_", "mean_test_" and "std_test_" entries
    :param n_top: the lowest rank to report; every candidate ranked 1..n_top is printed (ties included)
    """
    rank_key = "rank_test_" + rank_metric
    mean_key = "mean_test_" + rank_metric
    std_key = "std_test_" + rank_metric
    score_line = "Mean validation score: {mean:.2f} (+/- {spread:.2f})"

    print("\nModels ranked according to", rank_metric)
    for rank in range(1, n_top + 1):
        # positions of all candidates that share this rank
        for idx in np.flatnonzero(results[rank_key] == rank):
            print("Model with rank: {0}".format(rank))
            # spread is reported as two standard deviations
            print(score_line.format(mean=results[mean_key][idx],
                                    spread=results[std_key][idx] * 2))
            print("Params: {0}".format(results['params'][idx]))
            print("")

In [11]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from time import time
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# NOTE(review): ignore_warnings is imported from the private sklearn.utils._testing module.
@ignore_warnings(category=ConvergenceWarning)
def random_search(model, param_dict, n_iter_search, X, y, cv=5, random_state=None):
    """
    Run a randomised hyper-parameter search and return its cross-validation results.

    The search is executed inside this function so that the decorator above can
    suppress warnings about (lack of) convergence raised while it runs.

    :param model: the estimator to tune
    :param param_dict: parameter names mapped to the values to sample from
    :param n_iter_search: number of parameter settings to sample
    :param X: feature matrix
    :param y: class label vector
    :param cv: cross-validation setting passed to RandomizedSearchCV (defaults to 5)
    :param random_state: seed passed to RandomizedSearchCV; None (the default) leaves the sampling unseeded
    :return: the cv_results_ data structure of the fitted search
    """
    # perf_counter is a monotonic clock intended for measuring elapsed intervals
    from time import perf_counter

    # initialise random search (named `search` so it does not shadow this function)
    search = RandomizedSearchCV(model, param_distributions=param_dict,
                                n_iter=n_iter_search, cv=cv,
                                random_state=random_state)

    # run random search
    print("> STARTING RANDOM SEARCH ...")
    start_time = perf_counter()
    search.fit(X, y)
    end_time = perf_counter()

    print("> RANDOM SEARCH COMPLETE")

    print("\nRandomizedSearchCV took %.2f seconds for %d candidates"
          " parameter settings." % ((end_time - start_time), n_iter_search))

    return search.cv_results_

### With normalised feature matrix

In [12]:
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# MLP with default settings; the search supplies the sampled hyper-parameters
model = MLPClassifier()

# Candidate values the random search samples from.
# The hidden_layer_sizes entries are plain ints (a single hidden-layer width each).
# NOTE(review): the scikit-learn docs state learning_rate is only used when solver='sgd';
# the solver is left at its default here - confirm this search dimension is intended.
param_dict = {
    "hidden_layer_sizes": [5, 10, 15, 20, 30, 40, 50],
    "max_iter": list(range(500, 5001, 500)),  # 500, 1000, ..., 5000
    "activation": ['identity', 'logistic', 'tanh', 'relu'],
    "learning_rate": ['constant', 'invscaling', 'adaptive'],
}

# Run the search on the normalised features and print the top-ranked models
n_iter_search = 5  # increase this to sample even more values from the param_dict
results = random_search(model, param_dict, n_iter_search, X_normalised, y)
report(results)

> STARTING RANDOM SEARCH ...
> RANDOM SEARCH COMPLETE

RandomizedSearchCV took 12.87 seconds for 5 candidates parameter settings.

Models ranked according to score
Model with rank: 1
Mean validation score: 0.92 (+/- 0.11)
Params: {'max_iter': 3000, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 40, 'activation': 'tanh'}

Model with rank: 2
Mean validation score: 0.81 (+/- 0.43)
Params: {'max_iter': 4500, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 15, 'activation': 'tanh'}

Model with rank: 3
Mean validation score: 0.81 (+/- 0.21)
Params: {'max_iter': 1000, 'learning_rate': 'adaptive', 'hidden_layer_sizes': 50, 'activation': 'identity'}



### With scaled feature matrix

In [13]:
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# MLP with default settings; the search supplies the sampled hyper-parameters
model = MLPClassifier()

# Candidate values the random search samples from.
# The hidden_layer_sizes entries are plain ints (a single hidden-layer width each).
# NOTE(review): this grid offers widths up to 100, whereas the normalised-features search
# stops at 50 - confirm the two searches are meant to cover different ranges.
# NOTE(review): the scikit-learn docs state learning_rate is only used when solver='sgd';
# the solver is left at its default here - confirm this search dimension is intended.
param_dict = {
    "hidden_layer_sizes": [5, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    "max_iter": list(range(500, 5001, 500)),  # 500, 1000, ..., 5000
    "activation": ['identity', 'logistic', 'tanh', 'relu'],
    "learning_rate": ['constant', 'invscaling', 'adaptive'],
}

# Run the search on the scaled features and print the top-ranked models
n_iter_search = 5  # increase this to sample even more values from the param_dict
results = random_search(model, param_dict, n_iter_search, X_scaled, y)
report(results)

> STARTING RANDOM SEARCH ...
> RANDOM SEARCH COMPLETE

RandomizedSearchCV took 5.08 seconds for 5 candidates parameter settings.

Models ranked according to score
Model with rank: 1
Mean validation score: 0.99 (+/- 0.03)
Params: {'max_iter': 2500, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 50, 'activation': 'logistic'}

Model with rank: 2
Mean validation score: 0.98 (+/- 0.04)
Params: {'max_iter': 1000, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 50, 'activation': 'tanh'}

Model with rank: 2
Mean validation score: 0.98 (+/- 0.04)
Params: {'max_iter': 3000, 'learning_rate': 'invscaling', 'hidden_layer_sizes': 15, 'activation': 'tanh'}



## Deep neural network!

Ok, probably not so exciting, but the irony is that people talk about Deep Learning as something magical, and we can go from shallow to deep learning by adding 4 characters (including a space)...

In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate  # the function this cell actually calls
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

epochs = 5000
# Two hidden layers of 15 neurons each
model = MLPClassifier(hidden_layer_sizes=(15, 15), max_iter=epochs)

# 5-fold cross-validation on the scaled feature matrix
scores = cross_validate(model, X_scaled, y, cv=5)
# Mean and two standard deviations across the folds for accuracy and timings
print("Accuracy:          %0.2f (+/- %0.2f)" % (scores['test_score'].mean(), scores['test_score'].std() * 2))
print("Training time (s): %0.2f (+/- %0.2f)" % (scores['fit_time'].mean(), scores['fit_time'].std() * 2))
print("Testing time (s):  %0.2f (+/- %0.2f)" % (scores['score_time'].mean(), scores['score_time'].std() * 2))

Accuracy:          0.97 (+/- 0.05)
Training time (s): 0.16 (+/- 0.02)
Testing time (s):  0.00 (+/- 0.00)
