# Machine Learning Lab 12 "solutions"

Focuses on MLPs in Keras.

## Imports


In [17]:
# Data and Datasets
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Validation methods
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Metrics
from sklearn import metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Tensorflow (and Keras)
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.utils import to_categorical

# Classifiers
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB

# Hyper-parameter optimisation
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Feature selection & feature engineering
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer

# Stats
from scipy.stats import randint as sp_randint
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import shapiro     # Shapiro Wilk
from scipy.stats import normaltest  # D’Agostino’s K^2
from scipy.stats import anderson    # Anderson-Darling
from scipy.stats import ttest_ind    # independent student t-test; assumes normality
from scipy.stats import mannwhitneyu # non-parametric; doesn't assume normality

# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import SVG
from graphviz import Source
from IPython.display import display
from sklearn.tree import export_graphviz

# Utils
import pprint
import numpy as np
from time import time

## Loading the datasets

### Banknote dataset

In [2]:
# Read the banknote-authentication CSV (path is relative to the notebook's
# working directory) into a DataFrame
bank_df = pd.read_csv('data/data_banknote_authentication.csv')
# Preview the first rows to check that the file was parsed as expected
bank_df.head()

Unnamed: 0,var,skew,curt,ent,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [3]:
# Column names, dtypes and non-null counts (a quick check for missing values)
bank_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   var     1372 non-null   float64
 1   skew    1372 non-null   float64
 2   curt    1372 non-null   float64
 3   ent     1372 non-null   float64
 4   class   1372 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max)
bank_df.describe()

Unnamed: 0,var,skew,curt,ent,class
count,1372.0,1372.0,1372.0,1372.0,1372.0
mean,0.433735,1.922353,1.397627,-1.191657,0.444606
std,2.842763,5.869047,4.31003,2.101013,0.497103
min,-7.0421,-13.7731,-5.2861,-8.5482,0.0
25%,-1.773,-1.7082,-1.574975,-2.41345,0.0
50%,0.49618,2.31965,0.61663,-0.58665,0.0
75%,2.821475,6.814625,3.17925,0.39481,1.0
max,6.8248,12.9516,17.9274,2.4495,1.0


In [5]:
# Feature matrix: every column except the last one
X_bank = bank_df.iloc[:, :-1].to_numpy()
# Class-label vector: the last column only
y_bank = bank_df.iloc[:, -1].to_numpy()

### Wine dataset

In [6]:
# Fetch the Wine dataset that ships with scikit-learn
dataset = load_wine()

# Keep handles on the feature names and on the raw arrays:
# X is the feature matrix, y the class-label vector
feature_names = dataset['feature_names']
X_wine, y_wine = dataset.data, dataset.target

# Tabular view of the same data: one column per feature, followed by an
# integer 'target' column holding the class label
dataset_df = pd.DataFrame(X_wine, columns=feature_names).assign(
    target=y_wine.astype('int')
)

# The distinct class labels, as strings
class_labels = dataset_df['target'].unique().astype('str')

print("Shape of feature matrix:", X_wine.shape)

# Sanity check that the frame looks the way we expect
dataset_df.head()

Shape of feature matrix: (178, 13)


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


## Keras MLP on the bank dataset

Example of binary classification.

### Data processing

In [7]:
# Standardise the banknote features; `scaler` stays available as the fitted
# transformer. Note that it is fitted on the complete feature matrix.
scaler = StandardScaler()
X_bank_scaled = scaler.fit_transform(X_bank)

### Predict utility function

In [8]:
def predict_classes(model, X_test, threshold=0.5):
    """
    Return discrete class-label predictions instead of real-valued outputs.

    Simulates a deprecated function from Keras so that the metric
    calculation functions from Scikit-Learn can be used directly on the
    result. This has been adapted from Jason Brownlee.

    Parameters
    ----------
    model : object
        Any fitted model exposing ``predict(X)`` that returns an array-like
        of raw outputs.
    X_test : array-like
        Samples to predict; passed to ``model.predict`` unchanged.
    threshold : float, default 0.5
        Cut-off applied when the model has a single output unit: outputs
        strictly greater than ``threshold`` are labelled 1, all others 0.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one class label per sample.
    """
    y_pred_raw = np.asarray(model.predict(X_test))

    # Several output units (e.g. softmax): the label is the index of the
    # largest output in each row. Thresholding and flattening here would
    # yield n_samples * n_units values rather than one label per sample.
    if y_pred_raw.ndim > 1 and y_pred_raw.shape[-1] > 1:
        return y_pred_raw.argmax(axis=-1).astype(int)

    # Single output unit (e.g. sigmoid): threshold, then drop the unit axis
    return (y_pred_raw > threshold).flatten().astype(int)

### Hold-out validation

#### With sigmoid output

In [9]:
# Three-layer MLP for binary classification: a first hidden layer as wide as
# the input, a second hidden layer of 8 units, and one sigmoid output unit
model = Sequential([
    Dense(X_bank.shape[1], input_dim=X_bank.shape[1], activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss, Adam optimiser, accuracy tracked during training
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Layer-by-layer overview with parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 4)                 20        
                                                                 
 dense_1 (Dense)             (None, 8)                 40        
                                                                 
 dense_2 (Dense)             (None, 1)                 9         
                                                                 
Total params: 69
Trainable params: 69
Non-trainable params: 0
_________________________________________________________________


2021-12-10 09:30:40.250358: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# Setting up hold-out validation.
# The split is made on the *unscaled* features and the scaler is then fitted on
# the training portion only, so no statistics from the test rows leak into
# training. A fixed random_state makes the partition reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_bank, y_bank, test_size=0.3, random_state=42, stratify=y_bank
)
holdout_scaler = StandardScaler().fit(X_train_raw)
X_train = holdout_scaler.transform(X_train_raw)
X_test = holdout_scaler.transform(X_test_raw)

# Training the model
print("Training started")
model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)
print("Training complete")

# Performance on the training data
y_pred_tr = predict_classes(model, X_train)

# Testing model
y_pred = predict_classes(model, X_test)
print("Testing complete")

# Printing out confusion matrix and accuracy (on the training and test sets)
print("\nConfusion matrix:\n")
print(metrics.confusion_matrix(y_test, y_pred))
print("\nAccuracy (Training): %0.3f " % (metrics.accuracy_score(y_train, y_pred_tr)))
print("Accuracy (Testing):  %0.3f " % (metrics.accuracy_score(y_test, y_pred)))

Training started
Training complete
Testing complete

Confusion matrix:

[[229   0]
 [  0 183]]

Accuracy (Training): 1.000 
Accuracy (Testing):  1.000 


#### With tanh output

In [15]:
# Creating the model: same architecture as the sigmoid variant, except for the
# activation of the output unit
model = Sequential()
model.add(Dense(X_bank.shape[1], input_dim=X_bank.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
# NOTE(review): tanh produces values in [-1, 1], whereas binary_crossentropy and
# the 0.5 threshold used by predict_classes are designed for outputs in [0, 1].
# Kept as-is because this section demonstrates the tanh output; confirm the
# pairing is intentional.
model.add(Dense(1, activation='tanh'))

# Compiling the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Setting up hold-out validation.
# The split is made on the *unscaled* features and the scaler is then fitted on
# the training portion only, so no statistics from the test rows leak into
# training. A fixed random_state makes the partition reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_bank, y_bank, test_size=0.3, random_state=42, stratify=y_bank
)
holdout_scaler = StandardScaler().fit(X_train_raw)
X_train = holdout_scaler.transform(X_train_raw)
X_test = holdout_scaler.transform(X_test_raw)

# Training the model
print("Training started")
model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)
print("Training complete")

# Performance on the training data
y_pred_tr = predict_classes(model, X_train)

# Testing model
y_pred = predict_classes(model, X_test)
print("Testing complete")

# Printing out confusion matrix and accuracy (on the training and test sets)
print("\nConfusion matrix:\n")
print(metrics.confusion_matrix(y_test, y_pred))
print("\nAccuracy (Training): %0.3f " % (metrics.accuracy_score(y_train, y_pred_tr)))
print("Accuracy (Testing):  %0.3f " % (metrics.accuracy_score(y_test, y_pred)))

Training started
Training complete
Testing complete

Confusion matrix:

[[229   0]
 [  0 183]]

Accuracy (Training): 1.000 
Accuracy (Testing):  1.000 


### Cross-validation

For this example, we wrap the Keras model in a ``KerasClassifier`` object (a Scikit-Learn-compatible wrapper), so we can use it like a "normal" Scikit-Learn classifier. This lets us use Scikit-Learn functions such as ``cross_validate``.

PS: the ``KerasClassifier`` from the TensorFlow wrapper library has been deprecated, so the use of SciKeras is also shown further below.

In [11]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# The model has to be built inside a function so that the KerasClassifier
# wrapper can create a fresh, untrained network every time it is fitted
def mlp_model():
    """
    Build and compile the MLP used for the banknote (binary) task.

    The network has a first hidden layer of 12 ReLU units fed by
    ``X_bank.shape[1]`` inputs, a second hidden layer of 8 ReLU units and a
    single sigmoid output unit. It is compiled with binary cross-entropy
    loss, the Adam optimiser and accuracy as the tracked metric.

    Returns the compiled (untrained) Keras model.
    """
    network = Sequential([
        Dense(12, input_dim=X_bank.shape[1], activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Wrapping Keras model into a Scikit-Learn Classifier (estimator)
model = KerasClassifier(build_fn=mlp_model, epochs=50, batch_size=10, verbose=0)

# 5-fold cross-validation with explicitly stratified, shuffled folds.
# An integer `cv` only produces stratified folds when scikit-learn recognises
# the estimator as a classifier; otherwise it falls back to an unshuffled
# KFold, whose folds are unrepresentative if the rows are ordered by class.
# Passing the splitter guarantees balanced folds, and the seed makes them
# reproducible.
cv_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_validate(model, X_bank_scaled, y_bank, cv=cv_folds)

# Mean and spread (two standard deviations) over the five folds
print("Accuracy:          %0.2f (+/- %0.2f)" % (scores['test_score'].mean(), scores['test_score'].std() * 2))
print("Training time (s): %0.2f (+/- %0.2f)" % (scores['fit_time'].mean(), scores['fit_time'].std() * 2))
print("Testing time (s):  %0.2f (+/- %0.2f)" % (scores['score_time'].mean(), scores['score_time'].std() * 2))

  model = KerasClassifier(build_fn=mlp_model, epochs=50, batch_size=10, verbose=0)


Accuracy:          1.00 (+/- 0.00)
Training time (s): 3.28 (+/- 0.09)
Testing time (s):  0.15 (+/- 0.11)


## Keras MLP on wine dataset

Example of multi-class classification.

### Data processing

In [12]:
# Standardise the wine features; `scaler` stays available as the fitted transformer
scaler = StandardScaler()
X_wine_scaled = scaler.fit_transform(X_wine)

# Class labels as one-hot vectors
y_wine_encoded = to_categorical(y_wine)

### Cross-validation

For this example, we also wrap the Keras model in a ``KerasClassifier`` object (a Scikit-Learn-compatible wrapper), so we can use it like a "normal" Scikit-Learn classifier.

In [13]:
# Model factory for the wine (multi-class) task
def mlp_one_hot_model():
    """
    Build and compile the shallow MLP for one-hot encoded wine labels.

    The network has a first hidden layer as wide as the input
    (``X_wine.shape[1]`` ReLU units), a second hidden layer of 8 ReLU units
    and a 3-unit softmax output. It is compiled with categorical
    cross-entropy loss, the Adam optimiser and accuracy as the tracked metric.

    Returns the compiled (untrained) Keras model.
    """
    network = Sequential([
        Dense(X_wine.shape[1], input_dim=X_wine.shape[1], activation='relu'),
        Dense(8, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Wrapping Keras model into a Scikit-Learn Classifier (estimator)
model = KerasClassifier(build_fn=mlp_one_hot_model, epochs=150, batch_size=5, verbose=0)

# 5-fold cross-validation on stratified, shuffled folds.
# The one-hot targets are not a binary/multiclass vector, so an integer `cv`
# would fall back to an unshuffled KFold, and StratifiedKFold cannot split on
# them directly. The folds are therefore precomputed from the integer labels
# (y_wine) and handed to cross_validate as explicit (train, test) index pairs.
cv_folds = list(
    StratifiedKFold(n_splits=5, shuffle=True, random_state=42).split(X_wine_scaled, y_wine)
)
scores = cross_validate(model, X_wine_scaled, y_wine_encoded, cv=cv_folds)

# Mean and spread (two standard deviations) over the five folds
print("Accuracy:          %0.2f (+/- %0.2f)" % (scores['test_score'].mean(), scores['test_score'].std() * 2))
print("Training time (s): %0.2f (+/- %0.2f)" % (scores['fit_time'].mean(), scores['fit_time'].std() * 2))
print("Testing time (s):  %0.2f (+/- %0.2f)" % (scores['score_time'].mean(), scores['score_time'].std() * 2))

  model = KerasClassifier(build_fn=mlp_one_hot_model, epochs=150, batch_size=5, verbose=0)


Accuracy:          0.97 (+/- 0.04)
Training time (s): 2.66 (+/- 0.10)
Testing time (s):  0.11 (+/- 0.01)


## Deep MLP

### Wine dataset (multi-class)
This example uses the Wine dataset, following from the shallow MLP used above on this dataset.

**PS**: this example also uses the new ``KerasClassifier`` wrapper from SciKeras. Most of the code is identical; only the argument referring to the model function has been renamed, from ``build_fn`` to ``model``.

In [14]:
# USING SCI-KERAS
# !pip3 install scikeras
from scikeras.wrappers import KerasClassifier

# Model factory for the deeper wine (multi-class) network
def mlp_one_hot_deep_model():
    """
    Build and compile the deeper MLP for one-hot encoded wine labels.

    The network has a first hidden layer as wide as the input
    (``X_wine.shape[1]`` ReLU units), two further hidden layers of 8 ReLU
    units each and a 3-unit softmax output. It is compiled with categorical
    cross-entropy loss, the Adam optimiser and accuracy as the tracked metric.

    Returns the compiled (untrained) Keras model.
    """
    network = Sequential([
        Dense(X_wine.shape[1], input_dim=X_wine.shape[1], activation='relu'),
        Dense(8, activation='relu'),
        Dense(8, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Wrapping Keras model into a Scikit-Learn Classifier (estimator)
model = KerasClassifier(model=mlp_one_hot_deep_model, epochs=150, batch_size=5, verbose=0)

# 5-fold cross-validation on stratified, shuffled folds.
# The one-hot targets are not a binary/multiclass vector, so an integer `cv`
# would fall back to an unshuffled KFold, and StratifiedKFold cannot split on
# them directly. The folds are therefore precomputed from the integer labels
# (y_wine) and handed to cross_validate as explicit (train, test) index pairs.
cv_folds = list(
    StratifiedKFold(n_splits=5, shuffle=True, random_state=42).split(X_wine_scaled, y_wine)
)
scores = cross_validate(model, X_wine_scaled, y_wine_encoded, cv=cv_folds)

# Mean and spread (two standard deviations) over the five folds
print("Accuracy:          %0.2f (+/- %0.2f)" % (scores['test_score'].mean(), scores['test_score'].std() * 2))
print("Training time (s): %0.2f (+/- %0.2f)" % (scores['fit_time'].mean(), scores['fit_time'].std() * 2))
print("Testing time (s):  %0.2f (+/- %0.2f)" % (scores['score_time'].mean(), scores['score_time'].std() * 2))

Accuracy:          0.94 (+/- 0.10)
Training time (s): 2.81 (+/- 0.07)
Testing time (s):  0.11 (+/- 0.20)
