## Exercise 2.3: MLP basics with sklearn

ITU KSADMAL1KU-NLP - Advanced Machine Learning for NLP in KCS 2024

by Stefan Heinrich, Bertram Højer, Christian H. Rasmussen, & material by Kevin Murphy.

All info and static material: https://learnit.itu.dk/course/view.php?id=3024579

-------------------------------------------------------------------------------

In [None]:
# @title #### import dependencies

from IPython.display import display
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn import model_selection
from sklearn import metrics
from sklearn import neural_network
import seaborn as sns

#### Loading Iris dataset with seaborn


In [None]:
# Load the Iris measurements as a DataFrame, preview the first rows, and
# scatter the two sepal features with one colour per species.
iris_df = sns.load_dataset("iris")

display(iris_df.head(3))

plot_raw = sns.scatterplot(
    data=iris_df,
    x='sepal_length',
    y='sepal_width',
    hue='species',
)

#### Run logistic regression via sklearn

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html#sklearn.linear_model.LogisticRegression

In [None]:
# Fit a logistic-regression classifier on the two sepal features of the Iris
# data and plot its decision regions together with the raw data points.
r_strength = 100000  # C: inverse of regularization strength, must be a positive float
l_solver = 'newton-cg'  # Optimisation algorithm in ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
m_class = 'multinomial'  # see documentation
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# confirm against the installed version and drop the argument if it is rejected.

logreg = LogisticRegression(C=r_strength, solver=l_solver,
                            multi_class=m_class)

# directly access iris_df['sepal_length'] and iris_df['sepal_width']
iris_df_inputs = iris_df[['sepal_length', 'sepal_width']]
display(iris_df_inputs.head(n=3))
# other options:
#iris_inputs = iris_df.values[:,:2]
#iris_inputs = iris_df.iloc[:,:2]

# convert 'species' from labels to ids; sorting the labels keeps the
# label -> id mapping identical from run to run (a plain set has no fixed order)
iris_classes = sorted(iris_df['species'].unique())
iris_classes_dict = {name: idx for idx, name in enumerate(iris_classes)}
iris_species_ids = iris_df['species'].map(iris_classes_dict)

# we only take the first two features
logreg.fit(iris_df_inputs, iris_species_ids)

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = iris_df['sepal_length'].min() - .5, iris_df['sepal_length'].max() + .5
y_min, y_max = iris_df['sepal_width'].min() - .5, iris_df['sepal_width'].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
print(xx)
print(yy)

# Wrap the mesh points in a DataFrame carrying the training column names, so
# predict() receives the same feature names the model was fitted with.
mesh_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()],
                           columns=iris_df_inputs.columns)
Z = logreg.predict(mesh_points)
Z = Z.reshape(xx.shape)

# Use one colour per class for both the points and the mesh: class id k is
# mapped to the colormap position k / (n_classes - 1) in both plots.
id_span = max(len(iris_classes) - 1, 1)
class_palette = {name: plt.cm.Paired(idx / id_span)
                 for name, idx in iris_classes_dict.items()}

plot_rsk = sns.scatterplot(x=iris_df['sepal_length'], y=iris_df['sepal_width'],
                           hue=iris_df['species'], palette=class_palette)
plot_rsk.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.1,
                    vmin=0, vmax=id_span)


#### Perform supervised training of an MLP via sklearn

https://scikit-learn.org/stable/modules/neural_networks_supervised.html

In [None]:
# Prepare the data for MLP training: choose the input columns and the target
# column, then hold out part of the samples for testing.
n_datapoints = len(iris_df)

# input variables used for learning
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
iris_df_inp = iris_df[feature_columns]
iris_df_out = iris_df['species']

p_random_seed = 13
p_datasplit = 0.2  # fraction of the data that is held out for testing
random.seed(p_random_seed)  # seed Python's built-in random generator

# the split itself is done by the sklearn model_selection module;
# it returns (inputs train, inputs test, targets train, targets test)
split_parts = model_selection.train_test_split(
    iris_df_inp,
    iris_df_out,
    test_size=p_datasplit,
    random_state=p_random_seed,
)
iris_df_inp_train, iris_df_inp_test, iris_df_out_train, iris_df_out_test = split_parts
#display(iris_df_inp_train.head(n=3), iris_df_out_train.head(n=3))
#display(iris_df_inp_test.head(n=3), iris_df_out_test.head(n=3))

In [None]:
# Configure an MLP classifier, train it on the training split and report the
# accuracy on the held-out test split.

# --- parameters for the MLP shape ---
hidden_layer_sizes = (5, 4)  # one entry per hidden layer: number of units
activation = 'relu'  # activation function, {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}

# --- parameters for the learning ---
solver = 'sgd'  # optimizer, {‘lbfgs’, ‘sgd’, ‘adam’}
learning_rate_init = 0.01  # initial learning rate, try something smaller first!
learning_rate = 'constant'  # learning rate schedule, {‘constant’, ‘invscaling’, ‘adaptive’}
max_iter = 20  # epochs, i.e. number of training iterations
batch_size = 1  # number of training examples per minibatch (weight update)
momentum = 0.95  # [0,1]

# All constructor arguments in one place: the tunable ones from above plus
# the remaining ones spelled out explicitly.
mlp_settings = dict(
    random_state=p_random_seed,
    solver=solver,
    activation=activation,
    hidden_layer_sizes=hidden_layer_sizes,
    learning_rate_init=learning_rate_init,
    max_iter=max_iter,
    learning_rate=learning_rate,
    momentum=momentum,
    batch_size=batch_size,
    nesterovs_momentum=True,
    alpha=1e-05,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    early_stopping=False,
    n_iter_no_change=10,
    power_t=0.5,
    tol=0.0001,
    validation_fraction=0.1,
    shuffle=True,
    verbose=False,
    warm_start=False,
)
clf_mlp = neural_network.MLPClassifier(**mlp_settings)

clf_mlp.fit(iris_df_inp_train, iris_df_out_train)
test_accuracy = clf_mlp.score(iris_df_inp_test, iris_df_out_test)
print("Prediction accuracy:", test_accuracy)

## **uncomment this for iterative fitting (to see every epoch)
# for k in range(max_iter):
#     clf_mlp.partial_fit(iris_df_inp_train, iris_df_out_train,
#                     classes=list(set(iris_df['species'])))
#     test_accuracy = clf_mlp.score(iris_df_inp_test, iris_df_out_test)
#     print("Prediction epoch: ",k," accuracy:", test_accuracy)


#### Analyse the MLP training results

The following report and plots are examples for gaining *some* understanding of the model's performance.

In [None]:
# Evaluate the trained MLP on the test split: print per-class metrics and
# draw the confusion matrix as a heatmap.
test_pred = clf_mlp.predict(iris_df_inp_test)
print(metrics.classification_report(iris_df_out_test, test_pred))

# Fix the row/column order to the classes the model was trained on, so the
# matrix keeps one row and column per class even if a class is missing from
# the test targets and predictions, and so the ticks can show class names.
class_labels = clf_mlp.classes_
confusion_matrix = metrics.confusion_matrix(iris_df_out_test, test_pred,
                                            labels=class_labels)
plot_tree_cm = sns.heatmap(confusion_matrix, annot=True, center=0,
                           xticklabels=class_labels, yticklabels=class_labels)
plot_tree_cm.set_xlabel('predicted label')
plot_tree_cm.set_ylabel('true label')

In [None]:
# print the exact predictions as (predicted, true) pairs
pred = clf_mlp.predict(iris_df_inp_test)
print("Prediction:", list(zip(pred, iris_df_out_test)))

# per-class probabilities, one row per test sample
proba = clf_mlp.predict_proba(iris_df_inp_test)
print("Prediction probabilities:", proba)

# show the resulting weight matrices: first their shapes, then the values.
# display() returns None, so it must not be nested inside print().
print("Weights:")
display([coef.shape for coef in clf_mlp.coefs_], clf_mlp.coefs_)