# Session 6

Image Processing


---


Michael de la Maza

AI/ML

Hult International Business School

Adapted from "Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow" by Aurélien Géron

### Image processing with traditional machine learning

In [1]:
# Load the MNIST handwritten-digit dataset from OpenML
# Takes time to load

from sklearn.datasets import fetch_openml

# as_frame=False asks for array output rather than pandas objects
mnist = fetch_openml(
    name='mnist_784',
    as_frame=False,
    parser='auto',
)

In [None]:
# Separate the pixel features (X) from the digit labels (y)

X = mnist.data
y = mnist.target

# Peek at the first five entries of each array, followed by its shape
print(X[:5], X.shape, sep="\n")

print(y[:5], y.shape, sep="\n")

# 70K images, 784 'features' (28x28 pixels)

In [None]:
# Plot one of the instances/images

import matplotlib.pyplot as plt

def plot_digit(image_data):
    """Draw one flattened 28x28 image on the current axes, with the axes hidden.

    ``image_data`` must hold exactly 784 values so it can be reshaped to
    28x28. The image is rendered with the "binary" colormap.
    """
    pixel_grid = image_data.reshape(28, 28)
    plt.imshow(pixel_grid, cmap="binary")
    plt.axis("off")

# Display the first image in the dataset
some_digit = X[0]
plot_digit(some_digit)
plt.show()

# Print its label so it can be compared with the picture
print(y[0])

In [None]:
# Exercise: Plot several other digits. What do you see?
# Do the classifications correspond to what you see?

In [4]:
# Define training and test set:
# the first 60,000 instances are used for training, the remainder for testing

X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]


In [5]:
# Build binary targets for a "5 vs. not-5" detector.
# The labels are compared against the string '5': True for every 5,
# False for every other digit.

y_train_5 = y_train == '5'
y_test_5 = y_test == '5'



In [6]:
# Train classifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict

# Fixed random_state so the forest is reproducible between runs
forest_clf = RandomForestClassifier(random_state=42)

# 3-fold cross-validation returning class probabilities rather than hard
# predictions (method="predict_proba"). Takes 1-2 minutes.
y_probas_forest = cross_val_predict(
    forest_clf,
    X_train,
    y_train_5,
    cv=3,
    method="predict_proba",
)

In [None]:
# Print the cross-validated class probabilities for the first two instances

print(y_probas_forest[:2]) # one row per instance; columns ordered [False True], i.e. [P(not 5), P(5)]

In [8]:
# Plot precision-recall curve
# Refresher: What is precision? What is recall?
# Are the classes balanced?

from sklearn.metrics import precision_recall_curve

# Column 1 holds the probability of the positive class (the digit is a 5)
y_scores_forest = y_probas_forest[:, 1]

# Precision and recall at each candidate decision threshold
(
    precisions_forest,
    recalls_forest,
    thresholds_forest,
) = precision_recall_curve(y_train_5, y_scores_forest)


In [None]:
# Precision (y-axis) against recall (x-axis) for the cross-validated forest
plt.plot(
    recalls_forest,
    precisions_forest,
    color="b",
    linestyle="-",
    linewidth=2,
    label="Random Forest",
)
plt.xlabel("Recall", fontsize=16)
plt.ylabel("Precision", fontsize=16)
plt.legend(loc="lower left", fontsize=16)
plt.show()

In [12]:
# Train RandomForestClassifier on the full training set, then score the held-out test set

# Train classifier (fit on the whole training set for the "5 vs. not-5" task)
forest_clf = RandomForestClassifier(random_state=42)
forest_clf.fit(X_train, y_train_5)

# Predict probabilities on test set
# NOTE: this overwrites y_probas_forest, which previously held the
# cross-validated probabilities for the training set
y_probas_forest = forest_clf.predict_proba(X_test)

### Multi-class prediction

In [None]:
from sklearn.model_selection import cross_val_score

# 3-fold cross-validated accuracy on the multi-class labels, using only the
# first 1,000 training instances
cross_val_score(
    forest_clf,
    X_train[:1000],
    y_train[:1000],
    cv=3,
    scoring="accuracy",
)

# Over 85% accuracy on all folds. Much better than 10% baseline
# Is this surprising?

In [13]:
# Confusion matrix

from sklearn.metrics import ConfusionMatrixDisplay

# Out-of-fold class predictions for every training instance (3 folds).
# Takes 1-2 minutes
y_train_pred = cross_val_predict(
    forest_clf,
    X_train,
    y_train,
    cv=3,
)

In [None]:
# Raw counts: true labels against cross-validated predictions
ConfusionMatrixDisplay.from_predictions(
    y_true=y_train,
    y_pred=y_train_pred,
)
plt.show()

In [None]:
# 5 minute exercise
# Explain this diagram and the major findings to a business audience.
# Why are the numbers along the diagonal largest?
# What numbers get confused with other numbers?

In [None]:
# Show percentages
# normalize="true" normalizes over the true labels, so each cell is a share of
# its true class; values_format=".0%" prints whole-number percentages.
ConfusionMatrixDisplay.from_predictions(
    y_train,
    y_train_pred,
    normalize="true",
    values_format=".0%",
)
# Render the figure explicitly (as the previous confusion-matrix cell does) so
# the cell does not also echo the display object's repr.
plt.show()

In [None]:
# 5 minute exercise
# Explain this confusion matrix
# What number is the easiest to classify (i.e., has the highest classification accuracy)? Why do you think it is easy to classify?
# Which number is the hardest to classify? Why?



In [None]:
# 5 minute exercise
# Modify the ConfusionMatrixDisplay command to show additional precision

In [None]:
# Zero out diagonal to make errors clearer

# Correct predictions get weight 0 (False) and misclassifications weight 1
# (True), so only the errors contribute to the matrix.
sample_weight = (y_train_pred != y_train)
ConfusionMatrixDisplay.from_predictions(
    y_train,
    y_train_pred,
    sample_weight=sample_weight,
    normalize="true",
    values_format=".0%",
)
# Render the figure explicitly so the cell does not also echo the display
# object's repr.
plt.show()

In [17]:
# Examine 8 and 0 being confused
cl_a, cl_b = '8', '0'

# Boolean masks over the training set: by true label and by predicted label
is_a, is_b = (y_train == cl_a), (y_train == cl_b)
pred_a, pred_b = (y_train_pred == cl_a), (y_train_pred == cl_b)

X_aa = X_train[is_a & pred_a]  # true cl_a, predicted cl_a
X_ab = X_train[is_a & pred_b]  # true cl_a, predicted cl_b
X_ba = X_train[is_b & pred_a]  # true cl_b, predicted cl_a
X_bb = X_train[is_b & pred_b]  # true cl_b, predicted cl_b



In [None]:
# Shape of a single selected instance (a row of X_train)
print(X_aa[0].shape)

In [19]:
# Magic code
import numpy as np

def plot_digits(instances, images_per_row=10, **options):
    """Show a batch of flattened 28x28 images as one tiled picture.

    Parameters
    ----------
    instances : 2D array, one flattened 28x28 image (784 values) per row.
    images_per_row : int, default 10
        Maximum number of images per grid row; capped at ``len(instances)``.
    **options
        Extra keyword arguments forwarded to ``plt.imshow``.

    The picture is drawn on the current matplotlib axes with the axes hidden;
    nothing is returned. An empty ``instances`` leaves the axes blank instead
    of raising.
    """
    size = 28

    if len(instances) == 0:
        # Nothing to tile; without this guard images_per_row would become 0
        # and the row computation below would divide by zero.
        plt.axis("off")
        return

    images_per_row = min(len(instances), images_per_row)
    # This is equivalent to n_rows = ceil(len(instances) / images_per_row):
    n_rows = (len(instances) - 1) // images_per_row + 1

    # Append empty images to fill the end of the grid, if needed:
    n_empty = n_rows * images_per_row - len(instances)
    padded_instances = np.concatenate([instances, np.zeros((n_empty, size * size))], axis=0)

    # Reshape the array so it's organized as a grid containing 28x28 images:
    image_grid = padded_instances.reshape((n_rows, images_per_row, size, size))

    # Combine axes 0 and 2 (vertical image grid axis, and vertical image axis),
    # and axes 1 and 3 (horizontal axes). We first need to move the axes that we
    # want to combine next to each other, using transpose(), and only then we
    # can reshape:
    big_image = image_grid.transpose(0, 2, 1, 3).reshape(n_rows * size,
                                                         images_per_row * size)
    # Now that we have a big image, we just need to show it. The colormap is
    # given by name (as in plot_digit), so this function does not rely on
    # `matplotlib` having been imported as `mpl`:
    plt.imshow(big_image, cmap="binary", **options)
    plt.axis("off")

In [None]:
import matplotlib as mpl

plt.figure(figsize=(8,8))
# Rows follow the true label (cl_a on top, cl_b below);
# columns follow the predicted label (cl_a on the left, cl_b on the right).
plt.subplot(221)
plot_digits(X_aa[:25], images_per_row=5)
plt.subplot(222)
plot_digits(X_ab[:25], images_per_row=5)
plt.subplot(223)
plot_digits(X_ba[:25], images_per_row=5)
plt.subplot(224)
plot_digits(X_bb[:25], images_per_row=5)

# Axis captions and class labels, placed in figure coordinates.
# The class labels come from cl_a / cl_b rather than being hard-coded, so the
# figure stays correctly labelled when a different pair of digits is compared.
fig = plt.gcf()
fig.text(0.5, 0.04, 'Predicted Label', ha='center')
fig.text(0.04, 0.5, 'True Label', va='center', rotation='vertical')
fig.text(0.1, 0.8, cl_a, ha='center', fontsize=12)
fig.text(0.3, 0.1, cl_a, ha='center', fontsize=12)
fig.text(0.1, 0.3, cl_b, ha='center', fontsize=12)
fig.text(0.8, 0.1, cl_b, ha='center', fontsize=12)

plt.show()

In [21]:
# 5 minute exercise
# Modify the code to compare '3' and '5'

### Image processing with CNN

In [22]:
from numpy import mean
from numpy import std
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

In [None]:
# load the dataset
# NOTE: `mnist` here is the keras.datasets module imported above. That import
# rebinds the name `mnist`, which earlier in the notebook held the
# fetch_openml result, so re-running the earlier `mnist.data` / `mnist.target`
# cell after this point requires reloading the OpenML data first.
(trainX, trainY), (testX, testY) = mnist.load_data()

In [None]:
# 5 minute exercise
# What is the size of trainX, trainY, testX, testY?

In [24]:
# preprocessing - minimal
# Give every image an explicit single-channel axis: (n, 28, 28, 1)
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))

# One-hot encode the labels. Guarded on the label array still being 1-D so
# that re-running this cell does not encode already-encoded labels again
# (which would silently produce a wrongly shaped target array).
if trainY.ndim == 1:
    trainY = to_categorical(trainY)
if testY.ndim == 1:
    testY = to_categorical(testY)

In [25]:
# normalize pixel values to 0 to 1
# Only integer-typed (raw 0-255) arrays are scaled, so re-running this cell
# does not divide already-normalized float data by 255 a second time.
if trainX.dtype.kind in 'iu':
    trainX = trainX.astype('float32') / 255
if testX.dtype.kind in 'iu':
    testX = testX.astype('float32') / 255

In [26]:
# define the model
# in keras, each layer is defined separately and a Sequential model
# stacks them, in order, into a neural network

model = Sequential()
# Convolution: 32 filters, each 3x3, applied to the 28x28x1 input image
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
# Max pooling: keeps the largest value in each 2x2 window of a feature map
model.add(MaxPooling2D((2, 2)))
# The previous two layers output a stack of 2D feature maps (one per filter);
# Flatten turns that stack into a single 1D vector per image
model.add(Flatten())
# 'Standard' (fully connected) hidden layer with 100 neurons
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
# Output layer with 10 neurons; softmax = multi-class probability
model.add(Dense(10, activation='softmax'))

In [27]:
# optimizer: how to update weights
# (stochastic gradient descent with momentum)
opt = SGD(learning_rate=0.01, momentum=0.9)

# The loss matches the one-hot encoded labels; accuracy is tracked as a metric
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model - takes 5+ minutes
# NOTE: the test set doubles as the validation set here, so the validation
# metrics reported each epoch are test-set metrics.
model.fit(
    trainX,
    trainY,
    epochs=10,
    batch_size=32,
    validation_data=(testX, testY),
)

In [None]:
# compute accuracy on the test set
# evaluate() returns the loss followed by the compiled metrics (accuracy here);
# the loss is discarded
_, acc = model.evaluate(testX, testY, verbose=0)
# report the accuracy as a percentage with three decimals
print('> %.3f' % (acc * 100.0))

In [None]:
# 5 minute exercise
# Compare to the random forest model

In [None]:
# 10 minute exercise
# Modify the CNN. What happens to the accuracy?
# Try: Reducing the number of filters from 32 to 4
# i.e. change the 32 in: model.add(Conv2D(32, (3, 3), ...))

In [None]:
# The End