# Confusion Matrix

* `True positive` = in binary classification, when the model predicts 1 and the truth is 1
* `True negative` = in binary classification, when the model predicts 0 and the truth is 0
* `False positive` = in binary classification, when the model predicts 1 and the truth is 0
* `False negative` = in binary classification, when the model predicts 0 and the truth is 1

In [None]:
from sklearn.datasets import make_circles

# Number of examples to generate and the row index where the test split begins
n_samples = 1000
n_train = 800

# Create the circles dataset (fixed random_state so the data is reproducible)
X, y = make_circles(n_samples, noise=0.03, random_state=42)

# Split into training data and test data:
# rows before n_train are used for training, the remaining rows for testing
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

In [None]:
import tensorflow as tf

# Set the random seed so the run is reproducible
# (the API is tf.random.set_seed; `tf.ramdom.seed` does not exist)
tf.random.set_seed(42)

# 1. Create the model using the Sequential API:
#    two hidden layers of 4 ReLU units and a single sigmoid output unit
model_1 = tf.keras.Sequential([
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

# 2. Compile the model
#    compile() takes `optimizer` and `metrics`, and Adam takes `learning_rate`;
#    the misspelled/legacy keywords are rejected by Keras.
model_1.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.02),
                metrics=["accuracy"])

# 3. Fit the model on the training split
model_1.fit(X_train, y_train, epochs=20)

In [None]:
# Create a confusion matrix
from sklearn.metrics import confusion_matrix

# Make predictions
y_preds =  model_1.predict(X_test)

# Create confusion matrix
# NOTE(review): y_preds is passed exactly as returned by predict(), with no
# rounding or thresholding applied first -- confirm this call is kept on
# purpose to show what happens with unconverted predictions.
confusion_matrix(y_test, y_preds)

It looks like our predictions array has come out in prediction probability form, which is the standard output of the sigmoid (or softmax) activation functions. So we now need to convert the probabilities into class labels.

In [None]:
# Convert the prediction probabilities to binary format and view the first 10
# (tf.round is applied to the whole array; the slice then keeps the first 10 entries)
tf.round(y_preds)[:10]

In [None]:
# Create confusion matrix, rounding the predictions before comparing them with y_test
confusion_matrix(y_test, tf.round(y_preds))

We will now prettify the confusion matrix.

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt


# Figure size handed to plt.subplots
figsize = (10, 10)

# Raw counts, plus a copy in which every row is divided by its own total
cm = confusion_matrix(y_test, tf.round(y_preds))
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_norm = cm.astype("float") / row_totals
n_classes = cm.shape[0]

# Draw the count matrix as a coloured grid with a colour bar
fig, ax = plt.subplots(figsize=figsize)
cax = ax.matshow(cm, cmap=plt.cm.Blues)
fig.colorbar(cax)

# Tick labels: use `classes` when it is set, otherwise fall back to 0..n-1
classes = False
labels = classes if classes else np.arange(cm.shape[0])

# Title, axis names and ticks (one tick per class on both axes)
tick_positions = np.arange(n_classes)
ax.set(title="Confusion Matrix",
       xlabel="Predicted Label",
       ylabel="True Label",
       xticks=tick_positions,
       yticks=tick_positions,
       xticklabels=labels,
       yticklabels=labels)

# Move the x-axis label and ticks to the bottom of the plot
ax.xaxis.set_label_position("bottom")
ax.xaxis.set_tick_bottom()

# Use the same font size for both axis labels and the title
for text_artist in (ax.xaxis.label, ax.yaxis.label, ax.title):
    text_artist.set_size(20)

# Midpoint of the count range: cells above it get white text, the rest black
threshold = (cm.max() + cm.min()) / 2.

# Write "count (row percentage)" in the centre of every cell
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        cell_colour = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, f"{cm[i, j]} ({cm_norm[i,j]*100:.1f}%)",
                 horizontalalignment="center",
                 color=cell_colour,
                 size=15)

# Working with a larger Example (Multiclass Classification)

When you have more than two classes as an option, it's known as `multiclass classification`.

* This means if you have 3 different classes, it's `multiclass classification`.
* It also means that if you have 100 different classes, it's `multiclass classification`.

To practice multiclass classification, we are going to build a neural network to classify images of different items of clothing.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

# load_data() hands back the data already sorted into a training pair and a
# testing pair; each pair is unpacked into its data and its labels
train_split, test_split = fashion_mnist.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split

In [None]:
# Show the first training example: the raw array first, then its label
print(f"Training sample:\n{train_data[0]}\n")
print(f"Training label:\n{train_labels[0]}\n")

In [None]:
# Check the shape of a single example and of its label
# (the cell displays both shapes as a tuple)
train_data[0].shape, train_labels[0].shape

In [None]:
# Plot a single sample
import matplotlib.pyplot as plt

# imshow draws an in-memory array as an image; imread only reads image files
# from disk and cannot be given the array itself.
plt.imshow(train_data[0])

In [None]:
# Check out the label of the sample plotted above (index 0, so the image and
# the label being inspected belong to the same example)
train_labels[0]

In [None]:
# Create a small list so we can index into our training labels and get a
# human-readable name; the position in the list is the integer label.
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",  # was misspelled "Ankele boot", which showed up in plot titles
]

# Number of class names
len(class_names)

In [None]:
# Plot an example image with its class name as the title
index_of_choise = 2000
example_image = train_data[index_of_choise]
example_name = class_names[train_labels[index_of_choise]]
plt.imshow(example_image, cmap=plt.cm.binary)
plt.title(example_name)

In [None]:
# Plot multiple random images of fashion MNIST in a 2x2 grid
import random
plt.figure(figsize=(7, 7))
for i in range(4):
    # Subplot positions are 1-based, hence i + 1
    ax = plt.subplot(2, 2, i + 1)
    # Pick a random training index (random.choice; `random.choise` does not exist)
    rand_index = random.choice(range(len(train_data)))
    plt.imshow(train_data[rand_index], cmap=plt.cm.binary)
    plt.title(class_names[train_labels[rand_index]])
    # Hide the axes so only the image and its title are shown
    plt.axis(False)

## Building a multiclass classification model

For our multiclass classification model, we can use a similar architecture to our binary classifiers; however, we are going to have to tweak a few things:

* Input shape = 28x28 (the shape of one image)
* Output shape = 10 (one per class of clothing)
* Loss function = tf.keras.losses.CategoricalCrossentropy()
    * If your labels are one-hot encoded, use CategoricalCrossentropy()
    * If your labels are in integer form, use SparseCategoricalCrossentropy()
* Output layer activation = Softmax (not Sigmoid)

* `Flatten` = flatten the input

In [None]:
# Our data needs to be flattened from 28*28 to 784:
# build a model holding only a Flatten layer and inspect its output shape
flatten_layer = tf.keras.layers.Flatten(input_shape=(28, 28))
flatten_model = tf.keras.Sequential([flatten_layer])
flatten_model.output_shape

In [None]:
# Set random seed
tf.random.set_seed(42)

# 1. Create the model: flatten each 28x28 input, two hidden layers of 4 ReLU
#    units, and a 10-unit softmax output (one unit per class)
model_2 = tf.keras.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax),
])

# 2. Compile the model
#    (keywords are `optimizer` and `metrics`; the sparse loss is used because
#    the labels are passed as integers rather than one-hot vectors)
model_2.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# 3. Fit the model on the non-normalized data
#    validation_data must be an (inputs, labels) pair, not the inputs alone
non_norm_history = model_2.fit(train_data,
                               train_labels,
                               epochs=10,
                               validation_data=(test_data, test_labels))

In [None]:
# Check the model summary (prints the layers of model_2)
model_2.summary()

In [None]:
# Check the min and the max values of the training data
# (the cell displays them as a (min, max) tuple)
train_data.min(), train_data.max()

Neural networks prefer data to be scaled (or normalized); this means they like the numbers in the tensors they try to find patterns in to be between 0 & 1.

In [None]:
# We can get our training and testing data between 0 & 1 by dividing by the maximum
# NOTE(review): 255.0 is assumed to be the maximum value of the raw data --
# confirm against the min/max check on train_data.
train_data_norm = train_data/255.0
test_data_norm = test_data/255.0

# Check the min and max values of the scaled training data
train_data_norm.min(), train_data_norm.max()

In [None]:
# Now that our data is normalized, let's build a model to find patterns in it

# Set random seed
tf.random.set_seed(42)

# 1. Create the model (same layers as model_2, so only the input data differs)
model_3 = tf.keras.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax),
])

# 2. Compile the model (keywords are `optimizer` and `metrics`)
model_3.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# 3. Fit the model on the normalized data
#    Train for the same 10 epochs as model_2 and validate on the normalized
#    test set, so the two training histories can be compared curve-for-curve.
norm_history = model_3.fit(train_data_norm,
                           train_labels,
                           epochs=10,
                           validation_data=(test_data_norm, test_labels))

## Compare the loss curves between the 2 models

In [None]:
import pandas as pd
# Turn each training history into a DataFrame (one column per recorded metric)
non_norm_curves = pd.DataFrame(non_norm_history.history)
norm_curves = pd.DataFrame(norm_history.history)

# Plot the non normalized run first, then the normalized run
non_norm_curves.plot(title="Non normalized data")
norm_curves.plot(title="Normalized data")

* Note: The same model with slightly different data can produce dramatically different results. So when you are comparing models, it's important to make sure that you are comparing them on the same criteria (e.g. same architecture but different data, or same data but different architecture).

## Finding the ideal learning rate

In [None]:

# Set random seed
tf.random.set_seed(42)

# 1. Create the model
model_4 = tf.keras.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax),
])

# 2. Compile the model (keywords are `optimizer` and `metrics`)
model_4.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# Create the learning rate callback (it lives in tf.keras.callbacks):
# start at 1e-3 and multiply the rate by 10 every 20 epochs
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10**(epoch/20))

# 3. Fit the model, letting the callback change the learning rate each epoch
find_lr_history = model_4.fit(train_data_norm,
                              train_labels,
                              epochs=40,
                              validation_data=(test_data_norm, test_labels),
                              callbacks=[lr_scheduler])

In [None]:
# Plot the loss against the learning rate used in each epoch
import numpy as np
import matplotlib.pyplot as plt  # alias must be `plt`: that is the name used below

# Rebuild the learning rates with the same formula and epoch count (40) that
# the LearningRateScheduler callback used during the search run
lrs = 1e-3 * (10**(tf.range(40)/20))
# Log-scaled x-axis because the learning rate grows exponentially per epoch
plt.semilogx(lrs, find_lr_history.history["loss"])
plt.xlabel("Learning Rate")
plt.ylabel("Loss")
plt.title("Finding the ideal learning rate")

Let's refit our model with the ideal learning rate.

In [None]:
# Set random seed
tf.random.set_seed(42)

# 1. Create the model
model_4 = tf.keras.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(4, activation='relu'),
  tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax),
])

# 2. Compile the model with the chosen learning rate
#    (keywords are `optimizer`, `metrics`, and `learning_rate` on Adam)
model_4.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])


# 3. Fit the model (no learning rate callback this time)
find_lr_history = model_4.fit(train_data_norm,
                              train_labels,
                              epochs=40,
                              validation_data=(test_data_norm, test_labels))

## Evaluate our multi-class classification model

To evaluate our multi-class classification model we could:
* Evaluate its performance using other classification metrics
(such as a confusion matrix)
* Assess some of its predictions (through visualization)
* Improve its results (by training it for longer or changing the architecture)
* Save and export it to use in an application

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15):
    """Plot a labelled confusion matrix for a set of predictions.

    Every cell shows the raw count followed, in brackets, by that count as a
    percentage of its row total. Rows are labelled "True Label" and columns
    "Predicted Label".

    Args:
        y_true: Ground-truth labels, passed straight to
            sklearn.metrics.confusion_matrix.
        y_pred: Predicted labels, passed straight to
            sklearn.metrics.confusion_matrix.
        classes: Optional sequence of class names (list, tuple or array) used
            as tick labels. When it is None, empty, or a scalar such as
            False, the ticks are the integers 0..n_classes-1.
        figsize: Figure size forwarded to plt.subplots.
        text_size: Font size for the axis labels, the title and the cell text.

    Returns:
        None. The matrix is drawn on a newly created matplotlib figure.
    """
    # Create the confusion matrix (raw counts)
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]

    # Row-normalise the counts. A row whose total is 0 is left at 0.0 instead
    # of dividing by zero and producing NaN percentages.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = np.divide(cm,
                        row_totals,
                        out=np.zeros(cm.shape, dtype=float),
                        where=row_totals != 0)

    # Let's prettify it
    fig, ax = plt.subplots(figsize=figsize)

    # Create a matrix plot with a colour bar
    cax = ax.matshow(cm, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # Set labels to be classes. A plain `if classes:` raises for numpy arrays
    # (ambiguous truth value), so emptiness is checked explicitly.
    if classes is None or np.ndim(classes) == 0 or len(classes) == 0:
        labels = np.arange(n_classes)
    else:
        labels = classes

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted Label",
           ylabel="True Label",
           xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=labels,
           yticklabels=labels)

    # Set x-axis labels to the bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.set_tick_bottom()

    # Adjust label size
    ax.xaxis.label.set_size(text_size)
    ax.yaxis.label.set_size(text_size)
    ax.title.set_size(text_size)

    # Midpoint of the count range: cells above it get white text, others black
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell, drawing on this function's own axes
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)
    

In [None]:
# Display the list of class names
class_names

In [None]:
# Make some predictions with our model
y_probs = model_4.predict(test_data_norm)

# View the first 5 predictions
# (y_probs is what was just computed; y_preds still holds the predictions of
# the earlier binary model at this point)
y_probs[:5]

In [None]:
# For the first test sample: the predicted values, the index of the largest
# one, and the class name found at that index
y_probs[0], tf.argmax(y_probs[0]), class_names[tf.argmax(y_probs[0])]

In [None]:
# Convert the prediction probabilities to integers:
# for every row, keep the index of its highest value
y_preds = np.argmax(y_probs, axis=1)

# View the first 10 prediction labels
y_preds[:10]

In [None]:
from sklearn.metrics import confusion_matrix
# Raw confusion matrix: true test labels first, predicted labels second
confusion_matrix(test_labels, y_preds)

In [None]:
# Make a prettier confusion matrix: class names as tick labels, a larger
# figure and a smaller text size than the function defaults
make_confusion_matrix(y_true=test_labels,
                      y_pred=y_preds,
                      classes=class_names,
                      figsize=(15,15),
                      text_size=10)