# Physics 494/594
## Classification

In [None]:
# %load ./include/header.py
import numpy as np
import matplotlib.pyplot as plt
import sys
from tqdm import trange,tqdm
sys.path.append('./include')
import ml4s

# Render matplotlib figures inline in the notebook, using the SVG figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Apply the matplotlib style sheet stored under ./include.
plt.style.use('./include/notebook.mplstyle')
# Allow up to 120 characters per line when printing numpy arrays.
np.set_printoptions(linewidth=120)
# Project helper from ./include/ml4s -- presumably applies this CSS file to the notebook; verify in ml4s.
ml4s.set_css_style('./include/bootstrap.css')
# List of colors from the active property cycle, used to pick line colors by index in later plots.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

## Last Notebook

### [Notebook Link: 18_Introduction_to_Keras.ipynb](./18_Introduction_to_Keras.ipynb)

- Learn how to use the `keras` and `tensorflow` libraries to build sequential deep neural networks.
- Learn a simple 2D logical function

### Last Time
- Derived a new cost function for classification tasks: **the binary cross-entropy**

\begin{equation}
C(\boldsymbol{w})=-\frac{1}{N} \sum_{n=1}^{N}\left[y^{(n)} \ln a^{L}+\left(1-y^{(n)}\right) \ln \left(1-a^{L}\right)\right]
\end{equation}

## Today

- Study a new type of problem involving the classification of inputs (mapping continuous/discrete inputs into discrete outputs)
- New cost functions: binary and categorical cross-entropy

### Import tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime

### Recall our Rectangle Problem

In [None]:
L = 3  # number of cells per row of the grid
x = [0,0,0,1,1,0,1,1,0]  # example configuration, flattened row by row

def print_rectangle(x):
    """Print a flattened grid configuration using block glyphs.

    Each truthy entry of ``x`` is drawn as ' ▉ ' and each falsy entry as ' ░ '.
    A line break is inserted after every ``L`` entries, where ``L`` is the
    module-level row length.
    """
    pieces = []
    for position, cell in enumerate(x, start=1):
        pieces.append(' ▉ ' if cell else ' ░ ')
        # close the current row once L cells have been emitted
        if position % L == 0:
            pieces.append('\n')
    print(''.join(pieces))

print_rectangle(x)

### Load all possible configurations

I have generated all $2^9 = 512$ configuration vectors and provided them with labels in a file `../data/rectangles_3x3.dat`.

In [None]:
# Read the full table of configurations from disk.
data = np.loadtxt('../data/rectangles_3x3.dat')

# Every column except the last holds the cell values of a configuration;
# the last column is its label.
rectangles, labels = data[:, :-1].astype(float), data[:, -1].astype(int)

# Summing the labels counts the configurations labelled 1.
print(f'Num rectangles/total = {labels.sum()}/{2**(L*L)}')

This is a difficult classification problem, as we can immediately achieve **94%** accuracy by just saying there are no rectangles!
### Plot all configurations

In [None]:
%%time
# One small panel per configuration on a 32 x 16 grid of axes.
fig,ax = plt.subplots(nrows=32,ncols=16,figsize=(16,32))
axes = ax.flatten()

# The colormap is chosen by label: label 0 -> 'binary', label 1 -> 'Oranges'.
cmaps = ['binary', 'Oranges']

for i,cax in enumerate(axes):
    config = rectangles[i].reshape(L,L)
    cax.matshow(config, cmap=cmaps[labels[i]], vmin=0, vmax=1)
    # hide the tick marks; only the pattern matters
    cax.set_xticks([])
    cax.set_yticks([])

### Creating the Dataset

We do our usual 90/10 train/test split.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the configurations for testing.
#  - stratify=labels keeps the (small) fraction of rectangles the same in the
#    training and test subsets, so the test set is not left with almost no
#    positive examples.
#  - random_state fixes the shuffle so the split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    rectangles, labels, test_size=0.1, stratify=labels, random_state=42)

### Setup our network

In order to use the `binary_crossentropy` we need our output layer to have a single neuron and include `sigmoid` activation.

<!--
model = keras.Sequential(
[
    layers.Dense(128,input_shape=(L*L,),activation='selu', kernel_initializer='lecun_normal'),
    layers.Dense(256,activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(256,activation='relu',kernel_regularizer=tf.keras.regularizers.l2(l2=1e-2)),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])
model.summary()
-->

In [None]:
# Small fully-connected binary classifier: L*L inputs -> 4 -> 16 -> 1.
model = keras.Sequential()
model.add(layers.Dense(4, activation='relu', input_shape=(L*L,)))
model.add(layers.Dense(16, activation='relu'))
# Single sigmoid output neuron, so the output can be fed to the binary cross-entropy.
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

#### Choose the cost function (loss) and optimizer

**Note:** We have a new metric, `accuracy`, which is the fraction of inputs that were classified correctly.

In [None]:
# Binary cross-entropy cost, Adam optimizer; accuracy is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

#### Train the model

In [None]:
batch_size = 128
epochs = 32

# Training histories keyed by run name, so several runs could be compared later.
training_history = {}

# batch_size has to be passed explicitly: without it `fit` falls back to its own
# default mini-batch size and the value defined above would have no effect.
# The held-out test set is used as validation data to monitor generalization.
training_history['test'] = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                                     validation_data=(x_test,y_test), verbose=1)

In [None]:
# Compare the training and held-out curves epoch by epoch.
history = training_history['test'].history
fig,ax = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True, figsize=(5,5))
acc_ax, loss_ax = ax

# dashed line in a contrasting color for the held-out (test) curves
held_out_style = dict(ls='--', color=colors[-3])

# top panel: accuracy per epoch
acc_ax.plot(history['accuracy'])
acc_ax.plot(history['val_accuracy'], **held_out_style)
acc_ax.set_ylabel('model accuracy')
acc_ax.legend(['train', 'test'], loc='best')
# the y-axis is shared between the panels, so this range applies to the loss panel too
acc_ax.set_ylim(0,1)

# bottom panel: loss per epoch
loss_ax.plot(history['loss'])
loss_ax.plot(history['val_loss'], **held_out_style)
loss_ax.set_ylabel('model loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'test'], loc='best');

### Investigate where the network is making mistakes

We chose a *threshold probability* of 0.5 for our classification.  Mistakes occur when the label and prediction disagree.

In [None]:
# evaluate model on the held-out test set
score = model.evaluate(x_test, y_test, verbose=2)

# Classify every configuration: an output probability at or above the threshold
# is labelled 1 (rectangle), everything else stays 0.
# The threshold is 0.5, matching the value stated in the accompanying text.
threshold = 0.5
predictions = np.zeros(rectangles.shape[0],dtype=int)
predictions[np.where(model(rectangles)>=threshold)[0]] = 1

# indices of the configurations whose predicted label differs from the true label
mistakes = np.where(labels != predictions)[0]
num_mistakes = len(mistakes)

print(f'Num. Mistakes  = {num_mistakes}')

### Plot a Confusion Matrix

This is the standard approach to getting a broad view of how well your classifier is doing.

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Tabulate true labels against predicted labels over all configurations.
cm = confusion_matrix(y_true=labels, y_pred=predictions)

# Draw the matrix with human-readable class names.
cm_display = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['No Rectangle','Rectangle'],
).plot(cmap='Blues')

### Plot the configurations that were misclassified

In [None]:
num_cols = 16
# Ceiling division: just enough rows to hold every mistake, and at least one row
# so the figure is still valid when there are no mistakes. (Using
# `num_mistakes//num_cols + 1` would add an empty row whenever the number of
# mistakes is an exact multiple of num_cols.)
num_rows = max(1, -(-num_mistakes//num_cols))
cmaps = ['binary', 'Oranges']

# squeeze=False always yields a 2D array of axes, whatever the grid shape.
fig,ax = plt.subplots(ncols=num_cols,nrows=num_rows,figsize=(num_cols,num_rows),squeeze=False)
for i,cax in enumerate(ax.flatten()):
    if i < num_mistakes:
        # draw the i-th misclassified configuration, colored by its true label
        idx = mistakes[i]
        cax.matshow(rectangles[idx,:].reshape(L,L), cmap=cmaps[labels[idx]],vmin=0,vmax=1)
        cax.set_xticks([])
        cax.set_yticks([])
    else:
        # unused panels at the end of the grid are hidden
        cax.axis('off')