# Assignment Notebook NN - fully connected

Import modules

In [1]:
# data tools
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd 

# sklearn tools
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# tf tools
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import SGD

import cv2

# Import sklearn metrics
from sklearn import metrics
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer

2022-05-17 18:28:23.151923: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-17 18:28:23.151959: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Define plot history function 

In [2]:
def plot_history(H, epochs):
    plt.style.use("seaborn-colorblind")

    plt.figure(figsize=(12,6))
    plt.subplot(1,2,1)
    plt.plot(np.arange(0, epochs), H.history["loss"], label="train_loss")
    plt.plot(np.arange(0, epochs), H.history["val_loss"], label="val_loss", linestyle=":")
    plt.title("Loss curve")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.tight_layout()
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(np.arange(0, epochs), H.history["accuracy"], label="train_acc")
    plt.plot(np.arange(0, epochs), H.history["val_accuracy"], label="val_acc", linestyle=":")
    plt.title("Accuracy curve")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.tight_layout()
    plt.legend()
    plt.show()

Load data 

In [2]:
# > Load data 
def load_data():
    # Print info 
    print("[info] Loading data...")
    # Load data 
    X, y = fetch_openml("mnist_784", return_X_y=True)
    # Get label names 
    label_names = sorted(y.unique())
    # Print info
    print("[info] Data loaded")
    
    return (X, y, label_names)

In [3]:
X, y, label_names = load_data()

[info] Loading data...
[info] Data loaded


In [5]:
type(y)

pandas.core.series.Series

In [7]:
y[:10]

0    5
1    0
2    4
3    1
4    9
5    2
6    1
7    3
8    1
9    4
Name: class, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

In [5]:
X.shape

(70000, 784)

In [6]:
type(X)

pandas.core.frame.DataFrame

In [6]:
sorted(y.unique())

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

Prepare data 

In [4]:
# > Prepare data
def prep_data(X, y):
    # Print info 
    print("[info] Processing data...")
    # Converting X and y to numpy arrays
    X = np.array(X)
    y = np.array(y)
    # Splitting data 
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42,
                                                        test_size = 0.2)
    # Scaling the features
    X_train_scaled = X_train / 255
    X_test_scaled = X_test / 255
    
    # I don't really remember why I do this 
    # Is it so I can use a logistic regression as an activation function on categorical data with more than 1 categories? 
    y_train_binarized = LabelBinarizer().fit_transform(y_train)
    y_test_binarized = LabelBinarizer().fit_transform(y_test)
    
    return (X_train_scaled, X_test_scaled, y_train_binarized, y_test_binarized)

In [8]:
# processing data
X_train, X_test, y_train, y_test = prep_data(X, y)

[info] Processing data...


Build model 

In [5]:
# > Create model
def create_model():
    # Print info 
    print("[INFO] Initializing model")
    
    # define simple architecture 784x256x128x10
    model = Sequential()
    model.add(Dense(256, input_shape=(784,), activation="relu"))
    model.add(Dense(128, activation="relu"))
    model.add(Dense(10, activation="softmax")) #softmax generalises LogReg for multiclass tasks
    
    # define the gradient descent
    sgd = SGD(0.01)
    # compile model
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"])

    # define the gradient descent
    sgd = SGD(0.01)
    # compile model
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"])
    
    # Print info
    print("[INFO] Model summary:")
    model.summary()
    
    return model

In [21]:
model = create_model()

[INFO] Initializing model
[INFO] Compiling model
[INFO] Model compiled!
[INFO] Model summary:
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               200960    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________


In [9]:
# To keep me from adding to the model when I do not want to 
tf.keras.backend.clear_session() # Not sure if this smar to have in the final product?

In [10]:
# Define model
#model = Sequential()

# First set of layers CONV => ReLU => MAXPOOL
#model.add(Conv2D(32, (3,3),
                #padding = "same", 
                #input_shape = (784,)))
#model.add(Activation("relu"))
#model.add(MaxPooling2D(pool_size = (2,2),
                #      strides = (2,2)))

# Second set of layers CONV => RelU => MAXPOOL 
#model.add(Conv2D(50, (5,5),
                #padding="same"))
#model.add(Activation("relu"))
#model.add(MaxPooling2D(pool_size = (2,2),
                 #      strides = (2,2)))

# FC => ReLU
#model.add(Flatten())
#model.add(Dense(512))
#model.add(Activation("relu"))

# Softmax classifier 
#model.add(Dense(10))
#model.add(Activation("softmax"))

In [11]:
# define simple architecture 784x256x128x10
model = Sequential()
model.add(Dense(256, input_shape=(784,), activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(10, activation="softmax")) #softmax generalises LogReg for multiclass tasks

2022-05-15 16:06:23.053186: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-05-15 16:06:23.053243: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (j-71038-job-0): /proc/driver/nvidia/version does not exist
2022-05-15 16:06:23.053702: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Compile model 

In [13]:
# define the gradient descent
sgd = SGD(0.01)
# compile model
model.compile(loss="categorical_crossentropy",
              optimizer=sgd,
              metrics=["accuracy"])

Model summary

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               200960    
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 10)                1290      
                                                                 
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________


Train model 

In [15]:
history = model.fit(X_train, y_train,
                    validation_data = (X_test, y_test),
                    epochs = 10,
                    validation_split = 0.1,
                    batch_size = 32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Evaluate model

In [6]:
# > Evaluate model
def evaluate(model, X_test, y_test, label_names):
    # evaluate network
    predictions = model.predict(X_test, batch_size=32)
    # print classification report
    print(predictions[0])
    # print classification report
    report = classification_report(y_test.argmax(axis=1), 
                                   predictions.argmax(axis=1), 
                                   target_names=label_names)
    # Print metrics
    print(report)
    # Save metrics
    #outpath = os.path.join("output", "tensorflow_report.txt")
    #with open(outpath, "w") as f:
    #    f.write(report)

In [17]:
evaluate(model, X_test, y_test, label_names)

[1.3566627e-08 2.0288253e-09 6.0252928e-06 6.0445187e-04 3.6819141e-09
 1.1664617e-05 5.8208915e-10 1.2170537e-08 9.9937564e-01 2.2069651e-06]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      1343
           1       0.97      0.98      0.98      1600
           2       0.96      0.96      0.96      1380
           3       0.95      0.95      0.95      1433
           4       0.96      0.95      0.96      1295
           5       0.96      0.95      0.96      1273
           6       0.97      0.97      0.97      1396
           7       0.97      0.97      0.97      1503
           8       0.96      0.94      0.95      1357
           9       0.94      0.96      0.95      1420

    accuracy                           0.96     14000
   macro avg       0.96      0.96      0.96     14000
weighted avg       0.96      0.96      0.96     14000



__Try to also load cifar 10__

In [8]:
# > Load data 
def load_data():
    # Print info 
    print("[info] Loading data...")
    # Load data 
    X, y = fetch_openml("mnist_784", return_X_y=True)
    # Get label names 
    label_names = sorted(y.unique())
    # Print info
    print("[info] Data loaded")
    
    return (X, y, label_names)

In [10]:
# > Prepare data
def normalize(X_train, X_test):
    # Scaling the features
    X_train_scaled = X_train / 255
    X_test_scaled = X_test / 255
    
    return (X_train_scaled, X_test_scaled)

In [8]:
def load_mnist():
    # Print info 
    print("[info] Loading data...")
    # Load data 
    X, y = fetch_openml("mnist_784", return_X_y=True)
    # Get label names 
    label_names = sorted(y.unique())
    # Print info
    print("[info] Data loaded")
    
    # Converting X and y to numpy arrays
    X = np.array(X)
    y = np.array(y)
    
    # Splitting data 
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42,
                                                        test_size = 0.2)
    return (X_train, X_test, y_train, y_test, label_names)
    

In [8]:
# > Load cifar-10 data
def load_cifar():
    # Print info
    print("[INFO] loading data...")
    # Load data
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    # Initialize label names
    label_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
    # Print info
    print("[INFO] Data loaded")
    
    return (X_train, X_test, y_train, y_test, label_names)

In [9]:
X_train, X_test, y_train, y_test, label_names = load_cifar()

[INFO] loading data...
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[INFO] Data loaded


In [10]:
X_train.shape

(50000, 32, 32, 3)

In [12]:
type(y_train)

numpy.ndarray

In [14]:
y_train.shape

(50000, 1)

In [15]:
y[:10]

0    5
1    0
2    4
3    1
4    9
5    2
6    1
7    3
8    1
9    4
Name: class, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

In [88]:
type(X_train)

numpy.ndarray

In [89]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

Create function that reshapes cifar 10

In [38]:
import numpy as np

array = np.empty((0,0), int)

array = np.append(array, np.array([[1,3,5]]), axis=0)
array = np.append(array, np.array([[2,4,6]]), axis=0)

print(array)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 0 and the array at index 1 has size 3

In [24]:
array = np.empty((0,3), int)

In [39]:
X_train.shape

(50000, 32, 32, 3)

In [48]:
X_flat = []
for image in tqdm(X_train[:10]):
    # Create dataset for logistic regression
    gray = cv2.bitwise_not(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    X_flat.append(gray.flatten())

100%|██████████| 10/10 [00:00<00:00, 8456.26it/s]


In [49]:
print(X_flat)

[array([193, 210, 208, ...,  81, 143, 165], dtype=uint8), array([ 78, 120, 154, ..., 119, 118, 118], dtype=uint8), array([  0,   2,   2, ..., 171, 171, 170], dtype=uint8), array([234, 225, 224, ..., 213, 204, 195], dtype=uint8), array([ 71,  73,  66, ..., 176, 180, 178], dtype=uint8), array([147, 156, 153, ..., 204, 200, 195], dtype=uint8), array([ 90, 143, 124, ..., 157, 159, 128], dtype=uint8), array([220, 218, 211, ..., 128, 130, 133], dtype=uint8), array([ 64,  66,  68, ..., 156, 155, 155], dtype=uint8), array([133, 156, 166, ..., 146, 145, 145], dtype=uint8)]


In [51]:
X_flat

[array([193, 210, 208, ...,  81, 143, 165], dtype=uint8),
 array([ 78, 120, 154, ..., 119, 118, 118], dtype=uint8),
 array([  0,   2,   2, ..., 171, 171, 170], dtype=uint8),
 array([234, 225, 224, ..., 213, 204, 195], dtype=uint8),
 array([ 71,  73,  66, ..., 176, 180, 178], dtype=uint8),
 array([147, 156, 153, ..., 204, 200, 195], dtype=uint8),
 array([ 90, 143, 124, ..., 157, 159, 128], dtype=uint8),
 array([220, 218, 211, ..., 128, 130, 133], dtype=uint8),
 array([ 64,  66,  68, ..., 156, 155, 155], dtype=uint8),
 array([133, 156, 166, ..., 146, 145, 145], dtype=uint8)]

In [55]:
X_flat_df = pd.DataFrame(X_flat)

In [61]:
X_flat_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,-63,-46,-48,-53,-71,-87,-102,-106,-113,-116,...,-84,-85,-92,-83,-65,-76,122,81,-113,-91
1,78,120,-102,-102,122,86,61,54,101,-117,...,-23,-46,-66,-94,-117,125,122,119,118,118
2,0,2,2,2,2,2,2,2,2,2,...,-77,-82,-79,-68,-65,-71,-79,-85,-85,-86
3,-22,-31,-32,-34,-36,-34,-35,-20,-24,-27,...,-75,-58,-75,-62,-68,-69,-53,-43,-52,-61
4,71,73,66,58,54,55,53,55,55,56,...,-90,-87,-84,-81,-81,-77,-76,-80,-76,-78


In [63]:
32*32

1024

In [99]:
def reshape_data(X):
    # Define empthy list 
    X_flat = []
    # For each image in the file...
    for image in tqdm(X):
        # Convert to grayscale
        gray = cv2.bitwise_not(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        # Flatten and append to list 
        X_flat.append(np.array(gray.flatten()))
    # Convert to dataframe
    X_flat_df = np.array(X_flat)
    
    return X_flat_df

In [67]:
X_flat = reshape_data(X_train[:10])

100%|██████████| 10/10 [00:00<00:00, 6685.22it/s]


In [68]:
X_flat

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,-63,-46,-48,-53,-71,-87,-102,-106,-113,-116,...,-84,-85,-92,-83,-65,-76,122,81,-113,-91
1,78,120,-102,-102,122,86,61,54,101,-117,...,-23,-46,-66,-94,-117,125,122,119,118,118
2,0,2,2,2,2,2,2,2,2,2,...,-77,-82,-79,-68,-65,-71,-79,-85,-85,-86
3,-22,-31,-32,-34,-36,-34,-35,-20,-24,-27,...,-75,-58,-75,-62,-68,-69,-53,-43,-52,-61
4,71,73,66,58,54,55,53,55,55,56,...,-90,-87,-84,-81,-81,-77,-76,-80,-76,-78
5,-109,-100,-103,-128,121,106,-105,-49,-41,-44,...,-26,-29,-31,-33,-39,-44,-48,-52,-56,-61
6,90,-113,124,113,111,101,83,66,51,75,...,89,115,-115,-126,-125,-115,-116,-99,-97,-128
7,-36,-38,-45,-78,-79,-47,-47,-61,-74,-79,...,-118,-112,120,116,126,108,105,-128,-126,-123
8,64,66,68,68,65,62,63,71,69,77,...,39,44,51,70,-117,-85,-95,-100,-101,-101
9,-123,-100,-90,-98,-100,121,86,87,115,-90,...,-95,-96,-99,-101,-103,-106,-108,-110,-111,-111


In [69]:
X_train = reshape_data(X_train)

100%|██████████| 50000/50000 [00:00<00:00, 86684.43it/s]


In [70]:
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,-63,-46,-48,-53,-71,-87,-102,-106,-113,-116,...,-84,-85,-92,-83,-65,-76,122,81,-113,-91
1,78,120,-102,-102,122,86,61,54,101,-117,...,-23,-46,-66,-94,-117,125,122,119,118,118
2,0,2,2,2,2,2,2,2,2,2,...,-77,-82,-79,-68,-65,-71,-79,-85,-85,-86
3,-22,-31,-32,-34,-36,-34,-35,-20,-24,-27,...,-75,-58,-75,-62,-68,-69,-53,-43,-52,-61
4,71,73,66,58,54,55,53,55,55,56,...,-90,-87,-84,-81,-81,-77,-76,-80,-76,-78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,86,81,76,75,76,76,72,68,68,68,...,-61,-57,-53,-51,-53,-49,-47,-46,-43,-44
49996,43,41,39,37,37,40,45,48,46,44,...,117,111,109,103,107,102,99,101,96,97
49997,76,76,75,73,73,73,72,71,69,69,...,-73,-74,-83,-83,-77,-74,-68,-58,-48,-36
49998,38,41,42,42,43,45,45,45,44,45,...,87,80,76,75,72,74,77,74,71,70


In [108]:
    # Loading data
    X_train, X_test, y_train, y_test, label_names = load_cifar()
    # Reshape data 
    X_train = reshape_data(X_train)
    X_test = reshape_data(X_test)
    # normalize data
    X_train, X_test = normalize(X_train, X_test)

[INFO] loading data...
[INFO] Data loaded


100%|██████████| 50000/50000 [00:00<00:00, 72180.71it/s]
100%|██████████| 10000/10000 [00:00<00:00, 75935.21it/s]


In [109]:
type(X_train)

numpy.ndarray

In [113]:
def train_model(X_train, y_train):
    # Print info
    print("[info] Training model...")
    # Initialyzing model
    clf = LogisticRegression(multi_class="multinomial")
    # Training model 
    clf = LogisticRegression(penalty="none",
                             tol=0.1,
                             solver="saga",
                             multi_class="multinomial").fit(X_train, y_train.ravel())
    return clf

In [114]:
train_model(X_train, y_train)

[info] Training model...


In [116]:
type(y_train)

numpy.ndarray

In [119]:
# > Create model
def create_model(data_width):
    # Print info 
    print("[INFO] Initializing model")
    
    # define simple architecture 784x256x128x10
    model = Sequential()
    model.add(Dense(256, input_shape=(data_width,), activation="relu"))
    model.add(Dense(128, activation="relu"))
    model.add(Dense(10, activation="softmax")) #softmax generalises LogReg for multiclass tasks
    
    # define the gradient descent
    sgd = SGD(0.01)
    # compile model
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"])

    # define the gradient descent
    sgd = SGD(0.01)
    # compile model
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd,
                  metrics=["accuracy"])
    
    # Print info
    print("[INFO] Model summary:")
    model.summary()
    
    return model

In [120]:
# Building model
model = create_model(1024)
# Train model with extra validation split
history = model.fit(X_train, y_train,
                    validation_data = (X_test, y_test),
                    epochs = 3,
                    validation_split = 0.1,
                    batch_size = 32,
                    verbose = 1)

[INFO] Initializing model
[INFO] Model summary:
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               262400    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 296,586
Trainable params: 296,586
Non-trainable params: 0
_________________________________________________________________
Epoch 1/3


ValueError: in user code:

    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py", line 918, in compute_loss
        return self.compiled_loss(
    File "/opt/conda/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/opt/conda/lib/python3.9/site-packages/keras/losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "/opt/conda/lib/python3.9/site-packages/keras/losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/opt/conda/lib/python3.9/site-packages/keras/losses.py", line 1789, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/opt/conda/lib/python3.9/site-packages/keras/backend.py", line 5083, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 10) are incompatible


In [122]:
X_train.shape[1]

1024