# CONVOLUTIONAL NEURAL NETWORK NB (CNN-First on DL)  

## libraries and env configuration 


In [1]:
#packages from tensor flow
import tensorflow as Tf

# tensor flow for optimizing the model 


# basis packages 
import os as os 
import matplotlib.pyplot as plt
import numpy as np
import scipy 
import ssl

In [2]:
physicalDevice = Tf.config.experimental.list_physical_devices('GPU')
Tf.config.experimental.set_memory_growth(physicalDevice[0], True)
print("Num GPUs Available and used :  ", len(physicalDevice))


Num GPUs Available and used :   1


In [3]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' # Suppress TensorFlow logging (1)	

## data preprocessing 

In [4]:
# Load MNIST dataset
(xTrainMnist,yTrainMnist),(xTestMnist,yTestMnist)=Tf.keras.datasets.mnist.load_data()

### normalization 

In [5]:
xTrainMnist.max()

255

In [6]:
xTestMnist.max()


255

In [7]:
xTestMnist = xTestMnist.astype('float32') / 255 

#### changing normalization for data train: 

In [8]:
# we normalize tghe data lessing the mean and the standar deviation to xtrain data in order to have a better performance on the training process 

MnistMean = xTrainMnist.mean()
MnistStd = xTrainMnist.std()

# normalize the data dividing by the sd assecuring the none zero value of the sd

xTrainMnist = (xTrainMnist-MnistMean)/(MnistStd+1e-7)

# also normalize the test data using mean and std from training data cause the idea is that the network doesnt know these parameters of the test set

xTestMnist = (xTestMnist - MnistMean)/(MnistStd+1e-7)

""" also could use : 
mean = np.mean(xTrainMnist)
print(mean)
"""

' also could use : \nmean = np.mean(xTrainMnist)\nprint(mean)\n'

### split train data

In [9]:
xTrainMnist, xValidMnist = xTrainMnist[5000:],xTrainMnist[:5000]
yTrainMnist, yValidMnist = yTrainMnist[5000:],yTrainMnist[:5000]

look dimension size

In [11]:
xTrainMnist.shape, yTrainMnist.shape, xValidMnist.shape, yValidMnist.shape , xTestMnist.shape, yTestMnist.shape

((55000, 28, 28), (55000,), (5000, 28, 28), (5000,), (10000, 28, 28), (10000,))

### data argumentation for best performance

In [12]:
datagen = Tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    horizontal_flip = True,
    vertical_flip = True
    )

### see number of classes/labels in order to binarize 

In [13]:
len(np.unique(yTrainMnist)) # 10 classes

10

####  binarizing the labels in order to use only categorical cross entropy without sparse 


In [15]:
yTrainMnist = Tf.keras.utils.to_categorical(yTrainMnist)
yTestMnist = Tf.keras.utils.to_categorical(yTestMnist)


In [16]:
yTrainMnist[0] # one hot encoding of the labels

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [None]:
print(yTrainMnist[0:10]) # display the first 10 labels of the training set
print(yTestMnist[0:10]) # display the first 10 labels of the test set

In [None]:
xTrainMnist.shape, yTrainMnist.shape, xTestMnist.shape, yTestMnist.shape

## creating another structure for the sequential model 

proceed to use a kernel recognition filter on every layer, besides of another techniques to avoid overfitting: 

- dropout
- batch normalization 
- flatten 
- global average pooling
- regularizacion l1 o l2
- estructura de hyperparámetros
- funciones de activacion 
- (PRUNNING & SPARSITY ¿?) 
    * Función: Eliminan conexiones o neuronas innecesarias en la red, reduciendo la complejidad del modelo y mejorando la eficiencia computacional.​

    * Implementación: Se aplican después del entrenamiento inicial para identificar y eliminar pesos insignificantes.

In [17]:
KernelBase = 32 
WeightRegularizer = 1e-4 

In [20]:
MnistModel = Tf.keras.models.Sequential() 

making layer by layer 
First one is a convolutional sequence of layers : 


In [22]:
MnistModel.add(Tf.keras.layers.Conv2D(KernelBase, (3,3), padding = 'same', input_shape=(28,28,1), kernel_regularizer = Tf.keras.regularizers.l2(WeightRegularizer)))

once we add the first convolutional layer with 32 filters of size 3x3 and a regularization term to avoid overfitting
proceed adding the activation function ReLU to the output of the convolutional layer

In [None]:
MnistModel.add(Tf.keras.layers.Activation('relu'))


 finally goes with the batch normalization to normalize the output of the previous layer


In [23]:
MnistModel.add(Tf.keras.layers.BatchNormalization())

also implement a maxpooling 2d layer and dropout on this another layer : 


In [26]:
MnistModel.add(Tf.keras.layers.Conv2D(KernelBase, (3, 3), padding='same', activation='relu', kernel_regularizer=Tf.keras.regularizers.l2(WeightRegularizer), input_shape=(28, 28, 1)))
MnistModel.add(Tf.keras.layers.LeakyReLU(alpha=0.1)) # we use a different activation function to see if it improves the performance of the model
MnistModel.add(Tf.keras.layers.BatchNormalization())
MnistModel.add(Tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')) 
MnistModel.add(Tf.keras.layers.Dropout(0.25)) # we add a dropout layer to reduce overfitting 

In [None]:
"""
Left this to the end of the model 
# Add a Flatten layer to convert the 2D images to 1D vectors  ( transforming my 28 x 28 tensor to a 1D array ) 
BasicMnistModel.add(Tf.keras.layers.Flatten(input_shape=(28, 28)))  
BasicMnistModel.summary() # display the summary of the model "
""" 

'\nLeft this to the end of the model \n# Add a Flatten layer to convert the 2D images to 1D vectors  ( transforming my 28 x 28 tensor to a 1D array ) \nBasicMnistModel.add(Tf.keras.layers.Flatten(input_shape=(28, 28)))  \nBasicMnistModel.summary() # display the summary of the model "\n'

In [None]:
# proceed compiling the model 
MnistModel.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics= ['accuracy']) # use Adam optimizer and sparse categorical crossentropy loss function

### proceding training the model 

In [None]:
# fit the model to the training data
MnistHistory = BasicMnistModel.fit(xTrainMnist, yTrainMnist, epochs = 5 , batch_size = 32, validation_split = 0.3, validation_data=(xTestMnist, yTestMnist), verbose = 1) # fit the model to the training data with 5 epochs and a batch size of 32, using 30% of the training data for validation and displaying the progress bar

### lets see accuracy of the Mnist model : 


In [None]:
loss, acc = BasicMnistModel.evaluate(xTestMnist, yTestMnist) # evaluate the model on the test set


In [None]:
plt.plot(MnistHistory.history['loss'], label='Loss (TRAIN)')
plt.plot(MnistHistory.history['val_loss'], label='Loss (VALIDATION)')
plt.xlabel('EPOCHS')
plt.ylabel('LOSS')
plt.title('Loss per epoch')
plt.legend()
plt.grid()
plt.show()
print(f" error w/h noisy at the entry of the model : {loss*100:.2f}%")


In [None]:
plt.plot(MnistHistory.history['accuracy'], label='accuracy (TRAIN)')
plt.plot(MnistHistory.history['val_accuracy'], label='accuracy (VALIDATION)')
plt.xlabel('EPOCHS')
plt.ylabel('accuracy')
plt.title('accuracy per epoch')
plt.legend()
plt.grid()
plt.show()

print(f"accuracy w/h noisy at the entry of the model : {acc*100:.2f}%")


## proceed with a class prediciton for our random sample

use a random variable to our data sample 

In [None]:
id = np.random.choice(xTestMnist.shape[0], 1000, replace=False) 


In [None]:
MnistPredictions = BasicMnistModel.predict(xTestMnist[id[0]:id[0]+1]) # make predictions on the test set

In [None]:
predicted_class = np.argmax(MnistPredictions, axis=1)
plt.imshow(xTestMnist[id[0]])

print("predicted class:" , predicted_class)
print("rial class: ",yTestMnist[id[0]]) # display the true class of the image