# Recognizing 2 and 5 in MNIST Dataset

In [85]:
import os
import matplotlib.pyplot as plt
from matplotlib.image import imread
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from NeuralNetworks.code.layers.fullyconnected import FC
from NeuralNetworks.code.layers.convolution2d import Conv2D
from NeuralNetworks.code.layers.maxpooling2d import MaxPool2D
from NeuralNetworks.code.activations.activations import get_activation
from NeuralNetworks.code.losses.meansquarederror import MeanSquaredError
from NeuralNetworks.code.losses.binarycrossentropy import BinaryCrossEntropy
from NeuralNetworks.code.models.model import Model
from NeuralNetworks.code.optimizers.adam import Adam
from NeuralNetworks.code.optimizers.gradientdescent import GD

print('libs imported successfully!')

libs imported successfully!


## Data loading
importing images

In [62]:

# Build the path of the "MNIST" image folder under the current working directory.
# NOTE(review): this only resolves when the kernel's working directory is the one
# that contains "MNIST" — confirm before running from a different location.
current_directory = os.getcwd()
mnist_folder = os.path.join(current_directory, "MNIST")

print("MNIST folder path:", mnist_folder)

MNIST folder path: C:\Users\Samin\Desktop\University\Term 7\Computational Intelligence\Projects\NeuralNetworks\code\datasets\MNIST


In [63]:
# Collect the path of every file stored inside each sub-folder of the MNIST directory.
# os.listdir returns entries in arbitrary order, so both levels are sorted to keep
# the resulting list (and everything derived from it) identical between runs.
image_paths = []
for folder_name in sorted(os.listdir(mnist_folder)):
    folder_path = os.path.join(mnist_folder, folder_name)
    if not os.path.isdir(folder_path):
        # Stray files next to the class folders are reported and skipped.
        print(f"{folder_path} is not a directory.")
        continue
    print(f'Opening folder {folder_name}')
    image_paths.extend(
        os.path.join(folder_path, file_name)
        for file_name in sorted(os.listdir(folder_path))
    )
print('Done')

Opening folder 2
Opening folder 5
Done


In [64]:
# Accumulators that load_images_and_labels appends to: one image and one
# integer class label per file.
data = []
labels = []

In [65]:
def load_images_and_labels(image_paths):
    """Read each image and append it, with its class label, to the module-level lists.

    The label comes from the name of the folder that directly contains the image:
    folder ``2`` maps to class 0 and folder ``5`` maps to class 1. Results are
    appended to the module-level ``data`` and ``labels`` lists, so calling the
    function again adds to whatever those lists already hold.

    Args:
        image_paths: iterable of paths to image files, each stored directly
            inside a folder named after its digit.

    Raises:
        ValueError: if a parent folder name is not an integer, or is a digit
            other than 2 or 5 (previously such images were silently labelled 1).
    """
    class_by_digit = {2: 0, 5: 1}
    for image_path in image_paths:
        # The parent folder name is the digit shown in the image.
        digit = int(os.path.basename(os.path.dirname(image_path)))
        if digit not in class_by_digit:
            raise ValueError(
                f"Unexpected digit folder {digit!r} for image {image_path!r}; "
                f"expected one of {sorted(class_by_digit)}"
            )
        # Validate first, then append both values together so the two lists
        # never get out of step.
        data.append(imread(image_path))
        labels.append(class_by_digit[digit])
    print('Images loaded and labeled successfully')

In [66]:
# Fills the module-level `data` and `labels` lists. The function appends, so running
# this cell again without resetting those lists adds every image a second time.
load_images_and_labels(image_paths)

Images loaded and labeled successfully


### Converting to dataset

In [67]:
# Convert the accumulated Python lists into NumPy arrays; the names are reused,
# so from here on `data` and `labels` are ndarrays rather than lists.
data = np.array(data)
labels = np.array(labels)

In [68]:
data.shape

(2000, 28, 28)

In [69]:
labels.shape

(2000,)

In [70]:
# Keep the first axis (one row per image) and collapse the remaining axes into one.
flattened_data = data.reshape(data.shape[0], -1)  # Reshape to (2000, 784)

In [71]:
# Place the label vector as the last column next to the flattened pixel values.
dataset = np.column_stack((flattened_data, labels))


In [72]:
# Wrap the stacked array in a DataFrame: one "pixel_<i>" column per flattened
# pixel position, followed by the "label" column.
df = pd.DataFrame(
    dataset,
    columns=[*(f"pixel_{i}" for i in range(flattened_data.shape[1])), "label"],
)


In [73]:
# Persist the assembled table as CSV, without writing the index column.
# NOTE(review): the recorded run of this cell ended in PermissionError, so the CSV
# read further down presumably comes from an earlier export — confirm the file is
# not open in another program and re-run.
df.to_csv("mnist_dataset.csv", index=False)

PermissionError: [Errno 13] Permission denied: 'mnist_dataset.csv'

## Exploring dataset

In [87]:
# Reload the dataset from the CSV file (replacing any in-memory `df`) and report its size.
df = pd.read_csv("mnist_dataset.csv")
num_rows, num_cols = df.shape
print("Number of rows:", num_rows)
print("Number of columns:", num_cols)

Number of rows: 2000
Number of columns: 785


In [88]:
df.head()

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_775,pixel_776,pixel_777,pixel_778,pixel_779,pixel_780,pixel_781,pixel_782,pixel_783,label
0,3,0,10,0,0,3,0,2,7,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,5,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,7,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,0


In [89]:
df.describe()

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_775,pixel_776,pixel_777,pixel_778,pixel_779,pixel_780,pixel_781,pixel_782,pixel_783,label
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,0.3635,0.463,0.5465,0.3495,0.489,0.4605,0.4105,0.4355,1.9175,2.3345,...,0.2145,0.151,0.281,0.228,0.209,0.0045,0.0025,0.0035,0.0035,0.5
std,1.47833,1.829564,1.973014,1.511123,1.864301,1.882342,1.794882,1.665,3.052434,3.964779,...,1.25908,0.906427,1.381663,1.216041,1.215343,0.201246,0.111803,0.156525,0.120395,0.500125
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,16.0,17.0,17.0,18.0,19.0,23.0,31.0,16.0,18.0,24.0,...,20.0,12.0,19.0,18.0,16.0,9.0,5.0,7.0,5.0,1.0


In [90]:
# Count how many rows carry each label value to check the class balance.
label_counts = df['label'].value_counts()
label_counts

label
0    1000
1    1000
Name: count, dtype: int64

### Creating test and train

In [91]:
# Features are every column except `label`; the target is the `label` column itself.
X = df.drop(columns='label')
y = df['label']

In [92]:
# Split the data into train and test sets: 20% of the rows are held out for testing,
# and random_state pins the shuffle so the same split is produced on every run.
# NOTE(review): no `stratify` argument is passed, so the class ratio in each part is
# left to the shuffle — confirm that is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [93]:
X_train.shape

(1600, 784)

In [94]:
X_train

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_774,pixel_775,pixel_776,pixel_777,pixel_778,pixel_779,pixel_780,pixel_781,pixel_782,pixel_783
968,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
240,0,0,1,2,0,5,7,0,0,0,...,0,0,0,0,0,0,0,0,0,0
819,0,0,0,0,0,0,0,0,0,10,...,0,0,0,0,0,0,0,0,0,0
692,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
420,0,1,5,1,0,2,3,0,0,9,...,0,17,0,0,0,4,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,0,0,0,0,0
1294,0,0,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
860,0,0,0,0,0,0,0,0,2,2,...,0,0,0,0,0,0,0,0,0,0
1459,0,0,0,0,0,0,0,0,0,7,...,0,0,0,0,0,0,0,0,0,0


In [95]:
X_test.shape

(400, 784)

In [96]:
X_test

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_774,pixel_775,pixel_776,pixel_777,pixel_778,pixel_779,pixel_780,pixel_781,pixel_782,pixel_783
1860,0,0,0,0,0,0,0,0,13,0,...,0,0,0,0,0,0,0,0,0,0
353,0,0,0,0,7,0,0,5,4,5,...,0,0,0,0,0,0,0,0,0,0
1333,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,0
905,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1289,0,10,8,0,0,0,0,0,3,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
965,0,0,0,0,0,0,0,0,0,17,...,0,0,0,0,0,0,0,0,0,0
1284,0,0,0,0,0,0,0,0,3,0,...,0,1,1,1,0,0,0,0,0,0
1739,0,0,0,0,0,0,0,0,0,5,...,0,0,0,0,0,0,0,0,0,0
261,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### reshape the input data to a 4D tensor

In [97]:
# Turn each 784-value row back into a 28x28 image with a trailing channel axis,
# giving arrays of shape (N, 28, 28, 1).
# np.asarray accepts both the DataFrame produced by the split and an array that has
# already been reshaped, so this cell can be re-run safely; the earlier `.values`
# access failed on a second run because ndarrays have no `.values` attribute.
X_train = np.asarray(X_train).reshape(-1, 28, 28, 1)
X_test = np.asarray(X_test).reshape(-1, 28, 28, 1)
X_train.shape

(1600, 28, 28, 1)

### Defining Architecture

Each layer of the architecture is described below, along with its `in_channels` / `out_channels` (or `input_size` / `output_size`) parameters:

1. Conv1:
   - `in_channels`: The number of input channels to the convolutional layer. Since we are using grayscale images, the input shape is `(batch_size, 28, 28, 1)`. The input has only one channel (grayscale), so `in_channels` is set to 1.
   - `out_channels`: The number of output channels from the convolutional layer. Here, we set `out_channels=32`, which means that Conv1 will output 32 feature maps.

2. MaxPool1:
   - `in_channels`: The number of input channels to the max pooling layer. It matches the number of output channels of the previous layer: MaxPool1 receives the 32 feature maps output by Conv1, so it has 32 input channels. (`MaxPool2D` takes no channel argument in the code below; the count simply follows from Conv1.)
   - `out_channels`: Max pooling layers do not change the number of channels, so the output also has 32 channels.

3. Conv2:
   - `in_channels`: The number of input channels to the convolutional layer. It should match the number of output channels from the previous layer, which is 32 in this case (output of MaxPool1). Therefore, `in_channels` is set to 32.
   - `out_channels`: The number of output channels from the convolutional layer. Here, we set `out_channels=64`, which means Conv2 will output 64 feature maps.

4. MaxPool2:
   - `in_channels`: The number of input channels to the max pooling layer. It matches the number of output channels of the previous layer: MaxPool2 receives the 64 feature maps output by Conv2, so it has 64 input channels. (`MaxPool2D` takes no channel argument in the code below; the count simply follows from Conv2.)
   - `out_channels`: Max pooling layers do not change the number of channels, so the output also has 64 channels.

5. FC1:
   - `input_size`: The number of input units to the fully connected (dense) layer. Here, we need to consider the flattened size of the feature maps from the previous layer. Since the output of MaxPool2 is `(batch_size, 7, 7, 64)`, we flatten it to a 1D vector of size `7 * 7 * 64 = 3136`. Therefore, `input_size` is set to 3136.
   - `output_size`: The number of output units from the fully connected layer. Here, we set `output_size=128`, meaning FC1 will output a vector of size 128.

6. FC2:
   - `input_size`: The number of input units to the fully connected layer. It should match the `output_size` of the previous layer, which is 128 in this case (output of FC1). Therefore, `input_size` is set to 128.
   - `output_size`: The number of output units from the fully connected layer. Here, we set `output_size=64`, meaning FC2 will output a vector of size 64.

7. FC3:
   - `input_size`: The number of input units to the fully connected layer. It should match the `output_size` of the previous layer, which is 64 in this case (output of FC2). Therefore, `input_size` is set to 64.
   - `output_size`: The number of output units from the fully connected layer. Here, we set `output_size=1`, as we are performing binary classification. FC3 will output a single value representing the predicted class probability.

The `in_channels` and `out_channels` parameters determine the number of input and output feature maps of the convolutional layers. For the fully connected layers, the analogous `input_size` and `output_size` parameters give the number of input and output units, respectively. These values are typically chosen based on the complexity of the data and the desired capacity of the network. Adjusting these values can affect the model's performance, so experimentation and tuning may be necessary.

In [102]:
# Layers listed in forward order (presumably Model walks the dict in insertion
# order — confirm against the Model implementation).
# NOTE(review): no activation entry follows the Conv layers and no explicit flatten
# step precedes FC1 — confirm that Conv2D / FC / Model handle these internally.
arch = {
    # 1 input channel -> 32 feature maps; 3x3 kernel, stride 1, padding 1.
    'Conv1': Conv2D(in_channels=1, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), name='Conv1'),
    # 2x2 max pooling with stride 2.
    'MaxPool1': MaxPool2D(kernel_size=(2, 2), stride=(2, 2), mode='max'),
    # 32 -> 64 feature maps; same kernel, stride and padding as Conv1.
    'Conv2': Conv2D(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), name='Conv2'),
    # 2x2 max pooling with stride 2.
    'MaxPool2': MaxPool2D(kernel_size=(2, 2), stride=(2, 2), mode='max'),
    # 3136 = 7 * 7 * 64, the flattened MaxPool2 output size given in the notes above.
    'FC1': FC(input_size=3136, output_size=128, name='FC1'),
    'Activation1': get_activation('relu'),
    'FC2': FC(input_size=128, output_size=64, name='FC2'),
    'Activation2': get_activation('relu'),
    # Single output unit for the two-class problem.
    'FC3': FC(input_size=64, output_size=1, name='FC3'),
    'Activation3': get_activation('sigmoid'),  # Use sigmoid activation for binary classification
}

Weights:
(3, 3, 1, 32)
[[[[-1.37918364e-02 -8.31327373e-03 -1.25863560e-02  1.81083853e-02
     1.18695612e-02  3.44227365e-03  4.94337573e-03 -9.42728609e-03
    -1.43479046e-02  3.10812091e-04  4.43845960e-03  1.28699621e-02
    -2.86752114e-03 -6.55550394e-03  1.95336351e-03 -1.33686497e-02
     9.73327975e-03  1.81259016e-02  7.31403300e-03 -1.83150523e-02
    -9.97026535e-03 -1.32033321e-02  1.11973161e-02 -2.00736768e-03
     6.00079591e-03 -2.34183881e-02  6.27882981e-03  1.35088992e-04
    -4.64897581e-03 -3.48236206e-03 -1.97073448e-02  1.69442382e-02]]

  [[ 6.86196467e-03 -1.41687441e-02 -1.50524278e-03  2.15892744e-02
    -1.54737928e-02 -1.34696485e-02 -4.92751534e-03 -2.30392077e-02
    -5.90385372e-03 -1.32064582e-02  5.18644979e-03  3.47106169e-03
    -3.76812792e-03  3.39650297e-03 -3.40598916e-03  1.51507579e-02
     1.82032566e-02  2.19463437e-03  2.80932442e-03 -1.64485111e-02
     4.84720376e-03 -7.86443097e-03  1.24879095e-02  5.45554710e-03
    -1.79977093e-03  3

#### Create the criterion (loss) function

In [103]:
# Loss object handed to Model below; binary cross-entropy pairs with the single
# sigmoid output unit at the end of `arch`.
criterion = BinaryCrossEntropy()

#### Create the optimizer

In [104]:
# Adam is constructed from the same `arch` dict that is passed to Model, with an
# explicit learning rate, beta1/beta2 and epsilon.
optimizer = Adam(arch, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8)


#### Creating Model

In [105]:
# Bundle the layer dict, the loss and the optimizer into a single Model object.
model = Model(arch, criterion, optimizer)