# MACHINE LEARNING FINAL PROJECT


Students Info:
- Vũ Minh Chiến - 22127045
- Cao Nguyễn Huy Hoàng - 22127120

### **IMPORT LIBRARIES**

In [1]:
import numpy as np
import os
import pickle
import gzip
import matplotlib.pyplot as plt
from sklearn import (tree)
import torch
import tensorflow as tf

import seaborn as sns

#2.2
#sklearn
from sklearn.neural_network import MLPClassifier

#pytorch
import torch.nn as nn
import torch.optim as optim

### **Data Preparation**

#### 1. Data collection

This dataset is collect from [cs.toronto.edu](https://www.cs.toronto.edu/~kriz/cifar.html). All files have been downloaded, unpacked and stored in `cifar-10-python` folder. The data will be stored in 5 variables with corresponding size:
- train_X (50000x3072)
- train_Y (50000x1)
- test_X (10000x3072)
- test_Y (10000x1)

The code below is used to read the data from the files as well as remove the redundant categorical features such as `batch-label` and `file-name`.

In [2]:
# function to read the pickle file
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='latin1')
    return dict 


# define variables
test_X = test_Y = train_X = train_Y = None


# read batch file 1-5 to train_X and train_Y
for i in range(1, 6):
    # file name
    file = 'cifar-10-python/data_batch_' + str(i)

    # read data to a dictionary
    dict = unpickle(file)

    # convert data to numpy array and store to test_X and test_Y
    if train_X is None:
        train_X = np.array(dict['data'])
        train_Y = np.array(dict['labels']).reshape(-1, 1)
    else:
        train_X = np.concatenate((train_X, np.array(dict['data'])), axis=0)
        train_Y = np.concatenate((train_Y, np.array(dict['labels']).reshape(-1, 1)), axis=0)


# read test_batch to test_X and test_Y
dict = unpickle('cifar-10-python/test_batch')
test_X = np.array(dict['data'])
test_Y = np.array(dict['labels']).reshape(-1, 1)

#### 2. Data preprocessing

Find any missing value in the dataset

In [3]:
print(np.isnan(train_X).any(), np.isnan(train_Y).any(), np.isnan(test_X).any(), np.isnan(test_Y).any())

False False False False


So there aren't any missing values in this dataset.

Next is the normalization step, this is a image dataset so the values must be between [0, 255], but first let's check if this is right.

In [4]:
print(train_X.min(), train_X.max())

0 255


We have determined the values range is between [0, 255], now we can normalize it to range [0, 1]. This is a crucial step which can help promoting faster convergence during gradient-based training.

In [5]:
# this is shortern form of MinMax scaling
train_X = train_X / 255
test_X = test_X / 255

Applying one-hot for the label of the image. First we need to find the range of the label.

In [6]:
print(train_Y.min(), train_Y.max())

0 9


Use a identity matrix to generate one-hot matrix

In [7]:
# create identity matrix with len = 10 (0-9)
eye_matrix = np.eye(10)

# generate one-hot
train_Y_one_hot = eye_matrix[train_Y.reshape(-1)]
test_Y_one_hot = eye_matrix[test_Y.reshape(-1)]

### Define MLP Architecture:
1. Number of layers: 2 hidden layers (256 and 128 neurons for the first and second layer, respectively).
2. Number of neurons per layer: 256 and 128 for first and second layer, respectively.
3. Activation function: ReLU for hidden layer and SGD for outer layer.
4. Batch Normalization: 64

### Loss function and Optimizer:
1. Loss function: Cross-entropy loss
2. Optimizer: Stochastic Gradient Descent (SGD)

In [8]:
#Ref: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html

model_sklearn = MLPClassifier(
    hidden_layer_sizes=(256, 128),  # Two hidden layers: 256 and 128 neurons
    activation='relu',             # Activation function for hidden layers
    solver='sgd',                  # Optimizer
    learning_rate_init=0.01,       # Initial learning rate
    max_iter=20,                   # Number of epochs
    batch_size=64,                 # Mini-batch size
    verbose=True,                  # Training progress
)

#MLPClassifier optimizes the cross-entropy loss function by using LBFGS or SGD.

In [9]:
#Ref neural network: https://pytorch.org/docs/stable/nn.html
#Ref optimization: https://pytorch.org/docs/stable/optim.html
#Ref Data loader: https://pytorch.org/docs/stable/data.html
class MLPClassifier_pytorch(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MLPClassifier_pytorch, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 256), # First hidden layer with 256 neurons and input size
            nn.ReLU(),                  # Activation function for first layer
            nn.BatchNorm1d(256),        # Batch normalization
            nn.Linear(256, 128),        # Second hidden layer with 128 neurons
            nn.ReLU(),                  # Activation function for the second layer
            nn.BatchNorm1d(128),        # Batch normalization for the second layer
            nn.Linear(128, num_classes),# Output layer
            nn.Softmax(dim=1)           # Softmax for multi-class classification probabilities
        )
    
    def forward(self, x):
        return self.model(x) # Forward pass

model_pytorch = MLPClassifier_pytorch(input_size=3072, num_classes=10)

loss_pytorch = nn.CrossEntropyLoss() #Loss
optimizer_pytorch = optim.SGD(model_pytorch.parameters(), lr=0.01) #Optimizer


In [10]:
#Ref model: https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
#Ref layers: https://www.tensorflow.org/api_docs/python/tf/keras/layers
#Ref optimizer: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers

BatchNormalization = tf.keras.layers.BatchNormalization
Dense = tf.keras.layers.Dense
Softmax = tf.keras.layers.Softmax
ReLU = tf.keras.layers.ReLU

SGD = tf.keras.optimizers.SGD

model_tensor = tf.keras.Sequential([
    Dense(256, input_shape=(3072,)),  # First hidden layer with 256 neurons and input size
    ReLU(),                           # Activation function for the first layer 
    BatchNormalization(),             # Batch normalization
    Dense(128),                       # Second hidden layer with 128 neurons
    ReLU(),                           # Activation function for the second layer 
    BatchNormalization(),             # Batch normalization
    Dense(10),                        # Output layer
    Softmax()                         # Softmax for multi-class classification probabilities
])

model_tensor.compile(optimizer=SGD(learning_rate=0.01), #Optimizer 
              loss='categorical_crossentropy',          #Loss
              metrics=['accuracy'])                     #Accuracy for evaluation

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
