In [17]:
'''
Comparing single layer MLP with deep MLP (using PyTorch)
'''

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle


# Create model

def create_multilayer_perceptron(n_input=2376, n_hidden1=512, n_hidden2=256,
                                 n_hidden3=64, n_classes=2):
    """Build the deep MLP classifier (three hidden layers + linear output).

    Architecture:
        fc1 -> BatchNorm -> ReLU -> Dropout(0.30)
        fc2 -> BatchNorm -> ReLU -> Dropout(0.40)
        fc3 -> ReLU
        out (raw logits, no softmax)

    Args:
        n_input: Number of input features per sample.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        n_hidden3: Width of the third hidden layer.
        n_classes: Number of output logits.

    Returns:
        A freshly initialised ``nn.Module``. Calling it with no arguments
        yields exactly the original 2376-512-256-64-2 network.

    Note:
        The network contains ``BatchNorm1d`` layers, so in training mode a
        batch must hold more than one sample.
    """

    class net(nn.Module):
        def __init__(self):
            super().__init__()

            # Layers are created in the same order and under the same
            # attribute names as before, so state_dict keys and seeded
            # initialisation are unaffected by the parameterisation.

            # layer 1
            self.fc1 = nn.Linear(n_input, n_hidden1)
            self.bn1 = nn.BatchNorm1d(n_hidden1)
            self.do1 = nn.Dropout(p=0.30)

            # layer 2
            self.fc2 = nn.Linear(n_hidden1, n_hidden2)
            self.bn2 = nn.BatchNorm1d(n_hidden2)
            self.do2 = nn.Dropout(p=0.40)

            # layer 3 (no BN/Dropout here is fine)
            self.fc3 = nn.Linear(n_hidden2, n_hidden3)

            # output
            self.out = nn.Linear(n_hidden3, n_classes)

        def forward(self, x):
            """Map a (batch, n_input) float tensor to (batch, n_classes) logits."""
            x = F.relu(self.bn1(self.fc1(x)))
            x = self.do1(x)
            x = F.relu(self.bn2(self.fc2(x)))
            x = self.do2(x)
            x = F.relu(self.fc3(x))
            # Logits are returned unnormalised; the loss applies the softmax.
            x = self.out(x)
            return x

    return net()


# Do not change this
def preprocess(pickle_path="C:/Users/ankit/OneDrive/Desktop/my_projects/face recognition/face_all.pickle"):
    """Load the face dataset pickle and split it into train/valid/test sets.

    The pickle must hold a dict with the keys ``'Features'`` and ``'Labels'``.
    Features are divided by 255 and labels are squeezed; both are then split
    by position:

        train: rows [0, 21100)
        valid: rows [21100, 23765)
        test:  rows [23765, end)

    Args:
        pickle_path: Location of the pickle file. Defaults to the path the
            original script used, so existing calls are unaffected.

    Returns:
        ``(trainset, validset, testset)`` -- three ``Dataset`` objects whose
        items are ``(features_row, label)`` pairs.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file -- only point this at a dataset file you trust.
    # The context manager closes the handle; the bare open() used to leak it.
    with open(pickle_path, 'rb') as pickle_file:
        pickle_obj = pickle.load(file=pickle_file)
    features = pickle_obj['Features']
    labels = pickle_obj['Labels']
    train_x = features[0:21100] / 255
    valid_x = features[21100:23765] / 255
    test_x = features[23765:] / 255

    labels = np.squeeze(labels)
    train_y = labels[0:21100]
    valid_y = labels[21100:23765]
    test_y = labels[23765:]

    class dataset(Dataset):
        """Pairs row ``i`` of a feature array with entry ``i`` of a label array."""

        def __init__(self, X, y):
            self.X = X
            self.y = y

        def __len__(self):
            return len(self.y)

        def __getitem__(self, idx):
            return self.X[idx], self.y[idx]

    trainset = dataset(train_x, train_y)
    validset = dataset(valid_x, valid_y)
    testset = dataset(test_x, test_y)

    return trainset, validset, testset


def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Puts ``model`` in training mode, then for each batch computes the loss,
    back-propagates and takes one optimizer step. Every 100th batch
    (starting with batch 0) the current loss and progress are printed.

    Relies on the module-level ``device`` to decide where tensors are moved.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches; must expose
            ``.dataset`` so the total sample count can be reported.
        model: Network to optimise (updated in place).
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar.
        optimizer: Optimizer bound to ``model``'s parameters.
    """
    n_samples = len(dataloader.dataset)
    model.train()

    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass: inputs are cast to float32 before entering the model.
        logits = model(inputs.float())
        batch_loss = loss_fn(logits, targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Only report on every 100th step.
        if step % 100 != 0:
            continue
        loss_value = batch_loss.item()
        seen = step * len(inputs)
        print(f"loss: {loss_value:>7f}  [{seen:>5d}/{n_samples:>5d}]")


def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and report accuracy and mean loss.

    Puts ``model`` in eval mode and runs every batch with gradients disabled.
    The loss is averaged over batches; accuracy is the number of correct
    argmax predictions divided by the dataset size. The summary is printed
    and also returned so callers can log or compare it.

    Relies on the module-level ``device`` to decide where tensors are moved.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches exposing
            ``.dataset`` and ``len()``.
        model: Network to evaluate.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar.

    Returns:
        ``(accuracy, avg_loss)`` where ``accuracy`` is a fraction in [0, 1].
        Both are NaN when the dataloader yields no data (previously this
        case raised ``ZeroDivisionError``).
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0.0, 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X.float())
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    # Guard both divisions so an empty evaluation set reports NaN instead of
    # crashing.
    test_loss = test_loss / num_batches if num_batches else float("nan")
    correct = correct / size if size else float("nan")
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct, test_loss


# Parameters
learning_rate = 0.0003
training_epochs = 128
batch_size = 128

# Get cpu or gpu device for training.
# NOTE: train() and test() read this module-level name.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Construct model
model = create_multilayer_perceptron().to(device)

# Define loss and optimizer
cost = nn.CrossEntropyLoss(label_smoothing=0.05)  # helps generalization
# Use the declared hyperparameter (same value as the previously hard-coded
# 3e-4) so that editing `learning_rate` above actually takes effect.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)


# load data
trainset, validset, testset = preprocess()
train_dataloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
# NOTE: the validation loader is built but never used below; call
# test(valid_dataloader, model, cost) inside the loop to monitor each epoch.
valid_dataloader = DataLoader(validset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(testset, batch_size=batch_size, shuffle=False)


# Training cycle
for t in range(training_epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, cost, optimizer)
print("Optimization Finished!")
# Final evaluation on the held-out test split.
test(test_dataloader, model, cost)

Using cpu device
Epoch 1
-------------------------------
loss: 0.707534  [    0/21100]
loss: 0.457821  [12800/21100]
Epoch 2
-------------------------------
loss: 0.341926  [    0/21100]
loss: 0.357534  [12800/21100]
Epoch 3
-------------------------------
loss: 0.302955  [    0/21100]
loss: 0.327587  [12800/21100]
Epoch 4
-------------------------------
loss: 0.309061  [    0/21100]
loss: 0.280995  [12800/21100]
Epoch 5
-------------------------------
loss: 0.309626  [    0/21100]
loss: 0.343279  [12800/21100]
Epoch 6
-------------------------------
loss: 0.335524  [    0/21100]
loss: 0.363918  [12800/21100]
Epoch 7
-------------------------------
loss: 0.348903  [    0/21100]
loss: 0.230895  [12800/21100]
Epoch 8
-------------------------------
loss: 0.307178  [    0/21100]
loss: 0.291224  [12800/21100]
Epoch 9
-------------------------------
loss: 0.242720  [    0/21100]
loss: 0.396511  [12800/21100]
Epoch 10
-------------------------------
loss: 0.298447  [    0/21100]
loss: 0.2762