# Classifier

## Import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from tqdm.notebook import tqdm, trange
from time import sleep
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torchvision import transforms
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder


## Functions definition

In [2]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [3]:
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr, betas=(0.5,0.999))
    for epoch in tqdm(range(epochs), desc = "Current Epoch"):
        # Training Phase 
        model.train()
        train_losses = []
        for batch in tqdm(train_loader, desc = f"Epoch: {epoch}", leave= False):
            optimizer.zero_grad()
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

## Class definition

In [4]:
class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [5]:
class Metric():
    def __init__(self, name):
        self.name = name
    
    def eval(self, outputs, labels):
        pass

In [6]:
class Accuracy(Metric):
    
    def eval(self, outputs, labels):
        _, preds = torch.max(outputs, dim=1)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [7]:
class ImageClassificationBase(nn.Module):
    
    def __init__(self, loss_function, metrics):
        super().__init__()
        self.loss_function = loss_function
        self.metrics = metrics
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = self.loss_function(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = self.loss_function(out, labels)   # Calculate loss
        result = {'val_loss': loss.detach()}
        
        for m in self.metrics:
            result[m.name] = m.eval(out, labels)           # Calculate metrics
            
        return result
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        
        result = {'val_loss': epoch_loss.item()}
        
        for m in self.metrics:
            batch = [x[m.name] for x in outputs]
            epoch = torch.stack(batch).mean()      # Combine metrics
            result[m.name] = epoch.item()
            
        return result
    
    def epoch_end(self, epoch, result):
        out = f"Epoch [{epoch}]"
        vals = list(result.keys())
        for v in vals:
            out += f", {v}: {result[v]:.4f}"
        print(out)
        

In [8]:
class Net(ImageClassificationBase):
    
    def __init__(self, loss_function, metrics, out_size):
        super().__init__(loss_function, metrics)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
        self.norm1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
        self.norm2 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(in_features=128*11*11, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=256)
        self.fc = nn.Linear(256, out_size)

    def forward(self, input):
        output = F.relu(self.conv1(input))
        output = F.relu(self.conv2(self.norm1(self.pool1(output))))
        output = self.norm2(self.pool2(output))
        output = output.view(-1, 128*11*11)
        output = F.relu(self.fc1(output))
        output = F.relu(self.fc2(output))
        output = self.fc(output)
        output = F.softmax(input = output)
        return output

    """def __init__(self, loss_function, metrics, out_size):
        super().__init__(loss_function, metrics)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding = 1)
        self.norm1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.drop1 = nn.Dropout2d(p=0.25)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1)
        self.norm2 = nn.BatchNorm2d(256)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.drop2 = nn.Dropout2d(p=0.25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=256*11*11, out_features=1024)
        self.drop3 = nn.Dropout2d(p=0.5)
        self.fc = nn.Linear(1024, out_size)

    def forward(self, input):
        output = F.relu(self.conv1(input))
        output = F.relu(self.conv2(output))
        output = self.drop1(self.pool1(self.norm1(output)))
        output = F.relu(self.conv3(output))
        output = F.relu(self.conv4(output))
        output = self.drop2(self.pool2(self.norm2(output)))
        output = self.flatten(output)
        output = F.relu(self.fc1(output))
        output = self.fc(self.drop3(output))
        output = F.softmax(input = output)
        return output"""

## Variables definition

In [19]:
device = get_default_device()
models_directory = "models"
dataset_name = "Fer2013_uniform"
train_df_path = f"../datasets/{dataset_name}/train"
test_df_path = f"../datasets/{dataset_name}/test"

In [10]:
batch_size = 32
loss_function = nn.CrossEntropyLoss()
metrics = [Accuracy("val_acc")]
optimizer = optim.Adam
lr =0.0001
num_epochs = 25

## Dataset loading

In [11]:
df_train = ImageFolder(root=train_df_path, transform=transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                     transforms.ToTensor()]))
df_test = ImageFolder(root=test_df_path, transform=transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                     transforms.ToTensor()]))

In [12]:
train_dl = DeviceDataLoader(DataLoader(df_train, batch_size=batch_size, shuffle=True),device)
test_dl = DeviceDataLoader(DataLoader(df_test, batch_size=batch_size, shuffle=True),device)

In [13]:
classes = df_train.classes
classes

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

## Model design and Training

In [14]:
net = Net(loss_function, metrics, len(classes))
net.to(device)
summary(net,(1,48,48))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 46, 46]             640
         MaxPool2d-2           [-1, 64, 23, 23]               0
       BatchNorm2d-3           [-1, 64, 23, 23]             128
            Conv2d-4          [-1, 128, 21, 21]          73,856
         MaxPool2d-5          [-1, 128, 11, 11]               0
       BatchNorm2d-6          [-1, 128, 11, 11]             256
            Linear-7                  [-1, 256]       3,965,184
            Linear-8                  [-1, 256]          65,792
            Linear-9                    [-1, 7]           1,799
Total params: 4,107,655
Trainable params: 4,107,655
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 2.22
Params size (MB): 15.67
Estimated Total Size (MB): 17.90
-------------------------------------

  output = F.softmax(input = output)


In [15]:
history = fit(num_epochs, lr, net, train_dl, test_dl, optimizer)

Current Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 0:   0%|          | 0/936 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Epoch [0], val_loss: 1.7201, val_acc: 0.4302, train_loss: 1.7234


Epoch: 1:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [1], val_loss: 1.6721, val_acc: 0.4856, train_loss: 1.6097


Epoch: 2:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [2], val_loss: 1.6660, val_acc: 0.4929, train_loss: 1.5499


Epoch: 3:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [3], val_loss: 1.6275, val_acc: 0.5384, train_loss: 1.4973


Epoch: 4:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [4], val_loss: 1.6081, val_acc: 0.5420, train_loss: 1.4528


Epoch: 5:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [5], val_loss: 1.6012, val_acc: 0.5623, train_loss: 1.4152


Epoch: 6:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [6], val_loss: 1.5999, val_acc: 0.5574, train_loss: 1.3853


Epoch: 7:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [7], val_loss: 1.5989, val_acc: 0.5548, train_loss: 1.3659


Epoch: 8:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [8], val_loss: 1.6084, val_acc: 0.5434, train_loss: 1.3452


Epoch: 9:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [9], val_loss: 1.5889, val_acc: 0.5704, train_loss: 1.3337


Epoch: 10:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [10], val_loss: 1.5963, val_acc: 0.5584, train_loss: 1.3225


Epoch: 11:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [11], val_loss: 1.5951, val_acc: 0.5674, train_loss: 1.3129


Epoch: 12:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [12], val_loss: 1.5880, val_acc: 0.5647, train_loss: 1.3050


Epoch: 13:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [13], val_loss: 1.5818, val_acc: 0.5773, train_loss: 1.3015


Epoch: 14:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [14], val_loss: 1.5902, val_acc: 0.5714, train_loss: 1.2925


Epoch: 15:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [15], val_loss: 1.5932, val_acc: 0.5601, train_loss: 1.2902


Epoch: 16:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [16], val_loss: 1.5999, val_acc: 0.5619, train_loss: 1.2873


Epoch: 17:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [17], val_loss: 1.5820, val_acc: 0.5838, train_loss: 1.2821


Epoch: 18:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [18], val_loss: 1.5811, val_acc: 0.5759, train_loss: 1.2805


Epoch: 19:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [19], val_loss: 1.5804, val_acc: 0.5781, train_loss: 1.2784


Epoch: 20:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [20], val_loss: 1.5821, val_acc: 0.5771, train_loss: 1.2738


Epoch: 21:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [21], val_loss: 1.5816, val_acc: 0.5765, train_loss: 1.2714


Epoch: 22:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [22], val_loss: 1.5790, val_acc: 0.5812, train_loss: 1.2714


Epoch: 23:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [23], val_loss: 1.5798, val_acc: 0.5804, train_loss: 1.2679


Epoch: 24:   0%|          | 0/936 [00:00<?, ?it/s]

Epoch [24], val_loss: 1.5900, val_acc: 0.5662, train_loss: 1.2670


## Save/Load model

In [20]:
if not os.path.isdir(models_directory):
    os.makedirs(name = models_directory)
model_path = os.path.join(models_directory,f"classifier_{dataset_name}.pt")


In [None]:
torch.save(net.state_dict(), model_path)

In [21]:
net = Net(loss_function, metrics, len(classes))
net.load_state_dict(torch.load(model_path))
net.cuda()

Net(
  (loss_function): CrossEntropyLoss()
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=15488, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc): Linear(in_features=256, out_features=7, bias=True)
)

## Model testing

In [22]:
evaluate(net, test_dl)

  output = F.softmax(input = output)


{'val_loss': 1.6622084379196167, 'val_acc': 0.4991883337497711}

In [23]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in tqdm(test_dl):
        images, labels = data[0].to(device), data[1].to(device)
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct // total} %')

  0%|          | 0/22 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Accuracy of the network on the test images: 49 %


In [23]:
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in tqdm(test_dl):
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

  0%|          | 0/22 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Accuracy for class: angry is 48.0 %
Accuracy for class: disgust is 57.0 %
Accuracy for class: fear  is 40.0 %
Accuracy for class: happy is 74.0 %
Accuracy for class: neutral is 44.0 %
Accuracy for class: sad   is 57.0 %
Accuracy for class: surprise is 76.0 %
