# Classifier

## Import libraries

In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from tqdm.notebook import tqdm, trange
from time import sleep
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torchvision import transforms
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from collections import Counter
import numpy as np


## Functions definition

In [3]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [4]:
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr, betas=(0.5,0.999))
    for epoch in tqdm(range(epochs), desc = "Current Epoch"):
        # Training Phase 
        model.train()
        train_losses = []
        for batch in tqdm(train_loader, desc = f"Epoch: {epoch}", leave= False):
            optimizer.zero_grad()
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

## Class definition

In [5]:
class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [6]:
class Metric():
    def __init__(self, name):
        self.name = name
    
    def eval(self, outputs, labels):
        pass

In [7]:
class Accuracy(Metric):
    
    def eval(self, outputs, labels):
        _, preds = torch.max(outputs, dim=1)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [8]:
class ImageClassificationBase(nn.Module):
    
    def __init__(self, loss_function, metrics):
        super().__init__()
        self.loss_function = loss_function
        self.metrics = metrics
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = self.loss_function(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = self.loss_function(out, labels)   # Calculate loss
        result = {'val_loss': loss.detach()}
        
        for m in self.metrics:
            result[m.name] = m.eval(out, labels)           # Calculate metrics
            
        return result
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        
        result = {'val_loss': epoch_loss.item()}
        
        for m in self.metrics:
            batch = [x[m.name] for x in outputs]
            epoch = torch.stack(batch).mean()      # Combine metrics
            result[m.name] = epoch.item()
            
        return result
    
    def epoch_end(self, epoch, result):
        out = f"Epoch [{epoch}]"
        vals = list(result.keys())
        for v in vals:
            out += f", {v}: {result[v]:.4f}"
        print(out)
        

In [9]:
class Net(ImageClassificationBase):
    
    def __init__(self, loss_function, metrics, out_size):
        super().__init__(loss_function, metrics)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
        self.norm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
        self.norm2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.norm3 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(in_features=128*5*5, out_features=256)
        #self.fc2 = nn.Linear(in_features=256, out_features=256)
        self.fc = nn.Linear(256, out_size)

    def forward(self, input):
        output = self.norm1(self.pool1(F.relu(self.conv1(input))))
        output = self.norm2(self.pool2(F.relu(self.conv2((output)))))
        output = self.norm3(self.pool3(F.relu(self.conv3((output)))))
        output = output.view(-1, 128*5*5)
        output = F.relu(self.fc1(output))
        #output = F.relu(self.fc2(output))
        output = self.fc(output)
        output = F.softmax(input = output)
        return output

    """def __init__(self, loss_function, metrics, out_size):
        super().__init__(loss_function, metrics)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding = 1)
        self.norm1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.drop1 = nn.Dropout2d(p=0.25)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1)
        self.norm2 = nn.BatchNorm2d(256)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.drop2 = nn.Dropout2d(p=0.25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=256*11*11, out_features=1024)
        self.drop3 = nn.Dropout2d(p=0.5)
        self.fc = nn.Linear(1024, out_size)

    def forward(self, input):
        output = F.relu(self.conv1(input))
        output = F.relu(self.conv2(output))
        output = self.drop1(self.pool1(self.norm1(output)))
        output = F.relu(self.conv3(output))
        output = F.relu(self.conv4(output))
        output = self.drop2(self.pool2(self.norm2(output)))
        output = self.flatten(output)
        output = F.relu(self.fc1(output))
        output = self.fc(self.drop3(output))
        output = F.softmax(input = output)
        return output"""

## Variables definition

In [10]:
device = get_default_device()
models_directory = "models"
dataset_name = "Fer2013_Aug_Disgust_E_uniform"
train_df_path = f"../datasets/{dataset_name}/train"
test_df_path = f"../datasets/{dataset_name}/test"

## Dataset loading

In [12]:
df_train = ImageFolder(root=train_df_path, transform=transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                     transforms.ToTensor()]))
df_test = ImageFolder(root=test_df_path, transform=transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                     transforms.ToTensor()]))

In [13]:
train_dl = DeviceDataLoader(DataLoader(df_train, batch_size=batch_size, shuffle=True),device)
test_dl = DeviceDataLoader(DataLoader(df_test, batch_size=batch_size, shuffle=True),device)

In [14]:
classes = df_train.classes
classes

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

In [49]:
counts = dict(Counter(df_train.targets))
print(counts)
weights = np.array(list(counts.values()))
weights = torch.Tensor( min(weights)/weights)
print(weights)

{0: 3995, 1: 1669, 2: 4097, 3: 7215, 4: 4965, 5: 4830, 6: 3171}
tensor([0.4178, 1.0000, 0.4074, 0.2313, 0.3362, 0.3455, 0.5263])


## Model design and Training

In [50]:
batch_size = 64
loss_function = nn.CrossEntropyLoss(weight = weights)
metrics = [Accuracy("val_acc")]
optimizer = optim.Adam
lr =0.0001
num_epochs = 25

In [51]:
net = Net(loss_function, metrics, len(classes))
net.to(device)
summary(net,(1,48,48))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 46, 46]             320
         MaxPool2d-2           [-1, 32, 23, 23]               0
       BatchNorm2d-3           [-1, 32, 23, 23]              64
            Conv2d-4           [-1, 64, 21, 21]          18,496
         MaxPool2d-5           [-1, 64, 11, 11]               0
       BatchNorm2d-6           [-1, 64, 11, 11]             128
            Conv2d-7            [-1, 128, 9, 9]          73,856
         MaxPool2d-8            [-1, 128, 5, 5]               0
       BatchNorm2d-9            [-1, 128, 5, 5]             256
           Linear-10                  [-1, 256]         819,456
           Linear-11                    [-1, 7]           1,799
Total params: 914,375
Trainable params: 914,375
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/

  output = F.softmax(input = output)


In [52]:
history = fit(num_epochs, lr, net, train_dl, test_dl, optimizer)

Current Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 0:   0%|          | 0/468 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Epoch [0], val_loss: 1.6965, val_acc: 0.4559, train_loss: 1.7456


Epoch: 1:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [1], val_loss: 1.6433, val_acc: 0.5019, train_loss: 1.6302


Epoch: 2:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [2], val_loss: 1.6407, val_acc: 0.5099, train_loss: 1.5832


Epoch: 3:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [3], val_loss: 1.6071, val_acc: 0.5429, train_loss: 1.5480


Epoch: 4:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [4], val_loss: 1.6153, val_acc: 0.5499, train_loss: 1.5185


Epoch: 5:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [5], val_loss: 1.6048, val_acc: 0.5486, train_loss: 1.4918


Epoch: 6:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [6], val_loss: 1.6019, val_acc: 0.5650, train_loss: 1.4685


Epoch: 7:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [7], val_loss: 1.5971, val_acc: 0.5699, train_loss: 1.4470


Epoch: 8:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [8], val_loss: 1.6082, val_acc: 0.5527, train_loss: 1.4286


Epoch: 9:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [9], val_loss: 1.6234, val_acc: 0.5556, train_loss: 1.4099


Epoch: 10:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [10], val_loss: 1.6164, val_acc: 0.5473, train_loss: 1.3944


Epoch: 11:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [11], val_loss: 1.6039, val_acc: 0.5568, train_loss: 1.3810


Epoch: 12:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [12], val_loss: 1.6211, val_acc: 0.5480, train_loss: 1.3687


Epoch: 13:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [13], val_loss: 1.6176, val_acc: 0.5511, train_loss: 1.3578


Epoch: 14:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [14], val_loss: 1.6102, val_acc: 0.5629, train_loss: 1.3493


Epoch: 15:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [15], val_loss: 1.6162, val_acc: 0.5466, train_loss: 1.3399


Epoch: 16:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [16], val_loss: 1.6202, val_acc: 0.5601, train_loss: 1.3319


Epoch: 17:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [17], val_loss: 1.6156, val_acc: 0.5427, train_loss: 1.3260


Epoch: 18:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [18], val_loss: 1.6197, val_acc: 0.5412, train_loss: 1.3203


Epoch: 19:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [19], val_loss: 1.6164, val_acc: 0.5414, train_loss: 1.3160


Epoch: 20:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [20], val_loss: 1.6178, val_acc: 0.5496, train_loss: 1.3104


Epoch: 21:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [21], val_loss: 1.6190, val_acc: 0.5428, train_loss: 1.3067


Epoch: 22:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [22], val_loss: 1.6028, val_acc: 0.5444, train_loss: 1.3048


Epoch: 23:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [23], val_loss: 1.6062, val_acc: 0.5584, train_loss: 1.3019


Epoch: 24:   0%|          | 0/468 [00:00<?, ?it/s]

Epoch [24], val_loss: 1.6187, val_acc: 0.5432, train_loss: 1.2996


## Save/Load model

In [None]:
if not os.path.isdir(models_directory):
    os.makedirs(name = models_directory)
model_path = os.path.join(models_directory,f"classifier_{dataset_name}.pt")


In [None]:
torch.save(net.state_dict(), model_path)

In [None]:
net = Net(loss_function, metrics, len(classes))
net.load_state_dict(torch.load(model_path))
net.cuda()

Net(
  (loss_function): CrossEntropyLoss()
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=15488, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc): Linear(in_features=256, out_features=7, bias=True)
)

## Model testing

In [53]:
evaluate(net, test_dl)

  output = F.softmax(input = output)


{'val_loss': 1.6166155338287354, 'val_acc': 0.5428977608680725}

In [54]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in tqdm(test_dl):
        images, labels = data[0].to(device), data[1].to(device)
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct // total} %')

  0%|          | 0/11 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Accuracy of the network on the test images: 54 %


In [55]:
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in tqdm(test_dl):
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

  0%|          | 0/11 [00:00<?, ?it/s]

  output = F.softmax(input = output)


Accuracy for class: angry is 47.0 %
Accuracy for class: disgust is 51.0 %
Accuracy for class: fear  is 30.0 %
Accuracy for class: happy is 71.0 %
Accuracy for class: neutral is 51.0 %
Accuracy for class: sad   is 51.0 %
Accuracy for class: surprise is 79.0 %
