In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
import os
import copy

# Pick the compute device once; every later cell moves tensors/models to `device`.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using the GPU!")
else:
    device = torch.device("cpu")
    print("WARNING: Could not find GPU! Using CPU only")

Using the GPU!


In [2]:
!nvidia-smi

Thu May 20 23:08:55 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.03   Driver Version: 450.119.03   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           On   | 00000000:00:1E.0 Off |                    0 |
| N/A   29C    P8    30W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [3]:
import random
# set seed for all randomization for determinism
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices).

    The seeders are called in a fixed order: random, numpy, torch, torch.cuda.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(42)

In [4]:
# grab data (https://stackoverflow.com/questions/65099766/is-there-a-way-to-download-data-from-a-public-link-to-google-colab)
# !pip3 install gdown
# !gdown --id 13JvaRtXkV_oZ8K-7iOH736OhQ2eAMENB
# !gdown --id 1f8wUtQj-UatrZtCnkJFcB--X2eJS1m_N


In [5]:
# Index -> emotion name. The length of this mapping sizes the 7-way classifier
# that the "affect_pretrain" checkpoint is loaded into.
EMOTION_DICT = dict(enumerate((
    "angry",
    "disgust",
    "fear",
    "happy",
    "sad",
    "surprise",
    "neutral",
)))

def initialize_model(model_name, num_classes, desired_clases: list, resume_from = None):
    """Build a DenseNet-121 classifier for one of the supported setups.

    Args:
        model_name: Which initialisation to use:
            "affect_pretrain"   - load the emotion checkpoint
                                  ./densenet121_rot30_2019Nov11_14.23 into a
                                  len(EMOTION_DICT)-way network, then keep only
                                  the classifier rows listed in desired_clases;
            "from_scratch"      - randomly initialised network with a
                                  num_classes-way classifier;
            "imagenet_pretrain" - torchvision's pretrained weights with a fresh
                                  num_classes-way classifier.
        num_classes: Number of output classes. Only used by "from_scratch" and
            "imagenet_pretrain"; "affect_pretrain" sizes its head from
            len(desired_clases).
        desired_clases: EMOTION_DICT indices whose classifier weights are kept
            (only used by "affect_pretrain"). Output i of the new head
            corresponds to desired_clases[i].
        resume_from: Optional path to a state_dict to load on top of the
            freshly built model.

    Returns:
        (model, input_size) where input_size is the image side length (224)
        that the data pipeline should produce.

    Raises:
        ValueError: If model_name is not one of the supported names.
    """
    supported_models = ("affect_pretrain", "from_scratch", "imagenet_pretrain")
    # Fail early with a clear message instead of an UnboundLocalError at the
    # final `return model_ft`.
    if model_name not in supported_models:
        raise ValueError(
            "Unknown model_name %r; expected one of %s" % (model_name, ", ".join(supported_models))
        )

    input_size = 224

    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    # map_location="cpu" lets checkpoints saved on a GPU load on CPU-only
    # machines; the caller moves the finished model to the target device.
    if model_name == "affect_pretrain":
        model_ft = models.densenet121(pretrained=False)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, len(EMOTION_DICT))
        checkpoint = torch.load("./densenet121_rot30_2019Nov11_14.23", map_location="cpu")
        model_ft.load_state_dict(checkpoint['net'])

        # Chop off every emotion output except the desired ones, reusing the
        # pretrained weights/biases of the rows that are kept.
        new_classifier = nn.Linear(num_ftrs, len(desired_clases))
        with torch.no_grad():
            new_classifier.weight.copy_(model_ft.classifier.weight[desired_clases])
            new_classifier.bias.copy_(model_ft.classifier.bias[desired_clases])
        model_ft.classifier = new_classifier
    elif model_name == "from_scratch":
        model_ft = models.densenet121(pretrained=False)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
    else:  # "imagenet_pretrain"
        model_ft = models.densenet121(pretrained=True)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    if resume_from is not None:
        print("Loading weights from %s" % resume_from)
        model_ft.load_state_dict(torch.load(resume_from, map_location="cpu"))

    return model_ft, input_size

## Data Loading

With the input size from the model, we can now load the dataset

In [6]:
data_dir = '/home/ubuntu/affect_objective_data/'
wav2lip_generator_output_size = 96
def get_dataloaders(input_size, batch_size, shuffle = True, aug_fraction = 1.0):
    """Create the 'train', 'val' and 'test' DataLoaders from data_dir/<split>.

    Every split converts the image to 3-channel grayscale, resizes it to
    wav2lip_generator_output_size square (presumably to mimic the resolution
    of the wav2lip generator output), scales it back up to input_size,
    center-crops, converts to a [C,H,W] tensor and normalizes it. The 'train'
    split additionally applies random augmentations.

    Args:
        input_size: Side length the network expects.
        batch_size: Batch size used by every loader.
        shuffle: Whether the 'train' loader shuffles; 'val' and 'test' never do.
        aug_fraction: Currently unused.

    Returns:
        Dict mapping split name to its torch DataLoader.
    """
    generator_size = (wav2lip_generator_output_size, wav2lip_generator_output_size)
    channel_mean = [0.4306, 0.3199, 0.2652]
    channel_std = [0.1722, 0.1150, 0.0941]

    def shared_head():
        # Grayscale + down/up-scaling applied identically to every split.
        return [
            transforms.Grayscale(num_output_channels=3),
            transforms.Resize(generator_size),
            transforms.Resize(input_size),
        ]

    def plain_pipeline():
        # Deterministic pipeline used for evaluation splits.
        return transforms.Compose(shared_head() + [
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ])

    # Random augmentations applied to PIL images (training only).
    train_augmentations = [
        transforms.RandomApply(
            [transforms.RandomAffine(scale=(0.9,1.1), translate=(0.1,0.1), degrees=15)],
            p=0.3,
        ),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
        transforms.RandomGrayscale(p=0.1),
        transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1)],
            p=0.3,
        ),
    ]

    train_pipeline = transforms.Compose(shared_head() + train_augmentations + [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        # RandomErasing operates on tensors, so it comes after ToTensor.
        transforms.RandomErasing(p=0.1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=True),
        transforms.Normalize(channel_mean, channel_std),
    ])

    data_transforms = {
        'train': train_pipeline,
        'val': plain_pipeline(),
        'test': plain_pipeline(),
    }

    # Build all datasets first, then wrap each one in a loader.
    image_datasets = {}
    for split, pipeline in data_transforms.items():
        image_datasets[split] = datasets.ImageFolder(os.path.join(data_dir, split), pipeline)

    dataloaders_dict = {}
    for split, dataset in image_datasets.items():
        shuffle_split = shuffle if split == 'train' else False
        dataloaders_dict[split] = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle_split, num_workers=4
        )
    return dataloaders_dict

## Data Exploration

In [7]:
# data exploration
# from collections import Counter
# image_datasets_explore = {x: datasets.ImageFolder(os.path.join(data_dir, x), transforms.ToTensor()) for x in ['train','val','test']}
# for name, dset in image_datasets_explore.items():
#   print(name)
#   print("**Dimensions**")
#   print("min H:", min(img[0].shape[1] for img in dset))
#   print("max H:", max(img[0].shape[1] for img in dset))
#   print("min W:", min(img[0].shape[2] for img in dset))
#   print("max W:", max(img[0].shape[2] for img in dset))

#   print("**Class Distributions**")
#   idx_to_class = {v:k for k,v in dset.class_to_idx.items()}
#   print({idx_to_class[k]:v for k,v in Counter(img[1] for img in dset).items()})

# mean = torch.zeros(3, dtype=float)
# std = torch.zeros(3, dtype=float)
# for img, _ in image_datasets_explore['train']:
#     mean += img.mean(axis=(1,2))
#     std += img.std(axis=(1,2))

# mean = mean / len(image_datasets_explore['train'])
# std = std / len(image_datasets_explore['train'])
# print('channel means:', mean)
# print('channel std:', std)

## Training
Next, let's make a helper function that trains the given model

In [8]:
def train_model(model, dataloaders, criterion, optimizer, scheduler=None, save_dir = None,
                save_all_epochs=False, num_epochs=25, name = ''):
    '''
    Train `model`, validating after every epoch, and return the best weights found.

    model: The NN to train
    dataloaders: A dictionary containing at least the keys
                 'train','val' that maps to Pytorch data loaders for the dataset
    criterion: The Loss function
    optimizer: The algorithm to update weights
               (Variations on gradient descent)
    scheduler: pytorch lr_scheduler for managing learning rate. A CyclicLR
               scheduler is stepped after every training batch, a LambdaLR
               scheduler once per epoch; other scheduler types are not stepped.
    num_epochs: How many epochs to train for
    save_dir: Where to save the best model weights that are found.
              Will save to save_dir/weights_best_{name}.pt once training ends.
              Using None will not write anything to disk
    save_all_epochs: Whether to save weights for ALL epochs, not just the best
                     validation accuracy epoch. Will save to
                     save_dir/weights_{epoch}_{name}.pt (ignored when save_dir is None)
    name: Run name embedded in the saved file names

    Returns (model, val_acc_history): the model with the best-validation-accuracy
    weights loaded, and the list of per-epoch validation accuracies.
    '''
    since = time.time()

    # save_dir=None is documented as "never touch the disk", so every write
    # below is guarded by this flag.
    write_to_disk = save_dir is not None
    if write_to_disk:
        os.makedirs(save_dir, exist_ok=True)

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1),
              (f'lr={scheduler.get_last_lr()}' if scheduler else '') )
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            # TQDM has nice progress bars
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # torch.max outputs the maximum value, and its index
                    # Since the input is batched, we take the max along axis 1
                    # (the meaningful outputs)
                    _, preds = torch.max(outputs, 1)

                    # backprop + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        # CyclicLR is defined per batch, so it is stepped here
                        if scheduler and isinstance(scheduler, optim.lr_scheduler.CyclicLR):
                            scheduler.step()

                # statistics (loss is a batch mean, so re-weight by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        # Write the per-epoch checkpoint once, after both phases have run.
        if save_all_epochs and write_to_disk:
            torch.save(model.state_dict(), os.path.join(save_dir, f'weights_{epoch}_{name}.pt'))
        # LambdaLR is defined per epoch, so it is stepped here
        if scheduler and isinstance(scheduler, optim.lr_scheduler.LambdaLR):
            scheduler.step()
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # save (if requested) and load best model weights
    if write_to_disk:
        torch.save(best_model_wts, os.path.join(save_dir, f'weights_best_{name}.pt'))
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

## Optimizer & Loss
We need a loss function, and an optimization function to use to try to reduce that loss.

In [9]:
def make_optimizer(model):
    """Print the trainable parameter names and return an Adam optimizer.

    The optimizer is built over all of the model's parameters with
    lr=0.0001 and weight_decay=0.5.
    """
    print("Params to learn:")
    trainable_names = [
        pname for pname, tensor in model.named_parameters() if tensor.requires_grad
    ]
    for pname in trainable_names:
        print("\t", pname)

    # Alternative that was tried: optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    return optim.Adam(model.parameters(), lr=0.0001, weight_decay= 0.5)

def make_scheduler(optimizer, total_epochs):
    """Return a LambdaLR that decays the learning rate linearly to 0.

    The multiplier applied to the initial learning rate at a given epoch is
    (total_epochs - epoch) / total_epochs.
    """
    def linear_decay(current_epoch):
        remaining = total_epochs - current_epoch
        return remaining / total_epochs

    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=linear_decay)

def make_cyclic_scheduler(optimizer, total_epochs, steps_per_epoch):
    """Return a CyclicLR oscillating between 1e-5 and 1e-4.

    The rising half of the cycle lasts half an epoch's worth of steps.
    total_epochs is accepted but not used.
    """
    half_epoch_steps = steps_per_epoch/2
    return optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=0.00001,
        max_lr=0.0001,
        step_size_up=half_epoch_steps,
        cycle_momentum=False,
    )

def get_loss():
    """Return a new cross-entropy loss instance used as the training criterion."""
    return nn.CrossEntropyLoss()

# Section 1 Tying it all together - Training

In [10]:
# Run name; train_model embeds it in the checkpoint file names it writes.
NAME = 'affect_pretrain_cyclic_baseline_1e-4_to_1e-5_wd0.5_greyscale'

# Model setup to use. initialize_model supports exactly these three names:
# "affect_pretrain", "from_scratch" and "imagenet_pretrain".
model_name = "affect_pretrain"
# model_name = "from_scratch"
# model_name = "imagenet_pretrain"

# Number of classes in the dataset
# (two image folders are used here: happy and neutral)
num_classes = 2

# Batch size for training (change depending on how much memory you have)
# You should use a power of 2.
batch_size = 64

# Shuffle the input data? (only affects the 'train' loader)
shuffle_datasets = True

# Number of epochs to train for
num_epochs = 10

### IO
# Path to a model file to use to start weights at (None = do not resume)
# resume_from = "weights/weights_best_resnet50_adam_cyclic_schdeduler_64_batch_10_epoch_all_augs_p_0.1_and_0.3_and_0.5.pt"
resume_from = None

# Directory to save weights to
save_dir = "weights"
os.makedirs(save_dir, exist_ok=True)

# Save weights for all epochs, not just the best one
save_all_epochs = False



In [20]:
# Initialize the model for this run.
# [3, 6] are the EMOTION_DICT indices of "happy" and "neutral"; their order
# matches the dataset's class_to_idx printed below ({'happy': 0, 'neutral': 1}).
model, input_size = initialize_model(model_name, num_classes, [3,6], resume_from = resume_from)
dataloaders = get_dataloaders(input_size, batch_size, shuffle_datasets)
print(dataloaders['train'].dataset.class_to_idx)
criterion = get_loss()

# Move the model to the gpu if needed
model = model.to(device)

optimizer = make_optimizer(model)
# scheduler = make_scheduler(optimizer, num_epochs)
# len(dataloaders['train']) is the number of batches per epoch, so one full
# learning-rate cycle spans one epoch.
scheduler = make_cyclic_scheduler(optimizer, num_epochs, len(dataloaders['train']))
# scheduler = None

# Train the model!
trained_model, validation_history = train_model(model=model, dataloaders=dataloaders,
           criterion=criterion, optimizer=optimizer, scheduler=scheduler,
           save_dir=save_dir, save_all_epochs=save_all_epochs, num_epochs=num_epochs, name=NAME)

{'happy': 0, 'neutral': 1}
Params to learn:
	 features.conv0.weight
	 features.norm0.weight
	 features.norm0.bias
	 features.denseblock1.denselayer1.norm1.weight
	 features.denseblock1.denselayer1.norm1.bias
	 features.denseblock1.denselayer1.conv1.weight
	 features.denseblock1.denselayer1.norm2.weight
	 features.denseblock1.denselayer1.norm2.bias
	 features.denseblock1.denselayer1.conv2.weight
	 features.denseblock1.denselayer2.norm1.weight
	 features.denseblock1.denselayer2.norm1.bias
	 features.denseblock1.denselayer2.conv1.weight
	 features.denseblock1.denselayer2.norm2.weight
	 features.denseblock1.denselayer2.norm2.bias
	 features.denseblock1.denselayer2.conv2.weight
	 features.denseblock1.denselayer3.norm1.weight
	 features.denseblock1.denselayer3.norm1.bias
	 features.denseblock1.denselayer3.conv1.weight
	 features.denseblock1.denselayer3.norm2.weight
	 features.denseblock1.denselayer3.norm2.bias
	 features.denseblock1.denselayer3.conv2.weight
	 features.denseblock1.denselayer4

Epoch 0/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.1263 Acc: 0.9570


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.0781 Acc: 0.9754

Epoch 1/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0529 Acc: 0.9826


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.0395 Acc: 0.9950

Epoch 2/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0541 Acc: 0.9833


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.1371 Acc: 0.9415

Epoch 3/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0568 Acc: 0.9839


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.2137 Acc: 0.8978

Epoch 4/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0628 Acc: 0.9838


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.2127 Acc: 0.8892

Epoch 5/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0710 Acc: 0.9810


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.2365 Acc: 0.8779

Epoch 6/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0755 Acc: 0.9805


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.2071 Acc: 0.9047

Epoch 7/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0726 Acc: 0.9810


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.2110 Acc: 0.9161

Epoch 8/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0710 Acc: 0.9806


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.4855 Acc: 0.8103

Epoch 9/9 lr=[1e-05]
----------


  0%|          | 0/493 [00:00<?, ?it/s]

train Loss: 0.0699 Acc: 0.9805


  0%|          | 0/66 [00:00<?, ?it/s]

val Loss: 0.1925 Acc: 0.9195

Training complete in 118m 22s
Best val Acc: 0.994983


# Section 2: Inference

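Now that we've trained a model, we would like to evaluate its performance (on the validation data) and use it for inference (on the test data). The `evaluate` helper supports top-k inference - that is, the model outputs its k best guesses for a given image (k = 5 by default). Because this task has only two classes (happy and neutral), the cells below call it with k = 1 and report top-1 accuracy.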


In [11]:
def evaluate(model, dataloader, criterion, is_labelled = False, generate_labels = True, k = 5):
    """Run `model` over `dataloader` and report loss / accuracy / predictions.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Loader yielding (inputs, labels) batches. Labels are only
            read when is_labelled is True.
        criterion: Loss function; only called when is_labelled is True.
        is_labelled: If True, compute the mean loss, top-1 accuracy and top-k
            accuracy over len(dataloader.dataset) samples.
        generate_labels: If True, collect the top-k predicted class indices
            for every sample.
        k: Number of guesses per sample. It is clamped to the number of model
            outputs, so the default of 5 also works for models with fewer
            than 5 classes.

    Returns:
        (loss, top1_acc, topk_acc, predicted_labels). The three metrics are
        floats, or None when is_labelled is False or the dataset is empty.
        predicted_labels is a list with one list of class indices per sample
        (empty when generate_labels is False).
    """
    model.eval()
    running_loss = 0.0
    running_top1_correct = 0
    running_topk_correct = 0
    predicted_labels = []

    # Iterate over data.
    # TQDM has nice progress bars
    for inputs, labels in tqdm(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(inputs)

            # torch.topk raises if k exceeds the size of the class dimension.
            top_k = min(k, outputs.size(1))
            # preds[i] holds the top_k class indices for sample i, best first.
            _, preds = torch.topk(outputs, k=top_k, dim=1)

            if generate_labels:
                # We want to store these results
                predicted_labels.extend(list(row) for row in preds.cpu().numpy())

            if is_labelled:
                loss = criterion(outputs, labels)
                # loss is a batch mean, so re-weight by batch size
                running_loss += loss.item() * inputs.size(0)
                # Check only the first prediction
                running_top1_correct += torch.sum(preds[:, 0] == labels).item()
                # Check all top_k predictions; the label column broadcasts
                # against the (batch, top_k) prediction matrix.
                running_topk_correct += torch.sum(preds == labels.unsqueeze(1)).item()

    # Only compute loss & accuracy if we have the labels (and any samples)
    num_samples = len(dataloader.dataset)
    if is_labelled and num_samples > 0:
        epoch_loss = float(running_loss / num_samples)
        epoch_top1_acc = float(running_top1_correct / num_samples)
        epoch_top5_acc = float(running_topk_correct / num_samples)
    else:
        epoch_loss = None
        epoch_top1_acc = None
        epoch_top5_acc = None

    # Return everything
    return epoch_loss, epoch_top1_acc, epoch_top5_acc, predicted_labels

    

In [13]:
# Initialize the model from best weights for eval
# NOTE(review): this file name does not contain the "wd0.5" part of NAME, so it
# looks like a checkpoint from a different run than the one configured in the
# training section - confirm this is the checkpoint you mean to evaluate.
load_from = "weights/weights_best_affect_pretrain_cyclic_baseline_1e-4_to_1e-5_greyscale.pt"
model, input_size = initialize_model(model_name, num_classes, [3,6], resume_from = load_from)
dataloaders = get_dataloaders(input_size, batch_size, shuffle_datasets)
criterion = get_loss()

# Move the model to the gpu if needed
model = model.to(device)

Loading weights from weights/weights_best_affect_pretrain_cyclic_baseline_1e-4_to_1e-5_greyscale.pt


In [15]:
# Validation split: mean loss and top-1 accuracy (k = 1, predictions not collected).
val_loss, val_top1, _, _ = evaluate(model, dataloaders['val'], criterion, is_labelled = True, generate_labels = False, k = 1)
print("val_loss:", val_loss)
print("val_top1:", val_top1)

  0%|          | 0/66 [00:00<?, ?it/s]

val_loss: 0.04307846507137976
val_top1: 0.9863831820353559


In [14]:
# Test split: mean loss and top-1 accuracy (k = 1, predictions not collected).
test_loss, test_top1, _, _ = evaluate(model, dataloaders['test'], criterion, is_labelled = True, generate_labels = False, k = 1)
print("test_loss:", test_loss)
print("test_top1:", test_top1)

  0%|          | 0/54 [00:00<?, ?it/s]

test_loss: 0.7646705143202706
test_top1: 0.8529155787641428


In [23]:
# Train split: mean loss and top-1 accuracy (k = 1, predictions not collected).
# Note: the 'train' loader applies the random training augmentations, so these
# numbers are measured on augmented images.
train_loss, train_top1, _, _ = evaluate(model, dataloaders['train'], criterion, is_labelled = True, generate_labels = False, k = 1)
print("train_loss:", train_loss)
print("train_top1:", train_top1)

  0%|          | 0/151 [00:00<?, ?it/s]

train_loss: 0.011481323452846057
train_top1: 0.9961502445114973
