In [None]:
# Mount Google Drive (Colab-only API) at /content/drive so that later cells
# can read the dataset archives, label CSVs and checkpoints stored there.
from google.colab import drive
drive.mount('/content/drive') 

Mounted at /content/drive


In [None]:
%mkdir -p /content/data/
%cd /content/data

import tarfile 

train_path = '/content/drive/My Drive/CSC420/pretraining/training.tar'
val_path = '/content/drive/My Drive/CSC420/pretraining/validation.tar'

# Load training set and validation set
for fpath in [train_path, val_path]:
  print('Extracting {}...'.format(fpath.split('/')[-1]))
  with tarfile.open(fpath) as tar:
    tar.extractall()


/content/data
Extracting training.tar...
Extracting validation.tar...


In [None]:
import cv2
import numpy as np
from PIL import Image

'''
References:
Preprocessing steps: https://arxiv.org/pdf/1911.05946.pdf
Face landmarks : https://www.pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/
'''
class align_faces_with_landmarks(object):
    """Callable transform that rotates, scales and crops a face image.

    The image is rotated so that the line between the two eyes is horizontal,
    scaled according to the vertical extent of the five landmarks, and
    rendered into a square ``DIMENSIONS`` x ``DIMENSIONS`` output.
    """

    # Relative (x, y) position of the eyes in the output image. Only the
    # vertical component is used; horizontally the eye midpoint is centred.
    LEFT_EYE_CORD = (0.25, 0.2)
    # Side length, in pixels, of the square output image.
    DIMENSIONS = 244

    def __call__(self, image, landmarks):
        """Align ``image`` using its facial ``landmarks``.

        Args:
            image: PIL image (or anything ``np.asarray`` accepts).
            landmarks: flat sequence of 10 numbers, i.e. five (x, y) pairs.
                The first pair is used as the left eye and the second as the
                right eye; all five contribute to the scale estimate.

        Returns:
            The aligned image as a PIL image.

        Raises:
            ValueError: if ``landmarks`` cannot be reshaped to (5, 2) or if
                all landmarks share the same y coordinate (no usable scale).
        """
        # Images are loaded as PIL objects; OpenCV needs a NumPy array.
        image = np.asarray(image)

        landmarks = np.array(landmarks).reshape((5, 2))

        # Bounding extent of the landmarks, used to gauge the face scale.
        maximum = np.max(landmarks, axis=0)
        minimum = np.min(landmarks, axis=0)

        left = landmarks[0]
        right = landmarks[1]

        # Integer midpoint between the eyes. The builtin ``int`` replaces the
        # ``np.int`` alias, which was removed in NumPy 1.24.
        centre = np.mean(np.vstack((left, right)), axis=0, dtype=int)
        # OpenCV's Python bindings expect plain Python scalars.
        centre_x, centre_y = int(centre[0]), int(centre[1])

        # Angle (degrees) of the eye line relative to the horizontal axis.
        dx, dy = right - left
        angle = float(np.degrees(np.arctan2(dy, dx)))

        # Use the vertical landmark extent, enlarged by 90%, as the face
        # height that should fill the output image.
        y_scale = maximum[1] - minimum[1]
        y_scale = y_scale + 0.9 * y_scale
        if y_scale <= 0:
            raise ValueError('Degenerate landmarks: zero vertical extent')
        scale = float(self.DIMENSIONS / y_scale)

        M = cv2.getRotationMatrix2D((centre_x, centre_y), angle, scale)

        # Shift the eye midpoint to the horizontal centre of the output and
        # to LEFT_EYE_CORD[1] of its height.
        t_x = self.DIMENSIONS // 2
        t_y = self.DIMENSIONS * self.LEFT_EYE_CORD[1]
        M[0, 2] += (t_x - centre_x)
        M[1, 2] += (t_y - centre_y)

        image2 = cv2.warpAffine(image, M, (self.DIMENSIONS, self.DIMENSIONS),
                                flags=cv2.INTER_CUBIC)

        # Convert back to PIL for the downstream torchvision transforms.
        return Image.fromarray(image2)

In [None]:
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image
from torch.utils.data import Dataset, DataLoader


class CelebAU(Dataset):
    """CelebAU dataset labeled by presence of action units (AU)."""

    # Action-unit codes for which label columns are read from the label CSV.
    AU_CODES = ['01', '02', '04', '05', '06', '07', '09', '10', '12', '14',
                '15', '17', '20', '23', '25', '26', '28', '45']

    def __init__(self, train=False, intensity=False, transform=None):
        """
        Args:
          - train: training set if True, otherwise validation set
          - intensity (bool): labels are intensities (between 0 and 5) rather
                              than presence (either 0 or 1).
          - transform: transform applied to an image input. Face alignment is
                       only performed when a transform is given.
        """
        self.train = train
        if train:
            label_path = '/content/drive/MyDrive/CSC420/pretraining/train_labels.csv'
            self.root_dir = '/content/data/training'
        else:
            label_path = '/content/drive/MyDrive/CSC420/pretraining/val_labels.csv'
            self.root_dir = '/content/data/validation'
        self.au_frame = pd.read_csv(label_path, index_col=[0, 1])

        # Column names carry a leading space and end in "_r" (intensity) or
        # "_c" (presence).
        suffix = 'r' if intensity else 'c'
        self.label_cols = [' AU{}_{}'.format(code, suffix)
                           for code in self.AU_CODES]

        self.landmark_cols = ['name', 'lefteye_x', 'lefteye_y', 'righteye_x', 'righteye_y', 'nose_x', 'nose_y',
                              'leftmouth_x', 'leftmouth_y', 'rightmouth_x', 'rightmouth_y']

        self.intensity = intensity
        self.transform = transform

        # Facial landmarks used to align each face before the transform.
        landmark_path = '/content/drive/MyDrive/CSC420/pretraining/list_landmarks_align_celeba.csv'
        self.landmark_frame = pd.read_csv(landmark_path, index_col=[0])
        self.align = align_faces_with_landmarks()

    def __len__(self):
        """Number of labeled samples."""
        return len(self.au_frame)

    def __getitem__(self, idx):
        """
        Returns a dictionary with keys 'image' and 'labels' for sample idx.

        Returns None when alignment or the transform raises ValueError, so
        that the sample can be dropped by the collate function.

        Raises:
            FileNotFoundError: if the image file is missing or unreadable.
        """
        # The first non-index column of the label frame holds the image id;
        # files are named with the id zero-padded to six digits.
        image_id = self.au_frame.iloc[idx, 0]
        image_path = self.root_dir + '/' + str(image_id).zfill(6) + '.jpg'

        # cv2.imread reports failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(
                'Could not read image: {}'.format(image_path))
        # NOTE: the array keeps OpenCV's BGR channel order.
        image = Image.fromarray(image)

        # Landmark rows are looked up by position; this assumes the landmark
        # CSV is ordered by image id starting at 1 -- TODO confirm.
        image_id = int(image_id)
        landmarks = self.landmark_frame.iloc[image_id - 1].tolist()

        # AU labels as a float array.
        aus = np.array(self.au_frame.iloc[idx][self.label_cols], dtype=float)

        if self.transform:
            try:
                # torchvision transforms can't take multiple parameters, so
                # the landmark-based alignment is applied separately first.
                # Reference:
                # https://discuss.pytorch.org/t/t-compose-typeerror-call-takes-2-positional-arguments-but-3-were
                # -given/62529
                image = self.align(image, landmarks)
                image = self.transform(image)
            except ValueError:
                return None

        return {'image': image, 'labels': aus}


def collate_fn(batch):
    """
    Build a batch from a list of samples, dropping every entry that is None
    (samples the dataset could not produce) before default collation.
    """
    kept = [sample for sample in batch if sample is not None]
    return torch.utils.data.dataloader.default_collate(kept)



def imshow(img):
    """Display a channel-first image tensor after undoing x -> (x - 0.5) / 0.5."""
    restored = img / 2 + 0.5  # unnormalize
    # Move the channel axis last, as matplotlib expects (H, W, C).
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last, cmap="gray")
    plt.show()


OK


In [None]:
import copy
import time

import torchvision
from scipy.io.idl import AttrDict
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np


def validation_step(convnet, val_loader, criterion, bs):
    """Evaluate ``convnet`` on every batch of ``val_loader``.

    Args:
        convnet: model mapping an image batch to per-label logits.
        val_loader: iterable of dicts with keys "image" and "labels".
        criterion: loss taking (logits, labels).
        bs: batch size; unused, kept for backward compatibility.

    Returns:
        (val_loss, val_acc): mean of the per-batch losses, and the percentage
        of individual label predictions (sigmoid(logit) > 0.5) that match the
        labels. Both are NaN when the loader yields no data.

    Note: the caller is responsible for putting the model in eval mode.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    num_matches = 0
    total = 0
    losses = []

    with torch.no_grad():
        for item in val_loader:
            labels = item.get("labels").to(device)
            imgs = item.get("image").to(device)

            outputs = convnet(imgs)

            # Batch loss
            losses.append(criterion(outputs, labels).item())

            # Batch accuracy: probabilities > 0.5 count as a positive label.
            predictions = torch.sigmoid(outputs) > 0.5
            # .item() keeps the running count a plain Python number instead
            # of a (possibly CUDA) tensor.
            num_matches += (predictions == labels).sum().item()

            # Count every label entry rather than assuming 18 per sample.
            total += labels.numel()

    if total == 0:
        return float('nan'), float('nan')

    val_loss = np.mean(losses)
    val_acc = 100.0 * num_matches / total
    return val_loss, val_acc


def train(convnet, args, soft_start=False):
    """Train ``convnet`` on CelebAU and checkpoint the best model.

    Args:
        convnet: model to train; it is moved to the training device.
        args: attribute-style config providing ``transform``, ``batch_size``,
            ``num_workers``, ``learn_rate``, ``epochs``, ``resume`` and
            ``checkpoint`` (path of the checkpoint file).
        soft_start: unused, kept for backward compatibility.

    After each epoch the model is validated; whenever the validation loss is
    at least as good as the best one seen so far, a checkpoint is written to
    ``args.checkpoint`` containing the model and optimizer state, the loss
    and accuracy histories of this run, and ``best_val_loss`` (the actual,
    non-negated validation loss).
    """
    import os  # local import: only needed to create the checkpoint directory

    train_losses = []
    valid_losses = []
    valid_accs = []
    best_val_loss = float('inf')
    start = time.time()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Inputs are moved to `device` below, so the model must live there too.
    convnet.to(device)

    # Fail early (not after the first epoch) if the checkpoint directory
    # cannot be created.
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    train_set = CelebAU(train=True, transform=args.transform)
    train_loader = DataLoader(train_set, args.batch_size, collate_fn=collate_fn,
                              shuffle=True, num_workers=args.num_workers)

    # Validation needs no shuffling; a fixed order keeps the metrics
    # comparable across epochs.
    val_set = CelebAU(train=False, transform=args.transform)
    val_loader = DataLoader(val_set, args.batch_size, collate_fn=collate_fn,
                            shuffle=False, num_workers=args.num_workers)

    optimizer = torch.optim.Adam(convnet.parameters(), args.learn_rate)
    criterion = torch.nn.BCEWithLogitsLoss()

    if args.resume:
        print("Loading checkpoint")
        # map_location lets a checkpoint saved on GPU be resumed on CPU.
        state = torch.load(args.checkpoint, map_location=device)
        convnet.load_state_dict(state['model_state_dict'])
        optimizer.load_state_dict(state['optimizer_state_dict'])
        # Restore the best loss so a worse epoch cannot overwrite the
        # checkpoint. Older checkpoints stored the negated loss; the loss is
        # non-negative, so abs() recovers it in both cases.
        best_val_loss = abs(state.get('best_val_loss', best_val_loss))

    for epoch in range(args.epochs):
        convnet.train()
        losses = []

        for item in train_loader:
            labels = item.get("labels").to(device)
            imgs = item.get("image").to(device)

            optimizer.zero_grad()

            outputs = convnet(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        avg_loss = np.mean(losses)
        train_losses.append(avg_loss)
        time_elapsed = time.time() - start

        print('Epoch [%d/%d], Loss: %.4f, Time (s): %d' % (
            epoch + 1, args.epochs, avg_loss, time_elapsed))

        # Validation
        convnet.eval()
        val_loss, val_acc = validation_step(convnet, val_loader, criterion,
                                            args.batch_size)
        time_elapsed = time.time() - start
        valid_losses.append(val_loss)
        # float() also handles an accuracy returned as a 0-dim tensor.
        valid_accs.append(float(val_acc))

        print('Epoch [%d/%d], Val Loss: %.4f, Val Acc: %.1f%%, Time(s): %.2f' % (
            epoch + 1, args.epochs, val_loss, val_acc, time_elapsed))

        # Save model when the validation loss is at least as good as the best.
        if val_loss <= best_val_loss:
            print("Best Loss: Saving Model")
            best_val_loss = val_loss
            checkpoint = {
                'model_state_dict': convnet.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_losses': train_losses,
                'valid_losses': valid_losses,
                'valid_accs': valid_accs,
                'best_val_loss': best_val_loss
            }
            torch.save(checkpoint, args.checkpoint)


def collate_fn(batch):
    """
    Collate a list of samples into a batch, skipping the None placeholders
    that stand for samples which could not be produced.
    """
    usable = []
    for entry in batch:
        if entry is not None:
            usable.append(entry)
    return torch.utils.data.dataloader.default_collate(usable)


def get_model(num_outputs=18):
    """Build an ImageNet-pretrained ResNet-18 with a new two-layer head.

    Args:
        num_outputs: number of logits produced by the head (default 18).

    Returns:
        The model, moved to the GPU when one is available, otherwise CPU.
    """
    model = models.resnet18(pretrained=True)

    # Size the head from the backbone's feature width instead of hard-coding it.
    in_features = model.fc.in_features
    model.fc = torch.nn.Sequential(
        torch.nn.LeakyReLU(),
        torch.nn.Linear(in_features, 256),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(256, num_outputs))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return model.to(device)


def get_model_VGG():
    """Build a pretrained VGG-16 with an extra trainable 1000 -> 512 -> 18 head.

    The new layers are appended to the existing classifier; every parameter
    outside the two new linear layers is frozen.
    """
    net = models.vgg16(pretrained=True)

    head = net.classifier
    head.add_module("class_facs_1", torch.nn.Linear(1000, 512))
    head.add_module("class_facs_ReLU2", torch.nn.LeakyReLU())
    head.add_module("class_facs_2", torch.nn.Linear(512, 18))

    # Only the newly added linear layers receive gradients.
    trainable_tags = ("class_facs_1", "class_facs_2")
    for param_name, parameter in net.named_parameters():
        parameter.requires_grad = any(tag in param_name for tag in trainable_tags)

    target = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return net.to(target)



# Convert to a tensor, then normalise each channel with fixed mean / std.
# NOTE(review): images are read with cv2.imread, so these statistics are
# presumably in BGR order -- confirm against how they were computed.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4007, 0.4783, 0.6502), (0.1641, 0.1755, 0.2007)),
])

# Hyper-parameters and paths for the training run.
args_dict = dict(
    gpu=True,
    transform=transform,
    checkpoint="/content/drive/MyDrive/CSC420/pretraining/checkpoint/checkpoint.pt",
    learn_rate=0.0001,
    batch_size=35,
    epochs=12,
    num_workers=4,
    resume=False,
)

args = AttrDict()
args.update(args_dict)

# Train the ResNet-18 variant with the settings above.
train(convnet=get_model(), args=args)


Epoch [1/12], Loss: 0.3435, Time (s): 380




Epoch [1/12], Val Loss: 0.3089, Val Acc: 85.9%, Time(s): 398.08
Best Loss: Saving Model
Epoch [2/12], Loss: 0.2876, Time (s): 770
Epoch [2/12], Val Loss: 0.2831, Val Acc: 87.1%, Time(s): 786.46
Best Loss: Saving Model
Epoch [3/12], Loss: 0.2617, Time (s): 1149
Epoch [3/12], Val Loss: 0.2804, Val Acc: 87.4%, Time(s): 1166.39
Best Loss: Saving Model
Epoch [4/12], Loss: 0.2378, Time (s): 1530
Epoch [4/12], Val Loss: 0.2775, Val Acc: 87.7%, Time(s): 1546.80
Best Loss: Saving Model
Epoch [5/12], Loss: 0.2115, Time (s): 1911
Epoch [5/12], Val Loss: 0.2897, Val Acc: 87.3%, Time(s): 1927.94
Epoch [6/12], Loss: 0.1810, Time (s): 2291
Epoch [6/12], Val Loss: 0.3063, Val Acc: 87.4%, Time(s): 2307.67
