In [1]:
import numpy as np
from PIL import Image
import os, json, math, time
from typing import Union, List, Dict, cast
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.nn.functional as F
import torch.utils.data as data
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from torch.autograd import Variable
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
#from torchsummary import summary

In [2]:
# global constants

training_size = 28317 + 3541

__all__ = [
    "VGG",
    # "vgg11",
    # "vgg11_bn",
    # "vgg13",
    # "vgg13_bn",
    "vgg16",
    "vgg16_bn",
    # "vgg19_bn",
    # "vgg19",
]

model_urls = {
    "vgg11": "https://download.pytorch.org/models/vgg11-8a719046.pth",
    "vgg13": "https://download.pytorch.org/models/vgg13-19584684.pth",
    "vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth",
    "vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth",
    "vgg11_bn": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth",
    "vgg13_bn": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth",
    "vgg16_bn": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth",
    "vgg19_bn": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth",
}

cfgs: Dict[str, List[Union[str, int]]] = {
    "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "D": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "E": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}

In [3]:
# device setup

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' 
if torch.cuda.device_count() > 0:  
    print("Using %d GPU(s)" % torch.cuda.device_count())
else:
    print("Using CPU")
cudnn.benchmark = True
cudnn.enabled = True

Using 2 GPU(s)


In [4]:
# data loading & data processing

def load_data(image_path, emotion_path, subset):
    images = np.load(image_path)        # shape = (35393, 48, 48, 1)
    images = images/255.0
    images = np.float32(images)
    emotions = np.load(emotion_path)    # shape = (35393, 8)
    emotions = np.float32(emotions)
    crops1 = images[:,:36,:36,:]        # shape = (35393, 36, 36, 1)
    crops2 = images[:,:36,12:,:]        # shape = (35393, 36, 36, 1)
    crops3 = images[:,12:,6:42,:]       # shape = (35393, 36, 36, 1)
    crops4 = images[:,3:46,3:46,:]      # shape = (35393, 43, 43, 1)
    crops5 = images[:,4:44,4:44,:]      # shape = (35393, 40, 40, 1)

    if subset == 'train':
        return images[:training_size], crops1[:training_size], crops2[:training_size], crops3[:training_size], crops4[:training_size], crops5[:training_size], emotions[:training_size]
    if subset == 'test':
        return images[training_size:], crops1[training_size:], crops2[training_size:], crops3[training_size:], crops4[training_size:], crops5[training_size:], emotions[training_size:]

def fixed_crop(img, img_size):
    img = torch.tensor(img)                             # (img_size, img_size, 1)
    img = torch.reshape(img, (1, img_size, img_size))   # (1, img_size, img_size)
    img = transforms.Resize([48, 48])(img)              # (1, 48, 48)
    img = img.repeat(3, 1, 1)                           # (3, 48, 48)
    
    return img

class FERPlusDataset(data.Dataset):
    def __init__(self, image_path, emotion_path, subset):
        assert(subset=='train' or subset=='test')
        self.images, self.crops1, self.crops2, self.crops3, self.crops4, self.crops5, self.emotions = load_data(image_path, emotion_path, subset)

    def __getitem__(self, index):
        image = self.images[index]
        crop1 = self.crops1[index]
        crop2 = self.crops2[index]
        crop3 = self.crops3[index]
        crop4 = self.crops4[index]
        crop5 = self.crops5[index]
        emotion = self.emotions[index]

        image = fixed_crop(image, 48)
        crop1 = fixed_crop(crop1, 36)
        crop2 = fixed_crop(crop2, 36)
        crop3 = fixed_crop(crop3, 36)
        crop4 = fixed_crop(crop4, 43)
        crop5 = fixed_crop(crop5, 40)

        return image, crop1, crop2, crop3, crop4, crop5, emotion

    def __len__(self):
        return len(self.images)

In [5]:
# model architecture

class VGG(nn.Module):
    def __init__(self, features, init_weights=True):
        super().__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.alpha = nn.Sequential(nn.Linear(512, 1),
                                   nn.Sigmoid())
        self.beta = nn.Sequential(nn.Linear(1024, 1),
                                  nn.Sigmoid())
        self.fc = nn.Sequential(nn.Linear(1024, 8),
                                nn.LogSoftmax(dim=1))
        
#         for m in self.modules():
#             if isinstance(m, nn.Conv2d):
#                 nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
#                 if m.bias is not None:
#                     nn.init.constant_(m.bias, 0)
#             elif isinstance(m, nn.BatchNorm2d):
#                 nn.init.constant_(m.weight, 1)
#                 nn.init.constant_(m.bias, 0)
#             elif isinstance(m, nn.Linear):
#                 nn.init.normal_(m.weight, 0, 0.01)
#                 nn.init.constant_(m.bias, 0)

    def forward(self, x):
        vs = []
        alphas = []
        for i in range(6):
            f = x[:,:,:,:,i] # x.shape = (32, 3, 48, 48, 6)
            f = transforms.Resize([224, 224])(f)
            f = self.features(f)
            f = self.avgpool(f)
            f = f.squeeze(3).squeeze(2)
            vs.append(f)
            alphas.append(self.alpha(f))
        vs_stack = torch.stack(vs, dim=2)
        alphas_stack = torch.stack(alphas, dim=2)
        alphas_stack = F.softmax(alphas_stack, dim=2)
        alphas_part_max = alphas_stack[:, :, 0:5].max(dim=2)[0]
        alphas_org = alphas_stack[:, :, 0]
        vm = vs_stack.mul(alphas_stack).sum(2).div(alphas_stack.sum(2))
        for i in range(len(vs)):
            vs[i] = torch.cat([vs[i], vm], dim=1)
        vs_stack_4096 = torch.stack(vs, dim=2)

        betas = []
        for index, v in enumerate(vs):
            betas.append(self.beta(v))
        betas_stack = torch.stack(betas, dim=2)
        betas_stack = F.softmax(betas_stack, dim=2)

        output = vs_stack_4096.mul(betas_stack * alphas_stack).sum(2).div((betas_stack * alphas_stack).sum(2))
        output = output.view(output.size(0), -1)
        pred_score = self.fc(output)

        return pred_score, alphas_part_max, alphas_org

def make_layers(cfg, batch_norm=False):
    layers: List[nn.Module] = []
    in_channels = 3
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            v = cast(int, v)
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
    if pretrained:
        kwargs["init_weights"] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model_state_dict = model.state_dict()
        for key in state_dict:
            if not ((key == 'module.fc.weight') | (key == 'module.fc.bias')):
                model_state_dict[key] = state_dict[key]
        model.load_state_dict(model_state_dict, strict=False)
    return model


def vgg16(pretrained=False, progress=True, **kwargs):
    return _vgg("vgg16", "D", False, pretrained, progress, **kwargs)

    
def vgg16_bn(pretrained=False, progress=True, **kwargs):
    return _vgg("vgg16_bn", "D", True, pretrained, progress, **kwargs)

In [6]:
class RBLoss(nn.Module):
    def __init__(self):
        super(RBLoss, self).__init__()

    def forward(self, alphas_part_max, alphas_org):
        size = alphas_org.shape[0]
        loss_wt = 0.0
        for i in range(size):
            loss_wt += max((torch.Tensor([0])).to(device), 0.02 - (alphas_part_max[i] - alphas_org[i]))
        RBLoss = loss_wt / size
        
        return RBLoss

def accuracy(output, target):
    batch_size = target.size(0)
    acc = 0
    for i in range(batch_size):
        true = target[i]
        pred = output[i]
        index_max = torch.argmax(pred)
        if true[index_max] == torch.max(true):
            acc += 1
    acc = float(acc)/batch_size
    return acc

def adjust_learning_rate(optimizer, epoch):
    """Adjust the learning rate of the optimizer"""
    if epoch in [15, 30]:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= 0.1
            print('lr',param_group['lr'])

class AverageMeter(object): 
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [7]:
def train(train_loader, model, loss1, loss2, optimizer, epoch):
    batch_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    model.train()
    end = time.time()

    for i, (images, crops1, crops2, crops3, crops4, crops5, emotions) in enumerate(train_loader):
        input = torch.zeros([images.shape[0], images.shape[1], images.shape[2], images.shape[3], 6])
        input[:,:,:,:,0] = images.to(device)
        input[:,:,:,:,1] = crops1.to(device)
        input[:,:,:,:,2] = crops2.to(device)
        input[:,:,:,:,3] = crops3.to(device)
        input[:,:,:,:,4] = crops4.to(device)
        input[:,:,:,:,5] = crops5.to(device)
        target = emotions.to(device)

        input_var = torch.autograd.Variable(input).to(device)
        target_var = torch.autograd.Variable(target).to(device)
        pred_score, alphas_part_max, alphas_org = model(input_var)

        loss = loss1(pred_score, target_var) + loss2(alphas_part_max, alphas_org)
        acc = accuracy(pred_score.data, target)
        losses.update(loss.item(), input.size(0))
        accuracies.update(acc, input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        print('\r',
              'Training [Epoch: {}/{} ({}/{})]: '
              'Time {:.2f}s ({:.2f}s) '
              'Loss {:.6f} ({:.6f}) '
              'Accuracy {:.4f} ({:.4f})'
              .format(epoch+1, epochs, i+1, len(train_loader), 
                      batch_time.val, batch_time.avg,
                      losses.val, losses.avg,
                      accuracies.val, accuracies.avg),
              end='')
        
    torch.cuda.empty_cache()


def validate(val_loader, model, loss1, loss2):
    with torch.no_grad():
        batch_time = AverageMeter()
        losses = AverageMeter()
        accuracies = AverageMeter()

        model.eval()
        end = time.time()
        print()
        for i, (images, crops1, crops2, crops3, crops4, crops5, emotions) in enumerate(val_loader):
            input = torch.zeros([images.shape[0], images.shape[1], images.shape[2], images.shape[3], 6])
            input[:,:,:,:,0] = images.to(device).detach()
            input[:,:,:,:,1] = crops1.to(device).detach()
            input[:,:,:,:,2] = crops2.to(device).detach()
            input[:,:,:,:,3] = crops3.to(device).detach()
            input[:,:,:,:,4] = crops4.to(device).detach()
            input[:,:,:,:,5] = crops5.to(device).detach()
            target = emotions.to(device).detach()

            input_var = torch.autograd.Variable(input).to(device).detach()
            target_var = torch.autograd.Variable(target).to(device).detach()
            pred_score, alphas_part_max, alphas_org = model(input_var)

            loss = loss1(pred_score, target_var) + loss2(alphas_part_max, alphas_org)
            acc = accuracy(pred_score.data, target)
            losses.update(loss.data[0], input.size(0))
            accuracies.update(acc, input.size(0))
            
            batch_time.update(time.time() - end)
            end = time.time()

            print('\r',
                  'Test [Batch: {}/{}]: '
                  'Time {:.2f}s ({:.2f}s) '
                  'Loss {:.6f} ({:.6f}) '
                  'Accuracy {:.4f} ({:.4f})'
                  .format(i+1, len(val_loader), 
                          batch_time.val, batch_time.avg,
                          losses.val, losses.avg,
                          accuracies.val, accuracies.avg),
                  end='')
            torch.cuda.empty_cache()
        print(' *** Test Accuracy {:.4f} ***'.format(accuracies.avg))
              
        return accuracies.avg

In [8]:
def inplace_relu(m):
    classname = m.__class__.__name__
    if classname.find('ReLU') != -1:
        m.inplace=True

In [9]:
epochs = 40
batch_size_train = 16
batch_size_test = 16
learning_rate = 1e-4
momentum = 0.9
weight_decay = 1e-4
image_path = '../dataset/aligned_images.npy'
emotion_path = '../dataset/emotions_multi.npy'

train_loader = torch.utils.data.DataLoader(
    FERPlusDataset(
        image_path,
        emotion_path,
        'train'
    ),
    batch_size=batch_size_train,
    shuffle=True,
)

val_loader = torch.utils.data.DataLoader(
    FERPlusDataset(
        image_path,
        emotion_path,
        'test'
    ),
    batch_size=batch_size_test,
    shuffle=False,
)

In [10]:
model = vgg16_bn(pretrained=True)
model.apply(inplace_relu)
if torch.cuda.device_count() > 1: 
    model = nn.DataParallel(model)
model = model.to(device)
#summary(model,(3,48,48,6))

In [11]:
loss1 = nn.KLDivLoss(reduction='batchmean')
loss1 = loss1.to(device)
loss2 = RBLoss() 
loss2 = loss2.to(device)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate)

In [None]:
accs = []
for epoch in range(epochs):
    #if epoch==0: torch.cuda.empty_cache()
    #adjust_learning_rate(optimizer, epoch)
    train(train_loader, model, loss1, loss2, optimizer, epoch)
    accs.append(validate(val_loader, model, loss1, loss2))
    with open('./results/vgg16-IN-Adam-224_bmean_freemem.json', 'w', encoding='utf-8') as f:
        json.dump(accs, f, ensure_ascii=False)

 Training [Epoch: 1/40 (1992/1992)]: Time 1.00s (0.42s) Loss 1.446051 (0.501939) Accuracy 0.5000 (0.7495)
 Test [Batch: 221/221]: Time 0.52s (0.17s) Loss 0.619732 (0.422211) Accuracy 0.4667 (0.7904) *** Test Accuracy 0.7904 ***
 Training [Epoch: 2/40 (1992/1992)]: Time 0.24s (0.42s) Loss 0.334198 (0.337904) Accuracy 0.5000 (0.8282)
 Test [Batch: 221/221]: Time 0.17s (0.17s) Loss 0.457853 (0.370668) Accuracy 0.8000 (0.8153) *** Test Accuracy 0.8153 ***
 Training [Epoch: 3/40 (69/1992)]: Time 0.41s (0.41s) Loss 0.305416 (0.284468) Accuracy 0.8750 (0.8533)

In [None]:
if not os.path.isdir('./results/'):
    os.mkdir('./results/')
with open('./results/vgg16-IN-Adam-224_bmean_freemem.json', 'w', encoding='utf-8') as f:
    json.dump(accs, f, ensure_ascii=False)