## This notebook trains ProxylessNAS with a frozen backbone on different facial-expression datasets to find out which datasets give better convergence

In [1]:
import albumentations as A
import albumentations.pytorch as Ap
import torch.nn.functional as F
from torch import cuda
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchvision.datasets.folder import default_loader
import matplotlib.pyplot as plt
import time
import math
import os
import random
import cv2
import csv
import PIL
import pandas
from os.path import join
import copy
import timm 
from collections import OrderedDict
os.environ['CUDA_LAUNCH_BLOCKING'] = "1" 
plt.ion()   # interactive mode

In [None]:
from Datasets.MyDataset_expression_IMFDB import MyDataset_expression_IMFDB
from Datasets.MyDataset_expression_FER2013 import MyDataset_expression_FER2013
from Datasets.MyDataset_expression_OZON import MyDataset_expression_OZON

In [4]:
# Per-phase preprocessing. Training uses a random resized crop plus horizontal
# flip; validation uses a deterministic resize followed by a centre crop.
# Both phases normalise with the usual ImageNet channel mean/std.
# NOTE(review): training crops are 112 px but validation crops are 100 px --
# confirm the size mismatch is intended.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(112),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(112),
        transforms.CenterCrop(100),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
}

# ---- FER2013 only ----
# The integer->integer dict is handed to the dataset class as a label mapping;
# presumably it re-orders FER2013 class ids to the NEUTRAL=0 ... FEAR=6 order
# used for IMFDB below -- confirm against MyDataset_expression_FER2013.
to_csv_path = '/storage_labs/3030/BelyakovM/Face_attributes/ds/db_BuevichP/emochon/fer2013/train.csv'
image_datasets_FER2013 = {
    phase: MyDataset_expression_FER2013(
        to_csv_path,
        phase,
        {0:4,1:5,2:6,3:1,4:2,5:3,6:0},
        data_transforms[phase],
    )
    for phase in ('train', 'val')
}
dataloaders_FER2013 = {
    phase: torch.utils.data.DataLoader(
        image_datasets_FER2013[phase],
        batch_size=63,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}
dataset_sizes_FER2013 = {phase: len(image_datasets_FER2013[phase]) for phase in ('train', 'val')}

# ---- IMFDB only ----
annotations_path = '/storage_labs/3030/BelyakovM/Face_attributes/ds/db_BuevichP/emochon/IMFDB_final_processed/overall_path_expression_annotation_existing_files.txt'
image_datasets_IMFDB = {
    phase: MyDataset_expression_IMFDB(
        annotations_path,
        phase,
        {'NEUTRAL':0, 'HAPPINESS':1, 'SADNESS':2, 'SURPRISE':3, 'ANGER':4, 'DISGUST':5,'FEAR':6},
        data_transforms[phase],
    )
    for phase in ('train', 'val')
}
dataloaders_IMFDB = {
    phase: torch.utils.data.DataLoader(
        image_datasets_IMFDB[phase],
        batch_size=28,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}
dataset_sizes_IMFDB = {phase: len(image_datasets_IMFDB[phase]) for phase in ('train', 'val')}

# ---- IMFDB + FER2013 composite ----
# ConcatDataset chains the two per-phase datasets (FER2013 first, then IMFDB).
image_datasets_IMFDB_FER2013 = {
    phase: torch.utils.data.ConcatDataset(
        [image_datasets_FER2013[phase], image_datasets_IMFDB[phase]]
    )
    for phase in ('train', 'val')
}
dataloaders_IMFDB_FER2013 = {
    phase: torch.utils.data.DataLoader(
        image_datasets_IMFDB_FER2013[phase],
        batch_size=63,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}

dataset_sizes_IMFDB_FER2013 = {phase: len(image_datasets_IMFDB_FER2013[phase]) for phase in ('train', 'val')}

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [31]:
# ---- OZON only ----
# Same construction pattern as the other datasets: one dataset / loader / size
# entry per phase, with string expression names mapped to class ids 0..6.
annotations_path_ozon = '/storage_labs/3030/BelyakovM/Face_attributes/ds/db_BuevichP/emochon/OZON_expressions_dataset/train_7expressions_annotation.txt'
image_datasets_ozon = {
    phase: MyDataset_expression_OZON(
        annotations_path_ozon,
        phase,
        {'neutral':0, 'happy':1, 'sad':2, 'surprise':3, 'anger':4, 'disgust':5,'fear':6},
        data_transforms[phase],
    )
    for phase in ('train', 'val')
}
dataloaders_ozon = {
    phase: torch.utils.data.DataLoader(
        image_datasets_ozon[phase],
        batch_size=28,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}
dataset_sizes_ozon = {phase: len(image_datasets_ozon[phase]) for phase in ('train', 'val')}

In [34]:
# ---- IMFDB + FER2013 + OZON composite ----
# Per phase, chain the three datasets in the order FER2013, IMFDB, OZON.
image_datasets_IMFDB_FER2013_OZON = {
    phase: torch.utils.data.ConcatDataset([
        image_datasets_FER2013[phase],
        image_datasets_IMFDB[phase],
        image_datasets_ozon[phase],
    ])
    for phase in ('train', 'val')
}
dataloaders_IMFDB_FER2013_OZON = {
    phase: torch.utils.data.DataLoader(
        image_datasets_IMFDB_FER2013_OZON[phase],
        batch_size=63,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}

dataset_sizes_IMFDB_FER2013_OZON = {
    phase: len(image_datasets_IMFDB_FER2013_OZON[phase]) for phase in ('train', 'val')
}

In [38]:
# ---- IMFDB + OZON composite ----
# Per phase, chain the IMFDB dataset followed by the OZON dataset.
image_datasets_IMFDB_OZON = {
    phase: torch.utils.data.ConcatDataset([
        image_datasets_IMFDB[phase],
        image_datasets_ozon[phase],
    ])
    for phase in ('train', 'val')
}
dataloaders_IMFDB_OZON = {
    phase: torch.utils.data.DataLoader(
        image_datasets_IMFDB_OZON[phase],
        batch_size=63,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}

dataset_sizes_IMFDB_OZON = {
    phase: len(image_datasets_IMFDB_OZON[phase]) for phase in ('train', 'val')
}

In [5]:
def train_model(model, criterion, optimizer, scheduler,dataloaders,dataset_sizes, num_epochs=25):
    """Train ``model`` and return it loaded with its best-validation weights.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    weights that give the highest validation accuracy are kept and loaded back
    into the model before it is returned.

    Module modes: the train/eval flag of every sub-module is recorded when the
    function is entered and restored at the start of each train phase. Layers
    the caller deliberately put in eval mode (e.g. frozen BatchNorm layers)
    therefore stay in eval mode while training, and everything the caller left
    in train mode is trained in train mode. The validation phase always runs
    with the whole model in eval mode. A model passed in fully in eval mode is
    trained in eval mode.

    Args:
        model: network whose forward pass returns an indexable result; the
            element at index 2 is used as the classification logits
            (presumably the expression head -- confirm against the model class).
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        dataloaders: mapping with ``'train'`` and ``'val'`` keys, each an
            iterable of ``(inputs, labels)`` batches.
        dataset_sizes: mapping with ``'train'`` and ``'val'`` keys giving the
            number of samples used to average loss and accuracy.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    since = time.time()

    # Snapshot the per-module mode chosen by the caller so the train phase can
    # restore it instead of forcing the whole model into a single mode.
    caller_modes = [(module, module.training) for module in model.modules()]

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                # Restore the caller's configuration (frozen layers stay in eval).
                for module, was_training in caller_modes:
                    module.training = was_training
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)[2]
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [6]:
from utils.MultiTaskModel_grouped_age_head_proxyless import MultiTaskModel_grouped_age_head
from utils.MultiTaskModel_proxyless import MultiTaskModel

In [8]:

# Build the multi-task network on top of the pretrained ProxylessNAS-CPU
# backbone fetched through torch.hub.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)
# Replace fc2 with a 1432 -> 1400 linear layer before wrapping with the grouped-age head.
model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)
# Restore the gender/age-trained checkpoint, mapping tensors onto `device`.
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_age_trained.pth',
        map_location=device,
    )
)
# Last expression of the cell: moves the model and displays its repr.
model_ft.to(device)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


MultiTaskModel_grouped_age_head(
  (encoder): MultiTaskModel(
    (encoder): ProxylessNASNets(
      (first_conv): ConvLayer(
        (bn): BatchNorm2d(40, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU6(inplace=True)
        (conv): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      )
      (blocks): ModuleList(
        (0): MobileInvertedResidualBlock(
          (mobile_inverted_conv): MBInvertedConvLayer(
            (depth_conv): Sequential(
              (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
              (bn): BatchNorm2d(40, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
              (relu): ReLU6(inplace=True)
            )
            (point_linear): Sequential(
              (conv): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_run

In [9]:
# Freeze the backbone: every BatchNorm layer under the encoder is switched to
# eval mode and all parameters stop requiring gradients, except those of
# encoder.fc2, encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

# Re-enable gradients for the heads that should keep training.
for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias


## Frozen model training on FER2013 dataset only

In [10]:
# Cross-entropy loss over the expression classes.
criterion = nn.CrossEntropyLoss()

# SGD receives every parameter; only those with requires_grad=True get gradients.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)


In [75]:
# Train on FER2013 only. train_model takes the per-phase sample counts as a
# required argument right after the dataloaders, so dataset_sizes_FER2013 must
# be passed here (omitting it raises TypeError).
model_ft = train_model(model_ft, criterion,optimizer_ft, exp_lr_scheduler,dataloaders_FER2013,dataset_sizes_FER2013,
                       num_epochs=30)

Epoch 0/29
----------
train Loss: 1.8163 Acc: 0.2430
val Loss: 1.8044 Acc: 0.2526

Epoch 1/29
----------
train Loss: 1.8043 Acc: 0.2462
val Loss: 1.8136 Acc: 0.2548

Epoch 2/29
----------
train Loss: 1.8024 Acc: 0.2474
val Loss: 1.8181 Acc: 0.2562

Epoch 3/29
----------
train Loss: 1.7994 Acc: 0.2468
val Loss: 1.8236 Acc: 0.2473

Epoch 4/29
----------
train Loss: 1.7997 Acc: 0.2476
val Loss: 1.7993 Acc: 0.2538

Epoch 5/29
----------
train Loss: 1.7994 Acc: 0.2540
val Loss: 1.8122 Acc: 0.2526

Epoch 6/29
----------
train Loss: 1.7981 Acc: 0.2497
val Loss: 1.7981 Acc: 0.2543

Epoch 7/29
----------
train Loss: 1.7908 Acc: 0.2551
val Loss: 1.7915 Acc: 0.2555

Epoch 8/29
----------
train Loss: 1.7904 Acc: 0.2579
val Loss: 1.7932 Acc: 0.2557

Epoch 9/29
----------
train Loss: 1.7890 Acc: 0.2570
val Loss: 1.7950 Acc: 0.2554

Epoch 10/29
----------
train Loss: 1.7919 Acc: 0.2555
val Loss: 1.7945 Acc: 0.2552

Epoch 11/29
----------
train Loss: 1.7898 Acc: 0.2580
val Loss: 1.7933 Acc: 0.2552

Ep

In [9]:
# Rebuild the network from the pretrained ProxylessNAS-CPU backbone and the
# gender/age checkpoint so this run does not start from previously tuned heads.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)
model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_age_trained.pth',
        map_location=device,
    )
)
model_ft.to(device)

# Freeze the backbone: BatchNorm layers under the encoder go to eval mode and
# all parameters stop requiring gradients, except those of encoder.fc2,
# encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias


## Frozen model training on IMFDB dataset only

In [11]:
# Fresh optimiser and step-decay schedule for the IMFDB-only run.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    dataloaders=dataloaders_IMFDB,
    dataset_sizes=dataset_sizes_IMFDB,
    num_epochs=30,
)

Epoch 0/29
----------
train Loss: 1.6863 Acc: 0.3411
val Loss: 1.5876 Acc: 0.3868

Epoch 1/29
----------
train Loss: 1.6451 Acc: 0.3611
val Loss: 1.5858 Acc: 0.3972

Epoch 2/29
----------
train Loss: 1.6467 Acc: 0.3620
val Loss: 1.6213 Acc: 0.3454

Epoch 3/29
----------
train Loss: 1.6313 Acc: 0.3693
val Loss: 1.5572 Acc: 0.3956

Epoch 4/29
----------
train Loss: 1.6240 Acc: 0.3705
val Loss: 1.5400 Acc: 0.4127

Epoch 5/29
----------
train Loss: 1.6260 Acc: 0.3676
val Loss: 1.5389 Acc: 0.4136

Epoch 6/29
----------
train Loss: 1.6200 Acc: 0.3690
val Loss: 1.5554 Acc: 0.3972

Epoch 7/29
----------
train Loss: 1.5653 Acc: 0.3946
val Loss: 1.5043 Acc: 0.4217

Epoch 8/29
----------
train Loss: 1.5630 Acc: 0.3956
val Loss: 1.5037 Acc: 0.4171

Epoch 9/29
----------
train Loss: 1.5566 Acc: 0.3992
val Loss: 1.4977 Acc: 0.4265

Epoch 10/29
----------
train Loss: 1.5544 Acc: 0.3973
val Loss: 1.5041 Acc: 0.4176

Epoch 11/29
----------
train Loss: 1.5548 Acc: 0.3969
val Loss: 1.5008 Acc: 0.4165

Ep

In [14]:
# Rebuild the network from the pretrained ProxylessNAS-CPU backbone and the
# gender/age checkpoint so this run does not start from previously tuned heads.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)
model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_age_trained.pth',
        map_location=device,
    )
)
model_ft.to(device)

# Freeze the backbone: BatchNorm layers under the encoder go to eval mode and
# all parameters stop requiring gradients, except those of encoder.fc2,
# encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias


## Frozen model training on IMFDB-FER2013 composite dataset

In [15]:
# Fresh optimiser and step-decay schedule for the IMFDB + FER2013 run.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    dataloaders=dataloaders_IMFDB_FER2013,
    dataset_sizes=dataset_sizes_IMFDB_FER2013,
    num_epochs=30,
)

Epoch 0/29
----------
train Loss: 1.9967 Acc: 0.1289
val Loss: 2.0480 Acc: 0.0832

Epoch 1/29
----------
train Loss: 1.9940 Acc: 0.1352
val Loss: 2.0480 Acc: 0.0832

Epoch 2/29
----------
train Loss: 1.9962 Acc: 0.1346
val Loss: 2.0480 Acc: 0.0832

Epoch 3/29
----------
train Loss: 1.9959 Acc: 0.1339
val Loss: 2.0480 Acc: 0.0832

Epoch 4/29
----------
train Loss: 1.9942 Acc: 0.1329
val Loss: 2.0480 Acc: 0.0832

Epoch 5/29
----------
train Loss: 1.9926 Acc: 0.1345
val Loss: 2.0480 Acc: 0.0832

Epoch 6/29
----------
train Loss: 1.9954 Acc: 0.1309
val Loss: 2.0480 Acc: 0.0832

Epoch 7/29
----------
train Loss: 1.9929 Acc: 0.1356
val Loss: 2.0480 Acc: 0.0832

Epoch 8/29
----------
train Loss: 1.9947 Acc: 0.1318
val Loss: 2.0480 Acc: 0.0832

Epoch 9/29
----------
train Loss: 1.9957 Acc: 0.1328
val Loss: 2.0480 Acc: 0.0832

Epoch 10/29
----------
train Loss: 1.9945 Acc: 0.1340
val Loss: 2.0480 Acc: 0.0832

Epoch 11/29
----------
train Loss: 1.9940 Acc: 0.1334
val Loss: 2.0480 Acc: 0.0832

Ep

In [27]:
# Rebuild the network for the OZON run.
# NOTE(review): unlike the other runs, this cell loads the
# "gender_classes_3_heads" checkpoint into the MultiTaskModel *before* fc2 is
# replaced and the grouped-age head is added, so fc2 and the wrapper's own
# layers keep their fresh initialisation -- confirm this is intended.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_classes_3_heads.pth',
        map_location=device,
    )
)
model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)

model_ft.to(device)

# Freeze the backbone: BatchNorm layers under the encoder go to eval mode and
# all parameters stop requiring gradients, except those of encoder.fc2,
# encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias


## Frozen model training on OZON dataset only

In [32]:
# Fresh optimiser and step-decay schedule for the OZON-only run.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    dataloaders=dataloaders_ozon,
    dataset_sizes=dataset_sizes_ozon,
    num_epochs=20,
)

Epoch 0/19
----------
train Loss: 1.7420 Acc: 0.3419
val Loss: 1.6746 Acc: 0.3511

Epoch 1/19
----------
train Loss: 1.7132 Acc: 0.3477
val Loss: 1.6441 Acc: 0.3624

Epoch 2/19
----------
train Loss: 1.7143 Acc: 0.3489
val Loss: 1.6273 Acc: 0.3744

Epoch 3/19
----------
train Loss: 1.7083 Acc: 0.3503
val Loss: 1.6874 Acc: 0.3529

Epoch 4/19
----------
train Loss: 1.7030 Acc: 0.3542
val Loss: 1.6313 Acc: 0.3763

Epoch 5/19
----------
train Loss: 1.6092 Acc: 0.3834
val Loss: 1.5831 Acc: 0.3845

Epoch 6/19
----------
train Loss: 1.5960 Acc: 0.3907
val Loss: 1.5734 Acc: 0.3926

Epoch 7/19
----------
train Loss: 1.5956 Acc: 0.3888
val Loss: 1.5693 Acc: 0.3974

Epoch 8/19
----------
train Loss: 1.5933 Acc: 0.3896
val Loss: 1.5615 Acc: 0.3947

Epoch 9/19
----------
train Loss: 1.5888 Acc: 0.3934
val Loss: 1.5623 Acc: 0.4001

Epoch 10/19
----------
train Loss: 1.5864 Acc: 0.3942
val Loss: 1.5549 Acc: 0.4003

Epoch 11/19
----------
train Loss: 1.5860 Acc: 0.3946
val Loss: 1.5568 Acc: 0.3962

Ep

## Frozen model training on IMFDB-FER2013-OZON composite dataset

In [None]:
# Rebuild the network from the pretrained ProxylessNAS-CPU backbone and the
# gender/age checkpoint so this run does not start from previously tuned heads.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)

model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_age_trained.pth',
        map_location=device,
    )
)
model_ft.to(device)

# Freeze the backbone: BatchNorm layers under the encoder go to eval mode and
# all parameters stop requiring gradients, except those of encoder.fc2,
# encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

In [37]:
# Loss, optimiser and step-decay schedule for the IMFDB + FER2013 + OZON run.
criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    dataloaders=dataloaders_IMFDB_FER2013_OZON,
    dataset_sizes=dataset_sizes_IMFDB_FER2013_OZON,
    num_epochs=20,
)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias
Epoch 0/19
----------
train Loss: 1.7715 Acc: 0.2941
val Loss: 1.7154 Acc: 0.3175

Epoch 1/19
----------
train Loss: 1.7338 Acc: 0.3111
val Loss: 1.7017 Acc: 0.3221

Epoch 2/19
----------
train Loss: 1.7241 Acc: 0.3181
val Loss: 1.6963 Acc: 0.3167

Epoch 3/19
----------
train Loss: 1.7156 Acc: 0.3205
val Loss: 1.6756 Acc: 0.3379

Epoch 4/19
----------
train Loss: 1.7138 Acc: 0.3213
val Loss: 1.6936 Acc: 0.3248

Epoch 5/19
----------
train Loss: 1.7142 Acc: 0.3207
val Loss: 1.6814 Acc: 0.3303

Epoch 6/19
----------
train Loss: 1.7086 Acc: 0.3251
val Loss: 1.6858 Acc: 0.3295

Epoch 7/19
----------
train Loss: 1.6828 Acc: 0.3358
val Loss: 1.6552 Acc: 0.3472

Epoch 8/19
----------
train Loss: 1.6767 Acc: 0.3400
val Loss: 1.6569 Acc: 0.3435

Epoch 9/19
----------
train Loss: 1.6773 Acc: 0.3386
val Loss: 1.6570 Acc: 0.3451

Epoch 10/19
----------
train Loss: 1.6766 Acc: 0.3395
va

## Frozen model training on IMFDB-OZON composite dataset

In [39]:
# Rebuild the network from the pretrained ProxylessNAS-CPU backbone and the
# gender/age checkpoint so this run does not start from previously tuned heads.
model_ft = torch.hub.load('mit-han-lab/ProxylessNAS', "proxyless_cpu", pretrained=True)
# Keep all but the last three children of the stock classifier.
model_ft.classifier = nn.Sequential(
    *list(model_ft.classifier.children())[:-3]
)
model_ft = MultiTaskModel(model_ft)

model_ft.fc2 = nn.Linear(1432, 1400, bias=True)
model_ft = MultiTaskModel_grouped_age_head(model_ft)
model_ft.load_state_dict(
    torch.load(
        '/storage_labs/3030/BelyakovM/Face_attributes/Saved_models/proxyless-cpu_gender_age_trained.pth',
        map_location=device,
    )
)
model_ft.to(device)

# Freeze the backbone: BatchNorm layers under the encoder go to eval mode and
# all parameters stop requiring gradients, except those of encoder.fc2,
# encoder.fc3 and age_group_head.
for layer in model_ft.encoder.modules():
    if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        layer.eval()

for weight in model_ft.parameters():
    weight.requires_grad = False

for head in (model_ft.encoder.fc2, model_ft.encoder.fc3, model_ft.age_group_head):
    for weight in head.parameters():
        weight.requires_grad = True

# List the parameters that remain trainable as a sanity check.
for name in (n for n, p in model_ft.named_parameters() if p.requires_grad):
    print(name)

Using cache found in /root/.cache/torch/hub/mit-han-lab_ProxylessNAS_master


encoder.fc2.weight
encoder.fc2.bias
encoder.fc3.weight
encoder.fc3.bias
age_group_head.weight
age_group_head.bias
Epoch 0/19
----------
train Loss: 1.7524 Acc: 0.3142
val Loss: 1.6680 Acc: 0.3472

Epoch 1/19
----------
train Loss: 1.7003 Acc: 0.3406
val Loss: 1.6474 Acc: 0.3630

Epoch 2/19
----------
train Loss: 1.6860 Acc: 0.3468
val Loss: 1.6412 Acc: 0.3610

Epoch 3/19
----------
train Loss: 1.6792 Acc: 0.3496
val Loss: 1.6641 Acc: 0.3465

Epoch 4/19
----------
train Loss: 1.6744 Acc: 0.3524
val Loss: 1.6354 Acc: 0.3643

Epoch 5/19
----------
train Loss: 1.6670 Acc: 0.3548
val Loss: 1.6111 Acc: 0.3714

Epoch 6/19
----------
train Loss: 1.6668 Acc: 0.3549
val Loss: 1.6102 Acc: 0.3750

Epoch 7/19
----------
train Loss: 1.6316 Acc: 0.3723
val Loss: 1.5940 Acc: 0.3809

Epoch 8/19
----------
train Loss: 1.6287 Acc: 0.3727
val Loss: 1.5912 Acc: 0.3855

Epoch 9/19
----------
train Loss: 1.6281 Acc: 0.3724
val Loss: 1.5969 Acc: 0.3811

Epoch 10/19
----------
train Loss: 1.6251 Acc: 0.3762
va

In [40]:
# Fresh optimiser and step-decay schedule for the IMFDB + OZON run.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.9, lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    dataloaders=dataloaders_IMFDB_OZON,
    dataset_sizes=dataset_sizes_IMFDB_OZON,
    num_epochs=20,
)

Epoch 0/19
----------
train Loss: 1.6227 Acc: 0.3765
val Loss: 1.5852 Acc: 0.3893

Epoch 1/19
----------
train Loss: 1.6218 Acc: 0.3781
val Loss: 1.5853 Acc: 0.3890

Epoch 2/19
----------
train Loss: 1.6203 Acc: 0.3772
val Loss: 1.5854 Acc: 0.3891

Epoch 3/19
----------
train Loss: 1.6191 Acc: 0.3788
val Loss: 1.5853 Acc: 0.3898

Epoch 4/19
----------
train Loss: 1.6207 Acc: 0.3759
val Loss: 1.5854 Acc: 0.3895

Epoch 5/19
----------
train Loss: 1.6196 Acc: 0.3773
val Loss: 1.5854 Acc: 0.3893

Epoch 6/19
----------
train Loss: 1.6198 Acc: 0.3753
val Loss: 1.5853 Acc: 0.3902

Epoch 7/19
----------
train Loss: 1.6180 Acc: 0.3785
val Loss: 1.5853 Acc: 0.3894

Epoch 8/19
----------
train Loss: 1.6182 Acc: 0.3794
val Loss: 1.5853 Acc: 0.3895

Epoch 9/19
----------
train Loss: 1.6164 Acc: 0.3791
val Loss: 1.5853 Acc: 0.3894

Epoch 10/19
----------
train Loss: 1.6202 Acc: 0.3776
val Loss: 1.5853 Acc: 0.3895

Epoch 11/19
----------
train Loss: 1.6201 Acc: 0.3761
val Loss: 1.5853 Acc: 0.3894

Ep