In [4]:
import re
import torchvision
from torchvision import datasets, models, transforms
from torchvision.datasets.folder import default_loader
import math
import pandas
import collections
from functools import partial
import albumentations as A
import albumentations.pytorch as Ap
import random
from os.path import join
import torch
import cv2
import torch.optim as optim
from torch.optim import lr_scheduler
from torch import nn
from torch.nn import functional as F
import copy
import numpy as np
from torch.utils import model_zoo
from torch.nn import Sequential, BatchNorm1d, BatchNorm2d, Dropout, Module, Linear
import yaml
import argparse
import os
import timm
import time
import sys
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## This notebook aims to find out how accurate efficientnet-b0 can converge on the age estiamtion problem and find out whether recognition pretrained weights boost the model convergence on age estiamtion problem in contrast to ImageNet weights

In [3]:
conf = argparse.ArgumentParser(description='traditional_training for face recognition.')

conf.add_argument("--backbone_type", type = str,default = 'EfficientNet',
                      help = "Mobilefacenets, Resnet.")
conf.add_argument("--backbone_conf_file", type = str ,default ='/storage_labs/3030/BelyakovM/Face_attributes/Code/EfficientNet_B0_face_recognizer/FaceX-Zoo/training_mode/backbone_conf.yaml', 
                      help = "the path of backbone_conf.yaml.")
conf.add_argument("--head_type", type = str ,default = 'AdaM-Softmax', 
                      help = "mv-softmax, arcface, npc-face.")
conf.add_argument("--head_conf_file", type = str ,default = '/storage_labs/3030/BelyakovM/Face_attributes/Code/EfficientNet_B0_face_recognizer/FaceX-Zoo/training_mode/head_conf.yaml', 
                      help = "the path of head_conf.yaml.")
    
args = conf.parse_args([])

In [4]:
def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False):
    """Loads pretrained weights from weights path or download using url.
    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)['state_dict']
        for key_name in list(state_dict.keys()):
            new_key = key_name.replace('backbone.','')
            state_dict[new_key] = state_dict.pop(key_name)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        state_dict.pop('head.weight')
        ret = model.load_state_dict(state_dict, strict=False)
        assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
    else:
        state_dict.pop('backbone._fc.weight')
        state_dict.pop('backbone._fc.bias')
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == set(
            ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
    assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)

    print('Loaded pretrained weights for {}'.format(model_name))

In [5]:
sys.path.append('/storage_labs/3030/BelyakovM/Face_attributes/Code/EfficientNet_B0_face_recognizer/FaceX-Zoo')
from backbone.backbone_def import BackboneFactory
from head.head_def import HeadFactory
class FaceModel(torch.nn.Module):

    def __init__(self, backbone_factory, head_factory):
        super(FaceModel, self).__init__()
        #Age groups: ['1-2', '3-6', '7-12', '13-17', '18-22', '23-26', '27-33', '34-44', '45-59', '60-90']
        self.idx_tensor = torch.from_numpy(np.array([1.5, 4.5, 9.5, 15, 20, 24.5, 30, 39, 52, 75])).to(device)
        self.relu = nn.ReLU()
        self.Softmax = nn.Softmax(1)
        self.backbone = backbone_factory.get_backbone()
        self.head = head_factory.get_head()
        self.age_group_head = Sequential(nn.BatchNorm2d(1280),nn.Flatten(),nn.Linear(in_features=62720, out_features=1000, bias=True),self.relu ,nn.Linear(in_features=1000, out_features=30, bias=True),self.relu ,nn.Linear(in_features=30, out_features=10, bias=True))

    def forward(self, data):
        grouped_age = self.age_group_head(self.backbone.extract_features(data))
        regression_age = torch.sum(self.Softmax(grouped_age) * self.idx_tensor, axis=1)
        return grouped_age,regression_age

In [4]:

    
backbone_factory = BackboneFactory(args.backbone_type, args.backbone_conf_file)   
head_factory = HeadFactory(args.head_type, args.head_conf_file)
efficientnet_b0_pretrained_frozen_age_head2feats = FaceModel(backbone_factory, head_factory)
load_pretrained_weights(efficientnet_b0_pretrained_frozen_age_head2feats.backbone,args.backbone_type,weights_path ='/storage_labs/3030/BelyakovM/Face_attributes/Code/EfficientNet_B0_face_recognizer/efficientnet_facerecognition_weights.pt',load_fc=True )
efficientnet_b0_pretrained_frozen_age_head2feats.to(device)

  backbone_conf = yaml.load(f)
  head_conf = yaml.load(f)


backbone param:
{'width': 1.0, 'depth': 1.0, 'image_size': 112, 'drop_ratio': 0.2, 'out_h': 7, 'out_w': 7, 'feat_dim': 512}
head param:
{'feat_dim': 512, 'num_class': 72778, 'scale': 32, 'lamda': 70.0}
Loaded pretrained weights for EfficientNet


FaceModel(
  (relu): ReLU()
  (Softmax): Softmax(dim=1)
  (backbone): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(1, 1), bias=False
      (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (stat

In [5]:
for module in efficientnet_b0_pretrained_frozen_age_head2feats.backbone.modules():
    if isinstance(module,nn.modules.BatchNorm1d):
        module.eval()
    if isinstance(module,nn.modules.BatchNorm2d):
        module.eval()
    if isinstance(module,nn.modules.BatchNorm3d):
        module.eval()
for i in efficientnet_b0_pretrained_frozen_age_head2feats.parameters():
    i.requires_grad = False
for param in efficientnet_b0_pretrained_frozen_age_head2feats.age_group_head.parameters():
    param.requires_grad = True

for name,param in efficientnet_b0_pretrained_frozen_age_head2feats.named_parameters():
    if param.requires_grad:
        print(name)

age_group_head.0.weight
age_group_head.0.bias
age_group_head.2.weight
age_group_head.2.bias
age_group_head.4.weight
age_group_head.4.bias
age_group_head.6.weight
age_group_head.6.bias


In [5]:
#Custom Dataset and Dataloader for age training  with Albumentations
class MyDataset_age(torch.utils.data.Dataset):
    def __init__(self,root,phase,annotation_name,classes,transform = None,loader = default_loader):

        self.group_dict = {0:list(range(1,3)), 1:list(range(3,7)), 2:list(range(7,13)), 3:list(range(13,18)), 4:list(range(18,23)), 5:list(range(23,27)), 6:list(range(27,34)), 7:list(range(34,45)), 8:list(range(45,60)), 9:list(range(60,91))}
        self.phase = phase
        self.classes = classes
        self.attribute_frame = pandas.read_csv(join(root,annotation_name))
        self.root = root
        self.loader = loader
        self.transform = transform

    def __getitem__(self, index):
        group_label = 4
        img_path , target = join(self.root, self.attribute_frame.iloc[index, 0]), int(self.attribute_frame.iloc[index, 1])
        for key in self.group_dict:
            if int(target) in self.group_dict[key]:
                group_label = key
                break
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            img = self.transform(image = img)['image']
    
        
        return img, group_label,target
 
    def __len__(self):
        return len(self.attribute_frame)
    
def dataloader_age(MyDataset,root,item_prob_filename,batch_size,shuffle , num_workers):
    item_prob_file =open(root + "/" + item_prob_filename, "r")
    item_prob_list = item_prob_file.readlines()
    item_prob_list = [float(item_prob_list[i]) for i in range(len(item_prob_list))]
    t = item_prob_list.pop(0)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(item_prob_list, len(item_prob_list))
    return torch.utils.data.DataLoader(MyDataset, batch_size=batch_size, num_workers=num_workers,shuffle=shuffle,sampler=sampler)

In [6]:
data_transforms_A = {
    'train': A.Compose([
        A.Resize(112, 112),
        A.RandomResizedCrop(height=112,width=112,scale=(0.5, 1.0)),

        A.core.composition.OneOf ([ 
            A.Blur(p=0.5),#Размытие входного изображения с помощью ядра случайного размера. 
            A.GaussianBlur(p=0.5),#Размытие входного изображения с помощью фильтра Гаусса со случайным размером ядра. 
            A.GaussNoise(p=0.5),#Примените гауссовский шум к входному изображению. 
            A.ISONoise(p=0.5),#Примените шум сенсора камеры. 
            A.MedianBlur(p=0.5),#Размытие входного изображения с помощью медианного фильтра со случайным линейным размером апертуры.
            A.MotionBlur(p=0.5),#Примените размытие движения к входному изображению, используя ядро случайного размера. 
            A.CLAHE(p=0.5),#Примените коррекцию адаптивной гистограммы с ограничением контраста к входному изображению.
            A.Equalize(p=0.5),#Выровняйте гистограмму изображения. 
        ], p = 1),

        A.core.composition.OneOf ([ 
            A.ChannelDropout(p=0.5),#Случайно отбросьте каналы во входном изображении.
            A.ChannelShuffle(p=0.5),#Произвольно переставьте каналы входного изображения RGB.
            A.InvertImg(p=0.5),#Инвертируйте входное изображение, вычитая значения пикселей из 255
            A.Solarize(p=0.5),#Инвертировать все значения пикселей выше порога. 
            A.ToGray(p=0.5),#Преобразуйте входное изображение RGB в оттенки серого.
            A.HueSaturationValue(p=0.5),#Произвольно изменяйте оттенок, насыщенность и значение входного изображения. 
            A.RandomBrightness(p=0.5),#Произвольно изменяйте яркость входного изображения. 
            A.RandomBrightnessContrast(p=0.5),#Произвольно изменяйте яркость и контраст входного изображения.
            A.RandomContrast(p=0.5)#Произвольно изменяйте контраст входного изображения.
        ], p = 1),
        A.core.composition.OneOf ([ 
           A.Downscale(scale_min=0.2, scale_max=0.2,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.3, scale_max=0.3,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.4, scale_max=0.4,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.5, scale_max=0.5,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.6, scale_max=0.6,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.7, scale_max=0.7,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.8, scale_max=0.8,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.9, scale_max=0.9,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
           A.Downscale(scale_min=0.99, scale_max=0.99,p=0.5),#Уменьшает качество изображения за счет уменьшения и обратного увеличения. 
        ], p = 1),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        Ap.transforms.ToTensorV2()
        ]),
    'val': A.Compose([
        A.Resize(112, 112),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        Ap.transforms.ToTensorV2()
        ]),
}

data_dir = '/storage_labs/3030/BelyakovM/Face_attributes/ds/db_GordeevN/Over_dataset'
annotations = {'train':'anataishon_train.csv','val':'anataishon_val.csv'}
item_probabilities = {'train':'anataishon_train_rasp.txt','val':'anataishon_val_rasp.txt'}
image_datasets = {x: MyDataset_age(data_dir,x,annotations[x],list(range(10)),
                                          data_transforms_A[x])
                  for x in ['train', 'val']}
dataloaders = {x: dataloader_age(image_datasets[x],data_dir,item_probabilities[x], batch_size=60,
                                             shuffle=False, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: image_datasets[x].__len__() for x in ['train', 'val']}
class_names = image_datasets['train'].classes




In [7]:
#Train loop for age training 
def train_model(model, classification_criterion,regression_criterion, optimizer, scheduler, losses_ratio = None,num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            
            model.eval()   # Set model to evaluate mode

            running_class_loss = 0.0
            running_regress_loss = 0.0
            

            # Iterate over data.
            for inputs, class_labels,regress_labels in dataloaders[phase]:
                class_labels = class_labels.type(torch.FloatTensor)
                regress_labels = regress_labels.type(torch.FloatTensor)
                inputs = inputs.to(device)
                class_labels = class_labels.to(device)
                regress_labels = regress_labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    age_group,age_digit = model(inputs)[1]
                    batch_regression_loss = regression_criterion(age_digit, regress_labels)
                    batch_classification_loss = classification_criterion(age_group, class_labels.long())
                    
                    
                    if (losses_ratio == None):
                        if (batch_regression_loss > batch_classification_loss):
                            alpha = batch_regression_loss / batch_classification_loss
                            loss = alpha * batch_classification_loss + batch_regression_loss
                        else:
                            alpha = batch_classification_loss / batch_regression_loss
                            loss = batch_classification_loss +  alpha * batch_regression_loss
                    else:
                        loss = losses_ratio[0] * batch_regression_loss + losses_ratio[1] * batch_classification_loss
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_class_loss += batch_classification_loss.item() * inputs.size(0)
                running_regress_loss += batch_regression_loss.item() * inputs.size(0)
                
            if phase == 'train':
                scheduler.step()

            epoch_class_loss = running_class_loss / dataset_sizes[phase]
            epoch_regress_loss = running_regress_loss/ dataset_sizes[phase]
            avrg_epoch_loss = (epoch_class_loss+epoch_regress_loss)/2

            
            print('{} Classificatoin_Loss: {:.4f} Regression_Loss: {:.4f}'.format(
                phase, epoch_class_loss, epoch_regress_loss))

            # deep copy the model
            if phase == 'val' and avrg_epoch_loss < best_loss:
                best_loss = avrg_epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## In the section below we are going to train efficient-b0 model with recognition weights with frozen backbone

In [9]:
class_criterion = nn.CrossEntropyLoss()
regress_criterion = torch.nn.L1Loss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(efficientnet_b0_pretrained_frozen_age_head2feats.parameters(), lr=0.0001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


efficientnet_b0_pretrained_frozen_age_head2feats = train_model(efficientnet_b0_pretrained_frozen_age_head2feats, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Classificatoin_Loss: 2.4769 Regression_Loss: 22.5765
val Classificatoin_Loss: 2.5571 Regression_Loss: 22.3863

Epoch 1/19
----------
train Classificatoin_Loss: 2.4636 Regression_Loss: 22.3584
val Classificatoin_Loss: 2.3597 Regression_Loss: 22.4925

Epoch 2/19
----------
train Classificatoin_Loss: 2.1732 Regression_Loss: 22.3991
val Classificatoin_Loss: 2.0201 Regression_Loss: 21.8635

Epoch 3/19
----------
train Classificatoin_Loss: 2.0259 Regression_Loss: 22.0205
val Classificatoin_Loss: 1.9702 Regression_Loss: 21.5281

Epoch 4/19
----------
train Classificatoin_Loss: 2.0099 Regression_Loss: 21.7250
val Classificatoin_Loss: 1.9717 Regression_Loss: 20.9239

Epoch 5/19
----------
train Classificatoin_Loss: 2.0117 Regression_Loss: 21.6252
val Classificatoin_Loss: 2.0489 Regression_Loss: 22.1098

Epoch 6/19
----------
train Classificatoin_Loss: 2.0106 Regression_Loss: 21.4589
val Classificatoin_Loss: 1.9625 Regression_Loss: 21.2065

Epoch 7/19
----------
train

## Age-head training of efficent-b0 hasn't yielded considerable accuracy. Merely 20 loss converece 

## Further let's train recognition efficient-b0 entirely

In [9]:
backbone_factory = BackboneFactory(args.backbone_type, args.backbone_conf_file)   
head_factory = HeadFactory(args.head_type, args.head_conf_file)
efficientnet_b0_pretrained_entirely_age_head2feats = FaceModel(backbone_factory, head_factory)
load_pretrained_weights(efficientnet_b0_pretrained_entirely_age_head2feats.backbone,args.backbone_type,weights_path ='/storage_labs/3030/BelyakovM/Face_attributes/Code/EfficientNet_B0_face_recognizer/efficientnet_facerecognition_weights.pt',load_fc=True )
efficientnet_b0_pretrained_entirely_age_head2feats.to(device)

  backbone_conf = yaml.load(f)
  head_conf = yaml.load(f)


backbone param:
{'width': 1.0, 'depth': 1.0, 'image_size': 112, 'drop_ratio': 0.2, 'out_h': 7, 'out_w': 7, 'feat_dim': 512}
head param:
{'feat_dim': 512, 'num_class': 72778, 'scale': 32, 'lamda': 70.0}
Loaded pretrained weights for EfficientNet


FaceModel(
  (relu): ReLU()
  (Softmax): Softmax(dim=1)
  (backbone): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(1, 1), bias=False
      (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (stat

In [10]:
class_criterion = nn.CrossEntropyLoss()
regress_criterion = torch.nn.L1Loss()

optimizer_ft = optim.SGD(efficientnet_b0_pretrained_entirely_age_head2feats.parameters(), lr=0.0001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


efficientnet_b0_pretrained_entirely_age_head2feats = train_model(efficientnet_b0_pretrained_entirely_age_head2feats, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Classificatoin_Loss: 2.6525 Regression_Loss: 22.2847
val Classificatoin_Loss: 2.2526 Regression_Loss: 21.0732

Epoch 1/19
----------
train Classificatoin_Loss: 1.9758 Regression_Loss: 13.6190
val Classificatoin_Loss: 1.8279 Regression_Loss: 9.0696

Epoch 2/19
----------
train Classificatoin_Loss: 1.6739 Regression_Loss: 10.1495
val Classificatoin_Loss: 1.5256 Regression_Loss: 9.2586

Epoch 3/19
----------
train Classificatoin_Loss: 1.6127 Regression_Loss: 9.2943
val Classificatoin_Loss: 1.6316 Regression_Loss: 7.4620

Epoch 4/19
----------
train Classificatoin_Loss: 1.5762 Regression_Loss: 9.0202
val Classificatoin_Loss: 1.5283 Regression_Loss: 7.3777

Epoch 5/19
----------
train Classificatoin_Loss: 1.4965 Regression_Loss: 8.5143
val Classificatoin_Loss: 1.4622 Regression_Loss: 6.9260

Epoch 6/19
----------
train Classificatoin_Loss: 1.5270 Regression_Loss: 8.1669
val Classificatoin_Loss: 1.3838 Regression_Loss: 7.1983

Epoch 7/19
----------
train Classific

In [11]:
efficientnet_b0_pretrained_entirely_age_head2feats = train_model(efficientnet_b0_pretrained_entirely_age_head2feats, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=15)

Epoch 0/14
----------
train Classificatoin_Loss: 1.4744 Regression_Loss: 7.1828
val Classificatoin_Loss: 1.4226 Regression_Loss: 6.4838

Epoch 1/14
----------
train Classificatoin_Loss: 1.4786 Regression_Loss: 7.1702
val Classificatoin_Loss: 1.4321 Regression_Loss: 6.5857

Epoch 2/14
----------
train Classificatoin_Loss: 1.4663 Regression_Loss: 7.1440
val Classificatoin_Loss: 1.4612 Regression_Loss: 6.5708

Epoch 3/14
----------
train Classificatoin_Loss: 1.4530 Regression_Loss: 7.1184
val Classificatoin_Loss: 1.4395 Regression_Loss: 6.6653

Epoch 4/14
----------
train Classificatoin_Loss: 1.4494 Regression_Loss: 7.1719
val Classificatoin_Loss: 1.3832 Regression_Loss: 6.5508

Epoch 5/14
----------
train Classificatoin_Loss: 1.4631 Regression_Loss: 7.1412
val Classificatoin_Loss: 1.4231 Regression_Loss: 6.4754

Epoch 6/14
----------
train Classificatoin_Loss: 1.4626 Regression_Loss: 7.1027
val Classificatoin_Loss: 1.4327 Regression_Loss: 6.5219

Epoch 7/14
----------
train Classificatoi

## Not bad results. Mean age error is 6.5 years


## Let's train the same model versions(frozen backbone,trained entirely), but with ImageNet weights

In [8]:
class MultiTaskModel_aged_Tensor(nn.Module):
    def __init__(self,model_backbone):
        super(MultiTaskModel_aged_Tensor,self).__init__()
        self.relu = nn.ReLU()
        self.Softmax = nn.Softmax(1)
        self.encoder = model_backbone
        self.gender_head = Sequential(nn.Linear(in_features=1280, out_features=1000, bias=True),self.relu ,nn.Linear(in_features=1000, out_features=30, bias=True),self.relu,nn.Linear(30,2))   

        self.age_group_head = Sequential(nn.Linear(in_features=1280, out_features=1400, bias=True),self.relu,nn.Linear(in_features=1400, out_features=10, bias=True))
        self.expression_head =  Sequential(nn.Linear(in_features=1280, out_features=1400, bias=True),self.relu,nn.Linear(in_features=1400, out_features=7, bias=True))
        #Age groups: ['1-2', '3-6', '7-12', '13-17', '18-22', '23-26', '27-33', '34-44', '45-59', '60-90']
        self.idx_tensor = torch.from_numpy(np.array([1.5, 4.5, 9.5, 15, 20, 24.5, 30, 39, 52, 75])).to(device)
    def forward(self,x):
        features = self.encoder(x)
        gender = self.gender_head(self.relu(features))
        expression = self.expression_head(self.relu(features))
        
        grouped_age = self.age_group_head(self.relu(features))
        regression_age = torch.sum(self.Softmax(grouped_age) * self.idx_tensor, axis=1)
        return [gender, (grouped_age,regression_age),  expression]
    
    
efficientb0_imagnet_frozen = timm.create_model('efficientnet_b0', pretrained=True)
efficientb0_imagnet_frozen.classifier = nn.Sequential(*list(efficientb0_imagnet_frozen.classifier.children())[:-3])
efficientb0_imagnet_frozen = MultiTaskModel_aged_Tensor(efficientb0_imagnet_frozen)

efficientb0_imagnet_frozen = efficientb0_imagnet_frozen.to(device)

for module in efficientb0_imagnet_frozen.encoder.modules():
    if isinstance(module,nn.modules.BatchNorm1d):
        module.eval()
    if isinstance(module,nn.modules.BatchNorm2d):
        module.eval()
    if isinstance(module,nn.modules.BatchNorm3d):
        module.eval()
for i in efficientb0_imagnet_frozen.parameters():
    i.requires_grad = False
for param in efficientb0_imagnet_frozen.age_group_head.parameters():
    param.requires_grad = True

for name,param in efficientb0_imagnet_frozen.named_parameters():
    if param.requires_grad:
        print(name)

age_group_head.0.weight
age_group_head.0.bias
age_group_head.2.weight
age_group_head.2.bias


In [9]:
class_criterion = nn.CrossEntropyLoss()
regress_criterion = torch.nn.L1Loss()

optimizer_ft = optim.SGD(efficientb0_imagnet_frozen.parameters(), lr=0.0001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

efficientb0_imagnet_frozen = train_model(efficientb0_imagnet_frozen, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Classificatoin_Loss: 2.0373 Regression_Loss: 18.3849
val Classificatoin_Loss: 1.9782 Regression_Loss: 13.1766

Epoch 1/19
----------
train Classificatoin_Loss: 2.0751 Regression_Loss: 16.4312
val Classificatoin_Loss: 1.9432 Regression_Loss: 12.4817

Epoch 2/19
----------
train Classificatoin_Loss: 2.0469 Regression_Loss: 15.9618
val Classificatoin_Loss: 1.9085 Regression_Loss: 12.1514

Epoch 3/19
----------
train Classificatoin_Loss: 2.0275 Regression_Loss: 15.6369
val Classificatoin_Loss: 1.9052 Regression_Loss: 12.2448

Epoch 4/19
----------
train Classificatoin_Loss: 2.0061 Regression_Loss: 15.5745
val Classificatoin_Loss: 1.9262 Regression_Loss: 11.9246

Epoch 5/19
----------
train Classificatoin_Loss: 1.9970 Regression_Loss: 15.3033
val Classificatoin_Loss: 1.8413 Regression_Loss: 11.7910

Epoch 6/19
----------
train Classificatoin_Loss: 1.9967 Regression_Loss: 15.1258
val Classificatoin_Loss: 1.8690 Regression_Loss: 11.9358

Epoch 7/19
----------
train

In [10]:
efficientb0_imagnet_entirely = timm.create_model('efficientnet_b0', pretrained=True)
efficientb0_imagnet_entirely.classifier = nn.Sequential(*list(efficientb0_imagnet_entirely.classifier.children())[:-3])
efficientb0_imagnet_entirely = MultiTaskModel_aged_Tensor(efficientb0_imagnet_entirely)

efficientb0_imagnet_entirely = efficientb0_imagnet_entirely.to(device)

In [11]:
class_criterion = nn.CrossEntropyLoss()
regress_criterion = torch.nn.L1Loss()

optimizer_ft = optim.SGD(efficientb0_imagnet_entirely.parameters(), lr=0.0001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

efficientb0_imagnet_entirely = train_model(efficientb0_imagnet_entirely, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Classificatoin_Loss: 1.7724 Regression_Loss: 14.8265
val Classificatoin_Loss: 1.5822 Regression_Loss: 11.0553

Epoch 1/19
----------
train Classificatoin_Loss: 1.6207 Regression_Loss: 11.0434
val Classificatoin_Loss: 1.5522 Regression_Loss: 10.3698

Epoch 2/19
----------
train Classificatoin_Loss: 1.5018 Regression_Loss: 9.9029
val Classificatoin_Loss: 1.3788 Regression_Loss: 8.5092

Epoch 3/19
----------
train Classificatoin_Loss: 1.4642 Regression_Loss: 9.4897
val Classificatoin_Loss: 1.4420 Regression_Loss: 10.0156

Epoch 4/19
----------
train Classificatoin_Loss: 1.4139 Regression_Loss: 9.1387
val Classificatoin_Loss: 1.3156 Regression_Loss: 8.2566

Epoch 5/19
----------
train Classificatoin_Loss: 1.3831 Regression_Loss: 8.8564
val Classificatoin_Loss: 1.2902 Regression_Loss: 8.3827

Epoch 6/19
----------
train Classificatoin_Loss: 1.3447 Regression_Loss: 8.6463
val Classificatoin_Loss: 1.2441 Regression_Loss: 7.7302

Epoch 7/19
----------
train Classifi

In [13]:
optimizer_ft = optim.SGD(efficientb0_imagnet_entirely.parameters(), lr=0.0001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

efficientb0_imagnet_entirely = train_model(efficientb0_imagnet_entirely, class_criterion,regress_criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=15)

Epoch 0/14
----------
train Classificatoin_Loss: 1.3159 Regression_Loss: 8.1332
val Classificatoin_Loss: 1.2150 Regression_Loss: 7.3968

Epoch 1/14
----------
train Classificatoin_Loss: 1.3203 Regression_Loss: 8.2050
val Classificatoin_Loss: 1.2514 Regression_Loss: 7.6909

Epoch 2/14
----------
train Classificatoin_Loss: 1.2774 Regression_Loss: 7.9809
val Classificatoin_Loss: 1.2209 Regression_Loss: 7.6197

Epoch 3/14
----------
train Classificatoin_Loss: 1.2583 Regression_Loss: 7.8078
val Classificatoin_Loss: 1.1754 Regression_Loss: 7.5947

Epoch 4/14
----------
train Classificatoin_Loss: 1.2500 Regression_Loss: 7.8101
val Classificatoin_Loss: 1.2142 Regression_Loss: 7.4053

Epoch 5/14
----------
train Classificatoin_Loss: 1.2541 Regression_Loss: 7.7957
val Classificatoin_Loss: 1.1909 Regression_Loss: 7.3045

Epoch 6/14
----------
train Classificatoin_Loss: 1.2317 Regression_Loss: 7.6584
val Classificatoin_Loss: 1.1730 Regression_Loss: 7.2762

Epoch 7/14
----------
train Classificatoi

## As can be concluded, ImageNet weights brought significant L1loss when it comes to frozen version

## As for entirely trained version, seems like face recognition weights boost expression training more than ImageNet ones 