# Age, Gender and Race Classification

In [None]:
!pip install poutyne
!pip install torchsummary

In [1]:
import os
import tarfile
import urllib.request
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision.models as models
from torch.utils import model_zoo
from torch.utils.data import Subset, DataLoader, Dataset, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

from poutyne import set_seeds, Model, ModelCheckpoint, CSVLogger, Experiment, BatchMetric
import tarfile
from torchsummary import summary

In [2]:
# Fix the random seed through Poutyne's helper so that runs are repeatable.
# NOTE(review): presumably this seeds Python, NumPy and PyTorch at once — confirm in the Poutyne docs.
set_seeds(42)

We download the dataset.

In [3]:
# Local directory that holds everything related to the dataset.
base_path = './datasets/utk_faces'
# File name under which the downloaded archive is stored inside base_path.
tar_file_name = 'utk_face.tar.gz'
# Directory the archive is extracted into; also the root folder of the image dataset.
base_image_path = os.path.join(base_path, 'images')
# Direct-download link (Google Drive) of the dataset archive.
download_url = 'https://drive.google.com/uc?export=download&id=0BxYys69jI14kYVM3aVhKS1VhRUk&alt=media&confirm=t'

In [4]:
def download_and_extract_dataset(base_path, extract_path, file_name, url):
    """Download a tar archive (unless already present) and extract it.

    Args:
        base_path (str): directory in which the archive file is stored.
        extract_path (str): directory the archive content is extracted into.
        file_name (str): name of the archive file inside ``base_path``.
        url (str): location the archive is fetched from.

    Both directories are created when missing. An archive that is already on
    disk is reused, so re-running the notebook does not download it again.
    """
    os.makedirs(base_path, exist_ok=True)
    os.makedirs(extract_path, exist_ok=True)
    tar_filename = os.path.join(base_path, file_name)

    if os.path.isfile(tar_filename):
        print(f'Archive {tar_filename} already present, skipping download.')
    else:
        print(f'Downloading dataset to {tar_filename} ...')
        # Download under a temporary name and rename afterwards, so that an
        # interrupted transfer is never mistaken for a complete archive.
        partial_filename = tar_filename + '.part'
        urllib.request.urlretrieve(url, partial_filename)
        os.replace(partial_filename, tar_filename)

    print('Extracting archive...')
    # Mode "r" lets tarfile detect the compression scheme by itself.
    with tarfile.open(tar_filename, "r") as tar:
        if hasattr(tarfile, 'data_filter'):
            # Refuse members that would escape extract_path (absolute paths,
            # "..", unsafe links) on Python versions that support filters.
            tar.extractall(extract_path, filter='data')
        else:
            tar.extractall(extract_path)
    
# Fetch the archive into base_path and unpack it into base_image_path.
download_and_extract_dataset(base_path, base_image_path, tar_file_name, download_url)

Downloading dataset to ./datasets/utk_faces\utk_face.tar.gz ...
Extracting archive...


In [5]:
# Training constants

# Use the first CUDA device when one is available, otherwise the CPU.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device("cuda:%d" % cuda_device)
else:
    device = torch.device("cpu")

# Optimisation hyper-parameters.
n_epoch = 70
batch_size = 32
learning_rate = 0.0001
weight_decay = 0.01

# Data / task settings.
image_size = 48
num_classes = 7

# Per-task multipliers handed to the loss function.
loss_weights = {
    'age': 1/24,
    'gender': 1/2,
    'race': 1/5,
}

In [None]:
class FacesImageFolder(ImageFolder):
    """``ImageFolder`` variant whose targets are parsed from the image file name.

    File names are expected to look like ``<age>_<gender>_<race>_<rest>``.
    The folder-derived class index stored in ``self.samples`` is ignored.
    """

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is the list
            ``[age_bin, gender, race]`` and ``age_bin`` is ``age // 5``
            (5-year buckets).
        """
        path, _ = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)

        # Split at most three times so that underscores in the trailing part
        # of the name cannot break the four-way unpacking.
        age, gender, race, _ = os.path.basename(path).split('_', 3)

        return sample, [int(age) // 5, int(gender), int(race)]
    
class TransformDataset(Dataset):
    """Wrap a dataset and apply an optional transform to every item.

    Only the item is transformed; its label is passed through unchanged.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        sample, target = self.dataset[index]
        transformed = sample if self.transform is None else self.transform(sample)
        return transformed, target


In [None]:
# Training pipeline: resize, small random translation, random mirroring,
# then per-channel normalisation with mean 0.5 and std 0.5.
augment_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
normalize_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Labelled files are named "<age>_<gender>_<race>_<anything>". The raw string
# keeps "\d" from being read as an (invalid) string escape sequence.
re_valid_file_name = re.compile(r'\d+_\d_\d_.*')


def is_valid_face_file(path):
    """Return True for non-hidden files whose name carries the three labels."""
    file_name = os.path.basename(path)
    return not file_name.startswith('.') and re_valid_file_name.match(file_name) is not None


# Images are converted to tensors here; resizing and normalisation are applied
# later, per split.
images_dataset = FacesImageFolder(
    base_image_path,
    transform=transforms.ToTensor(),
    is_valid_file=is_valid_face_file,
)


In [None]:
# Number of images kept by the file-name filter.
print(len(images_dataset))

In [None]:
# 80 % of the images go to training and 10 % to validation; the test split
# takes the remainder so the three lengths always add up to the dataset size.
n_images = len(images_dataset)
n_train = int(n_images * 0.8)
n_valid = int(n_images * 0.1)
dataset_lengths = [n_train, n_valid, n_images - n_train - n_valid]
train_dataset, valid_dataset, test_dataset = random_split(images_dataset, dataset_lengths)

# Only the training split is augmented; validation and test are just resized
# and normalised.
train_dataset, valid_dataset, test_dataset = (
    TransformDataset(train_dataset, augment_transform),
    TransformDataset(valid_dataset, normalize_transform),
    TransformDataset(test_dataset, normalize_transform),
)

In [None]:
# Sizes of the train, validation and test splits, one per line.
print(len(train_dataset), len(valid_dataset), len(test_dataset), sep='\n')

We create PyTorch `DataLoader`s to serve each split in batches.

In [None]:
# Only the training loader shuffles its samples; validation and test keep a
# fixed order. All three load data in the main process (num_workers=0).
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=0,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    num_workers=0,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=0,
)

In [None]:
def show_batch(dataset):
    """Draw the first (up to) 16 images of ``dataset`` on a 4x4 grid.

    The grid is filled column by column. Each item is expected to be an
    ``(image, label)`` pair whose image is a channel-first tensor.
    """
    _, axes = plt.subplots(4, 4, figsize=(12, 12))
    # ``axes.T.flat`` walks the grid column-major; zip stops after 16 cells.
    for cell, (image, _) in zip(axes.T.flat, dataset):
        cell.imshow(image.permute(1, 2, 0))


show_batch(images_dataset)

In [None]:
# Pull a single batch from the training loader as a sanity check.
inputs, labels = next(iter(train_loader))
# NOTE(review): labels should carry the age, gender and race targets of the
# batch — confirm the collated structure from the printed output.
print(inputs.shape)
print(labels)

In [None]:
class CNN(nn.Module):
    """Convolutional network with a shared trunk and three classification heads.

    The trunk is four convolutional stages, each followed by batch
    normalisation, 2x2 max-pooling and dropout. The flattened trunk output
    feeds three independent fully connected heads that produce the logits
    for age (24 classes), gender (2 classes) and race (5 classes).

    Args:
        input_size (int): side length of the square input images.
            Defaults to 48, which gives 4608 flattened features.

    Raises:
        ValueError: if ``input_size`` is smaller than 16, because the four
            poolings would leave no spatial position.
    """

    def __init__(self, input_size=48):
        super(CNN, self).__init__()

        self.maxpool = nn.MaxPool2d(2, 2)

        self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding='same')
        self.conv1_2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding='same')
        self.bn_conv1 = nn.BatchNorm2d(64)
        self.dropout_conv1 = nn.Dropout2d(0.25)

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding='same')
        self.bn_conv2 = nn.BatchNorm2d(128)
        self.dropout_conv2 = nn.Dropout2d(0.25)

        self.conv3 = nn.Conv2d(in_channels=128, out_channels=512, kernel_size=3, padding='same')
        self.bn_conv3 = nn.BatchNorm2d(512)
        self.dropout_conv3 = nn.Dropout2d(0.25)

        self.conv4 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding='same')
        self.bn_conv4 = nn.BatchNorm2d(512)
        self.dropout_conv4 = nn.Dropout2d(0.25)

        # The 'same' convolutions keep the spatial size, and each of the four
        # 2x2 poolings halves it (rounding down), so a side of input_size
        # shrinks to input_size // 16. The last convolution has 512 channels.
        pooled_side = input_size // 16
        if pooled_side < 1:
            raise ValueError('input_size must be at least 16, got %r' % (input_size,))
        flatten_length = 512 * pooled_side * pooled_side

        self.fc1_age = nn.Linear(flatten_length, 256)
        self.bn_fc1_age = nn.BatchNorm1d(256)
        self.dropout_fc1_age = nn.Dropout(0.25)

        self.fc1_gender = nn.Linear(flatten_length, 256)
        self.bn_fc1_gender = nn.BatchNorm1d(256)
        self.dropout_fc1_gender = nn.Dropout(0.25)

        self.fc1_race = nn.Linear(flatten_length, 256)
        self.bn_fc1_race = nn.BatchNorm1d(256)
        self.dropout_fc1_race = nn.Dropout(0.25)

        self.fc2_age = nn.Linear(256, 512)
        self.bn_fc2_age = nn.BatchNorm1d(512)
        self.dropout_fc2_age = nn.Dropout(0.25)

        self.fc2_gender = nn.Linear(256, 512)
        self.bn_fc2_gender = nn.BatchNorm1d(512)
        self.dropout_fc2_gender = nn.Dropout(0.25)

        self.fc2_race = nn.Linear(256, 512)
        self.bn_fc2_race = nn.BatchNorm1d(512)
        self.dropout_fc2_race = nn.Dropout(0.25)

        self.fc3_out_age = nn.Linear(512, 24)
        self.fc3_out_gender = nn.Linear(512, 2)
        self.fc3_out_race = nn.Linear(512, 5)

    def _run_head(self, features, task):
        """Apply the fc1 -> fc2 -> fc3 stack of one task ('age', 'gender' or 'race')."""
        hidden = features
        # NOTE(review): the fully connected stages have no non-linearity
        # between them (the convolutional stages use ReLU) — confirm that
        # this is intended.
        for stage in ('fc1', 'fc2'):
            hidden = getattr(self, f'{stage}_{task}')(hidden)
            hidden = getattr(self, f'bn_{stage}_{task}')(hidden)
            hidden = getattr(self, f'dropout_{stage}_{task}')(hidden)
        return getattr(self, f'fc3_out_{task}')(hidden)

    def forward(self, input):
        """Compute the logits of the three heads.

        Args:
            input (Tensor): batch of images, indexed as
                (batch, channels, height, width).

        Returns:
            tuple: ``(output_age, output_gender, output_race)`` logits.
        """
        output = F.relu(self.conv1_1(input))
        output = F.relu(self.conv1_2(output))
        output = self.bn_conv1(output)
        output = self.maxpool(output)
        output = self.dropout_conv1(output)

        output = F.relu(self.conv2(output))
        output = self.bn_conv2(output)
        output = self.maxpool(output)
        output = self.dropout_conv2(output)

        output = F.relu(self.conv3(output))
        output = self.bn_conv3(output)
        output = self.maxpool(output)
        output = self.dropout_conv3(output)

        output = F.relu(self.conv4(output))
        output = self.bn_conv4(output)
        output = self.maxpool(output)
        output = self.dropout_conv4(output)

        # Flattening process: keep the batch dimension, merge all the others.
        output = output.flatten(1)

        # Every head goes through the same helper, so each one gets both of
        # its dropout layers applied.
        output_age = self._run_head(output, 'age')
        output_gender = self._run_head(output, 'gender')
        output_race = self._run_head(output, 'race')

        return output_age, output_gender, output_race
    
cnn = CNN().to(device)
# summary() prints the layer table itself and returns None, so it is called
# directly instead of through print(). The device is passed explicitly so the
# dummy input is created on the same device as the model.
summary(cnn, input_size=(3, image_size, image_size), device=device.type)

We define the multi-task loss and the accuracy metric, followed by the callbacks for saving the last epoch, saving the best epoch and logging the results.

In [None]:
class AgeGenderRaceLoss(nn.Module):
    """Weighted sum of the cross-entropy losses of the age, gender and race heads.

    Args:
        weights (dict, optional): multipliers stored under the keys ``'age'``,
            ``'gender'`` and ``'race'``. Defaults to 1/3 for each task.
    """

    def __init__(self, weights=None):
        super(AgeGenderRaceLoss, self).__init__()
        if weights is None:
            weights = {'age': 1/3, 'gender': 1/3, 'race': 1/3}

        self.weights = weights

    def forward(self, inputs, targets, smooth=1):
        """Return the weighted multi-task loss.

        Args:
            inputs: the three logit tensors, in the order (age, gender, race).
            targets: the three class-index tensors, in the same order.
            smooth: unused; kept so that existing call sites keep working.

        Returns:
            Tensor: scalar
            ``w_age * CE_age + w_gender * CE_gender + w_race * CE_race``.
        """
        age_inputs, gender_inputs, race_inputs = inputs
        age_targets, gender_targets, race_targets = targets

        age_loss = F.cross_entropy(age_inputs, age_targets)
        gender_loss = F.cross_entropy(gender_inputs, gender_targets)
        race_loss = F.cross_entropy(race_inputs, race_targets)

        # All three tasks contribute; returning a single task's loss would
        # leave the other heads without any gradient.
        return (
            age_loss * self.weights['age']
            + gender_loss * self.weights['gender']
            + race_loss * self.weights['race']
        )
    
class AgeGenderRaceAccuracy(BatchMetric):
    """Batch metric: accuracy in percent, averaged over the age, gender and race heads."""

    def __init__(self, *,  weights=None):
        super().__init__()
        # `weights` is accepted for interface compatibility but is not used.
        self.__name__ = 'acc'

    def forward(self, y_pred, y_true):
        """Return the mean accuracy (0-100) of the three heads on one batch.

        Args:
            y_pred: the three logit tensors, in the order (age, gender, race).
            y_true: the three class-index tensors, in the same order.
        """
        batch_size = y_pred[0].shape[0]
        y_true_age, y_true_gender, y_true_race = y_true
        y_pred_age, y_pred_gender, y_pred_race = y_pred

        task_pairs = (
            (y_pred_age, y_true_age),
            (y_pred_gender, y_true_gender),
            (y_pred_race, y_true_race),
        )
        # Number of correct predictions, summed over the three tasks.
        correct = sum(
            (task_pred.argmax(1) == task_true).float().sum()
            for task_pred, task_true in task_pairs
        )

        # Every task counts equally, hence the division by 3.
        return correct * 100 / batch_size / 3

In [None]:
# Everything (checkpoints and training log) is saved under ./saves/cnn.
save_base_dir = 'saves'
save_path = os.path.join(save_base_dir, 'cnn')
os.makedirs(save_path, exist_ok=True)

callbacks = [
    # Save the latest weights to be able to resume the optimization at the end for more epochs.
    ModelCheckpoint(os.path.join(save_path, 'last_epoch.ckpt')),
    # Save the weights in a new file when the current model is better than all previous models
    # (highest 'val_acc' so far).
    ModelCheckpoint(
        os.path.join(save_path, 'best_epoch_{epoch}.ckpt'),
        monitor='val_acc',
        mode='max',
        save_best_only=True,
        restore_best=True,
        verbose=True,
    ),
    # Save the losses and accuracies for each epoch in a TSV.
    CSVLogger(os.path.join(save_path, 'log.tsv'), separator='\t'),
]

In [None]:
# Weight decay is applied only to the kernels of the last two convolutions;
# every other parameter is optimised without it.
decayed_names = ('conv3.weight', 'conv4.weight')
weight_parameters = (
    parameter
    for name, parameter in cnn.named_parameters()
    if name in decayed_names
)
non_weight_parameters = (
    parameter
    for name, parameter in cnn.named_parameters()
    if name not in decayed_names
)

optimizer = optim.Adam(
    [
        {'params': weight_parameters, 'weight_decay': weight_decay},
        {'params': non_weight_parameters},
    ],
    lr=learning_rate,
)

loss_function = AgeGenderRaceLoss(loss_weights)

# Poutyne wrapper tying together network, optimizer, loss and batch metric.
model = Model(
    cnn,
    optimizer,
    loss_function,
    device=device,
    batch_metrics=[AgeGenderRaceAccuracy()],
)

# Train with validation after each epoch; the callbacks handle checkpoints and logging.
model.fit_generator(train_loader, valid_loader, epochs=n_epoch, callbacks=callbacks)

# Evaluate on the held-out split, keeping predictions and ground truth for later analysis.
test_loss, test_acc, y_predict, y_true = model.evaluate_generator(
    test_loader,
    return_pred=True,
    return_ground_truth=True,
)

In [None]:
# Reload the per-epoch training log written as a tab-separated file.
log_file = os.path.join(save_path, 'log.tsv')
logs = pd.read_csv(log_file, sep='\t')
print(logs)

# Row with the highest validation accuracy -> its epoch number.
best_epoch_idx = logs['val_acc'].idxmax()
best_epoch = int(logs.loc[best_epoch_idx, 'epoch'])
print("Best epoch: %d" % best_epoch)

In [None]:
# Training and validation loss per epoch. Title and axis labels let the figure
# stand on its own when the notebook is skimmed.
metrics = ['loss', 'val_loss']
fig, ax = plt.subplots()
ax.plot(logs['epoch'], logs[metrics])
ax.set(title='Training and validation loss', xlabel='epoch', ylabel='loss')
ax.legend(metrics)
plt.show()

In [None]:
# Training and validation accuracy per epoch. Title and axis labels let the
# figure stand on its own when the notebook is skimmed.
metrics = ['acc', 'val_acc']
fig, ax = plt.subplots()
ax.plot(logs['epoch'], logs[metrics])
ax.set(title='Training and validation accuracy', xlabel='epoch', ylabel='accuracy')
ax.legend(metrics)
plt.show()

In [None]:
# One confusion matrix per task: the model has three heads, so predictions and
# ground truth are handled task by task instead of as a single array.
# NOTE(review): assumes y_predict and y_true each hold three arrays in the
# order (age, gender, race), as returned by evaluate_generator — confirm.
task_names = ['age', 'gender', 'race']

# savefig() does not create missing directories.
os.makedirs('plots', exist_ok=True)

for task_name, task_pred, task_true in zip(task_names, y_predict, y_true):
    # One class per logit column; passing the labels keeps the matrix square
    # even when some class never shows up in the test split.
    class_names = np.arange(task_pred.shape[1])
    cf_matrix = confusion_matrix(task_true, np.argmax(task_pred, axis=1), labels=class_names)

    plt.figure(figsize = (12,7))
    sn.heatmap(cf_matrix/np.sum(cf_matrix), annot=True, fmt='.2%', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion matrix - %s' % task_name)
    plt.xlabel('predicted class')
    plt.ylabel('true class')
    plt.savefig('plots/confusion_matrix_%s.png' % task_name)