In [None]:
# Mount Google Drive at /content/drive; every dataset and checkpoint path used
# in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import random
import os
from pathlib import Path

# Seed Python's built-in RNG so `random`-based steps are reproducible.
# The seed must be applied by *calling* random.seed(); assigning a value to an
# attribute on the module (e.g. `random.set_seed = 42`) seeds nothing.
# Note: this only covers the stdlib `random` module, not numpy or torch.
random.seed(42)

In [None]:
# Recursively collect every *.JPG below the image root and store the paths,
# converted to plain strings, in a single-column DataFrame.
path = Path('/content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images')
file_path = list(path.rglob('*.JPG'))

df = pd.DataFrame(file_path, columns=['image'])
df = df.astype(str)

import re
def age(image_path):
    """Extract the age encoded in an image file name.

    The age is the run of digits that directly follows the last ``A`` in the
    file name (the part before its first dot), e.g. ``001A02.JPG`` -> 2 and
    ``082A43a.JPG`` -> 43.  Only the final path component is inspected, so
    dots or ``A`` characters in parent directory names cannot affect parsing.

    Args:
        image_path: Path to the image, as a string.

    Returns:
        The parsed age as an ``int``.

    Raises:
        ValueError: If the file name contains no ``A`` followed by digits.
    """
    # Work on the file name only; splitting the full path on '.' would cut at
    # a dot inside any directory name.
    stem = os.path.basename(image_path).split('.')[0]
    _, marker, tail = stem.rpartition('A')
    digits = re.match(r'\d+', tail) if marker else None
    if digits is None:
        raise ValueError("cannot parse an age from file name: %r" % (image_path,))
    return int(digits.group(0))



# Parse the numeric age out of every image path and keep it as the label.
df['label'] = df['image'].apply(age)
df

In [None]:
# Sanity check: smallest and largest parsed age label.
df.label.min(), df.label.max()

In [None]:
def age_map(number):
  """Map an age to the label of its ten-year bucket.

  Args:
    number: Age in years; must satisfy ``0 <= number < 70``.  Fractional
      values are bucketed by their integer part.

  Returns:
    One of ``'00-09'``, ``'10-19'``, ..., ``'60-69'``.

  Raises:
    ValueError: If ``number`` lies outside the supported 0-69 range.
  """
  # Reject out-of-range input explicitly instead of falling through every
  # branch and failing on an unassigned local.
  if not 0 <= number < 70:
    raise ValueError("age %r is outside the supported range 0-69" % (number,))
  lower = (int(number) // 10) * 10
  return "%02d-%02d" % (lower, lower + 9)

In [None]:
# Bucket every numeric age label into its age-group class name.
df['age_group'] = df['label'].apply(age_map)
df

In [None]:
# Class balance: number of images in each age group.
df["age_group"].value_counts()

## Restructuring Directory

The original directory structure cannot be read by the PyTorch data loaders used below. <br>
To make the tree readable, every image file is moved into a subfolder named after its class (age group).

In [None]:
def user_id(path):
  """Return the text after the last '/' in ``path`` (the whole string if there is none)."""
  _, _, tail = path.rpartition("/")
  return tail
# File name (last path component) of every image, used as the lookup key later.
df['image_id'] = df['image'].apply(user_id)


In [None]:
import os
import shutil

In [None]:
# Image root: its immediate subfolders are scanned for image files, and the
# per-class folders are created directly inside it.
folder_path =  "/content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images"

In [None]:
# Extensions (case-sensitive, without the dot) that count as image files.
formats = ["JPG", "png"]
file_list = []

# Look exactly one level down: folder_path/<subdir>/<file>.
for subdir in os.listdir(folder_path):
    subpath = os.path.join(folder_path, subdir)
    if not os.path.isdir(subpath):
        continue
    for f in os.listdir(subpath):
        filepath = os.path.join(subpath, f)
        if not os.path.isfile(filepath):
            continue
        # keep (full path, bare file name) for every file with a wanted extension
        if f.split(".")[-1] in formats:
            file_list.append((filepath, f))

print(len(file_list))

In [None]:
# DONT INTERRUPT WHILE RUNNING
# SHOWS PROGRESS
#
# Moves every file in file_list to folder_path/<age_group>/<file name>, where
# the age group is looked up in df by file name.

im_len = len(file_list)

# Build the file-name -> age-group lookup once instead of filtering the whole
# DataFrame for every file.  drop_duplicates keeps the first row per file name.
age_group_by_name = (
    df.drop_duplicates("image_id")
      .set_index("image_id")["age_group"]
      .to_dict()
)

# Files that have no row in df (e.g. extensions df never indexed) are left in
# place and reported, rather than aborting half-way through the move.
skipped = []

for i, (filepath, filename) in enumerate(file_list):
    # Use a local name here: rebinding `user_id` would clobber the user_id()
    # helper defined earlier and break re-running the cell that applies it.
    image_id = filepath.split("/")[-1]
    class_ = age_group_by_name.get(image_id)

    if class_ is None:
        skipped.append(filepath)
    else:
        new_path = os.path.join(folder_path, class_)
        os.makedirs(new_path, exist_ok=True)

        # move file
        new_path = os.path.join(new_path, filename)
        shutil.move(filepath, new_path)

    # progress
    prog = (20 * (i + 1)) // im_len
    print("\r[" + "="*prog + "_"*(20-prog) + "]", end="")

if skipped:
    print("\nskipped %d file(s) with no age_group entry in df" % len(skipped))

In [None]:
# list the classes
!ls /content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images

In [None]:
# compress and save to drive for future purposes
# ! tar -czvf faces-cleaned.tar.gz data/faces
# ! cp faces.tar.gz /content/gdrive/My\ Drive/project/vgg/data/

## Preparation

In [None]:
# %cd /content/drive/MyDrive/2023/VGG
# # ! mkdir VGG-AGES
# %cd VGG-AGES
# ! git clone https://github.com/notshridhar/vgg-age

In [None]:
%cd /content/drive/MyDrive/2023/VGG/VGG-AGES/vgg-age

In [None]:
import torch
import source.models as models
import source.worker as worker
import source.loader as loader

In [None]:
# the mean and std of dataset are found by running this
# takes some time to iterate twice
loader.find_mean_std("/content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images")

In [None]:
# Configure module-level settings on `loader` before the data loaders are built:
#   loader.mean / loader.std - per-channel mean and standard deviation of the images
#                              (presumably the values reported by find_mean_std - TODO confirm)
#   loader.random_scale      - a (low, high) range; presumably the bounds of a random
#                              rescale augmentation - TODO confirm in source/loader.py
loader.random_scale = (0.8, 1.0)
loader.mean = [0.425, 0.343, 0.314]
loader.std  = [0.243, 0.214, 0.210]

In [None]:
# Dataset Loader to feed into network
# 20% of data is used for validation
loaders = loader.split_loader("/content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images", valid_frac=0.2, batch_size=32)

In [None]:
# pretrained weights - for convolution layers
# NOTE(review): this file is read from .../VGG/vgg-age/weight/, whereas the repo
# is entered at .../VGG/VGG-AGES/vgg-age/ - confirm the weight file location.
state = loader.load_pth("/content/drive/MyDrive/2023/VGG/vgg-age/weight/vgg_face_dag.pth")

## Model initialization

In [None]:
# NOTE(review): age_map() yields 7 age groups ('00-09' .. '60-69') but the
# network is built with 8 outputs - confirm this matches the number of class
# folders the loader actually sees.
model = models.vgg16(num_classes=8)

In [None]:
# pretrained vgg-face
model.load_weights(state)

In [None]:
# gives parameter count and memory in MB
model.memory_usage()

In [None]:
#train for 10 epochs
worker.train(model, loaders,lr = 0.001, epochs=10)

In [None]:
from google.colab import files
files.download('checkpoint.pth')

In [None]:
# save checkpoint to drive
!cp checkpoint.pth /content/drive/MyDrive/2023/VGG/vgg-age/checkpoint.pth

## Confusion Matrix

In [None]:
# to try out saved version

! cp /content/drive/MyDrive/2023/VGG/VGG-AGES/vgg-age/checkpoint.pth

check = loader.load_pth("checkpoint.pth")

In [None]:
# The second entry of `loaders` is used as the validation loader
# (presumably split_loader returns (train, valid) - TODO confirm).
valid_loader = loaders[1]
conf_mat = worker.confusion_matrix(model, valid_loader)

In [None]:
# Print the matrix as percentages: one line per row, every entry followed by a tab.
for row in conf_mat:
    print("".join("%.2f\t" % (elem*100) for elem in row))
    

In [None]:
# one-off accuracy: for each class, sum the diagonal entry and its immediate
# left/right neighbours in the same row, then average over the classes.
# assumes every row of conf_mat is normalised to sum to 1 - TODO confirm
ncls = len(conf_mat)
tot_acc = 0.0

for i in range(ncls):

    # Copy the entry into a plain float first.  If conf_mat is a torch.Tensor,
    # conf_mat[i][i] is a view and `acc += ...` on it would overwrite the
    # matrix's diagonal in place.
    acc = float(conf_mat[i][i])

    # add left neighbour
    if i > 0:
        acc += float(conf_mat[i][i-1])

    # add right neighbour
    if i < ncls - 1:
        acc += float(conf_mat[i][i+1])

    tot_acc += acc

tot_acc = tot_acc / ncls
print("%.2f" % (tot_acc * 100))

## Visualization

The corresponding predictions and target labels are visualized along with the input images.

In [None]:
import os
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

In [None]:
# get index to class mapping: the entries of the image root, sorted by name
# and numbered from 0
dire = "/content/drive/MyDrive/2023/VGG/VGG-AGES/FGNET/images"
classes = sorted(os.listdir(dire))
idx_to_class = dict(enumerate(classes))
print(idx_to_class)

In [None]:
import torch
model = models.vgg16(num_classes=8)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only runtime as well; load_state_dict then copies the weights into the
# model's own parameters.
checkpoint = torch.load(
    "/content/drive/MyDrive/2023/VGG/VGG-AGES/vgg-age/checkpoint.pth",
    map_location="cpu",
)
model.load_state_dict(checkpoint['state_dict'])
### now you can evaluate it

import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def pre_image(image_path,model):
    """Predict the class name of a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Network to run; it is switched to eval mode by this call.
            Presumably a torch.nn.Module - TODO confirm.

    Returns:
        The entry of ``idx_to_class`` for the highest-scoring output.
    """
    # Force three channels: grayscale or RGBA files would otherwise not match
    # the 3-value mean/std used by Normalize.  The context manager closes the
    # file handle once the pixels are loaded.
    with Image.open(image_path) as img_file:
        img = img_file.convert("RGB")

    mean = [0.425, 0.343, 0.314]
    std = [0.243, 0.214, 0.210]
    transform_norm = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224,224)),
        transforms.Normalize(mean, std),
    ])

    # get normalized image and add the batch dimension
    img_normalized = transform_norm(img).float().unsqueeze(0)

    # Put the input on the device the model's weights live on; sending it to
    # the global `device` fails when CUDA is available but the model is on CPU.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = device
    img_normalized = img_normalized.to(model_device)

    model.eval()
    with torch.no_grad():
        output = model(img_normalized)
        _, preds = torch.max(output, 1)
    return idx_to_class[int(preds)]
pre_image("/content/50.jpg", model)

In [None]:
# turn model to evaluation and move to cpu
model.eval()
batch_size = 32
model.to(torch.device("cpu"))

In [None]:
# inverse transform to show images
# Normalize computes (x - mean) / std, so running it with mean = -m/s and
# std = 1/s maps a normalised image y back to y * s + m.
mean, std = loader.mean, loader.std
mn_inv = list(map(lambda m, s: -m/s, mean, std))
sd_inv = list(map(lambda s: 1/s, std))
inv_transform = transforms.Normalize(mean=mn_inv, std=sd_inv)

In [None]:
# Show the first validation batch (up to rows * columns images) with the
# predicted and the real class in each title.
with torch.no_grad():
    for images, target in valid_loader:

        output = model(images)
        _, preds = torch.max(output, 1)

        fig = plt.figure(figsize=(15, 15))
        columns = 4
        rows = 5

        # Never index past the end of a batch smaller than the grid.
        n_show = min(columns * rows, len(preds))

        # Samples are 0-based while subplot positions are 1-based; starting the
        # sample index at 1 would skip the first image of the batch.
        for k in range(n_show):

            pred_class = idx_to_class[int(preds[k])]
            real_class = idx_to_class[int(target[k])]

            ax = fig.add_subplot(rows, columns, k + 1)
            ax.title.set_text("pred:" + pred_class + "," + "real:" + real_class)
            ax.axis("off")

            # undo the normalisation, go from CHW to HWC, and clamp tiny
            # float overshoots so imshow gets values inside [0, 1]
            ax.imshow(inv_transform(images[k]).permute(1, 2, 0).clamp(0, 1))

        # only the first batch is shown
        break

plt.show()

## For cleaning gpu cache and reloading

In [None]:
# Drop the notebook's references to the large objects, then release cached GPU
# memory.  pop(name, None) keeps this cell re-runnable: a plain `del` raises
# NameError once the name is already gone.
# NOTE(review): other names that still reference these objects keep them
# alive until they are removed as well.
import gc

for _name in ("loaders", "model", "state"):
    globals().pop(_name, None)

# collect unreachable objects first so their GPU memory can actually be returned
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Re-execute the project modules so edits to their source files take effect
# without restarting the kernel.  Objects created before the reload keep
# using the old definitions.
import importlib
importlib.reload(models)
importlib.reload(worker)
importlib.reload(loader)