In [None]:
# Mount Google Drive inside the Colab VM at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
# Folder on the mounted drive that holds the workbook read below.
data_dir = '/content/drive/MyDrive/_data/AMFD Faces Final/'

# Load the norming workbook into a DataFrame, skipping the first four sheet rows.
AMFD_Dataframe = pd.read_excel(
    io=data_dir + 'AMFD Norming Data + Codebook.xlsx',
    skiprows=4,
)

In [None]:
import os
# Collect the full path of every .jpg file found directly inside data_dir.
# os.listdir returns entries in arbitrary order, so the list is sorted to keep
# the sample order (and any index-based split built on it) reproducible.
image_paths = sorted(
    os.path.join(data_dir, filename)
    for filename in os.listdir(data_dir)
    if filename.endswith('.jpg')
)


In [None]:
import torch
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import Dataset
from PIL import Image

class AMFDDataset(Dataset):
    """Dataset that pairs each face image with its row of the norming table.

    Samples are indexed by position in ``image_paths``. For each image the
    file name is parsed into an expression code and a photo id, and the row of
    ``dataframe`` whose ``PhotoID`` and ``FType`` columns match is looked up.

    Args:
        dataframe: table with at least ``PhotoID`` and ``FType`` columns.
        image_paths: list of image file paths; file names are expected to look
            like ``<prefix>-<photo id>.<ext>``.
        transform: optional callable applied to the RGB PIL image. When it is
            ``None`` the image is converted with ``ToTensor()``.
    """

    # Positional column of the attractiveness rating in the norming table.
    # NOTE(review): taken from the original hard-coded index; confirm against
    # the codebook if the spreadsheet layout changes.
    ATTRACTIVENESS_COL = 14

    def __init__(self, dataframe, image_paths, transform=None):
        self.dataframe = dataframe
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        # __getitem__ indexes image_paths, so that list defines the dataset size.
        return len(self.image_paths)

    @staticmethod
    def _parse_filename(image_path):
        """Return ``(expression_code, image_id_photo)`` parsed from a path.

        Only the final path component is inspected, and both ``/`` and ``\\``
        are accepted as separators, so hyphens in directory names are harmless.
        ``expression_code`` is 0 when the second-to-last character of the part
        before the first ``-`` is ``'N'`` and 1 otherwise. ``image_id_photo``
        is the text between the first ``-`` and the following ``.``, returned
        as a string.

        Raises:
            ValueError: if the file name does not have the expected shape.
        """
        file_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
        parts = file_name.split('-')
        if len(parts) < 2 or len(parts[0]) < 2:
            raise ValueError(f"Unexpected image file name: {file_name!r}")

        # get expression code (which is the FType in table)
        expression_code = 0 if parts[0][-2] == 'N' else 1
        # get image id from image file
        image_id_photo = parts[1].split('.')[0]
        return expression_code, image_id_photo

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(image_id_photo, image, image_features, expression_code,
            attractiveness)``: the photo id as a string, the transformed image,
            the whole matching table row as a float32 tensor (photo id
            included), the expression code as a tensor, and the value of
            column ``ATTRACTIVENESS_COL`` as a float32 tensor.

        Raises:
            ValueError: if no table row matches the image's photo id and
                expression code.
        """
        image_path = self.image_paths[idx]
        expression_code, image_id_photo = self._parse_filename(image_path)

        # using expression code and image id to search in dataframe
        match = self.dataframe[
            (self.dataframe['PhotoID'] == int(image_id_photo))
            & (self.dataframe['FType'] == expression_code)
        ]
        if match.empty:
            raise ValueError(
                f"No data found for PhotoID={image_id_photo}, "
                f"FType={expression_code} (image: {image_path})"
            )

        # The whole first matching row, photo id included.
        # Use match.iloc[0, 1:] instead if the photo id must be excluded.
        image_features = torch.tensor(match.iloc[0, 0:].values, dtype=torch.float32)
        attractiveness = torch.tensor(
            match.iloc[0, self.ATTRACTIVENESS_COL], dtype=torch.float32
        )

        # Close the underlying file as soon as the pixel data has been decoded.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image)
        else:
            image = ToTensor()(image)

        return image_id_photo, image, image_features, torch.tensor(expression_code), attractiveness

    # (X - mean) / sd normalization
    def normalize(self, df):
        """Return a copy of ``df`` with every column but the first standardised.

        Each column after the first (the photo id) becomes
        ``(column - mean) / std``; ``df`` itself is not modified. A column
        with zero standard deviation yields NaN values.
        """
        normalized_df = df.copy()
        # first column is photo id, skip
        for column in df.columns[1:]:
            mean_value = df[column].mean()
            std_value = df[column].std()
            normalized_df[column] = (df[column] - mean_value) / std_value
        return normalized_df


In [None]:
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
import numpy as np
import torchvision.transforms as transforms


# Create data set from data frame
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224 pixels
    transforms.ToTensor(),          # Convert the image to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])
amfd_dataset = AMFDDataset(AMFD_Dataframe, image_paths, transform=transform)

# using photo ids as unit because we must put photos of one person in one dataset.
# The dataset is indexed by position in image_paths, so the id list must follow
# image_paths order. Dataframe row order is not guaranteed to match the file
# listing; deriving ids from it can scatter one person's photos across splits.
photo_ids = np.array([
    int(path.replace('\\', '/').rsplit('/', 1)[-1].split('-')[1].split('.')[0])
    for path in image_paths
])
unique_photo_ids = np.unique(photo_ids)

# extract a single set (1/10) for test set
train_val_photo_ids, test_photo_ids = train_test_split(unique_photo_ids, test_size=0.1, random_state=42)

# extract a single set for validation set: 1/9 of the remaining 9/10 is 1/10 of all ids.
train_photo_ids, val_photo_ids = train_test_split(train_val_photo_ids, test_size=(1/9), random_state=42)

In [None]:
from torch.utils.data import DataLoader

# get index from train_photo_ids, val_photo_ids, and test_photo_ids
def get_photo_ids(photo_id_sets, all_photo_ids=None):
    """Return the positions whose photo id belongs to ``photo_id_sets``.

    Args:
        photo_id_sets: collection (array, list, set, ...) of photo ids to keep.
        all_photo_ids: sequence of photo ids, one per sample. Defaults to the
            notebook-level ``photo_ids`` array when omitted.

    Returns:
        List of integer positions ``i`` such that ``all_photo_ids[i]`` is in
        ``photo_id_sets``, in increasing order.
    """
    if all_photo_ids is None:
        all_photo_ids = photo_ids
    # Build the lookup once; testing `in` against an ndarray rescans it per id.
    wanted = set(photo_id_sets.tolist() if hasattr(photo_id_sets, 'tolist') else photo_id_sets)
    return [i for i, photo_id in enumerate(all_photo_ids) if photo_id in wanted]

train_indices = get_photo_ids(train_photo_ids)
val_indices = get_photo_ids(val_photo_ids)
test_indices = get_photo_ids(test_photo_ids)

# The three id sets are disjoint, so no sample position may appear in two splits.
assert not set(train_indices) & set(val_indices), "train/val overlap"
assert not set(train_indices) & set(test_indices), "train/test overlap"
assert not set(val_indices) & set(test_indices), "val/test overlap"

print(f"train_indices is {train_indices} and len is {len(train_indices)}")
print(f"val_indices is {val_indices} and len is {len(val_indices)}")
print(f"test_indices is {test_indices} and len is {len(test_indices)}")

# using these indices to get element from dataset in order to make photos of one person in one set
train_dataset = Subset(amfd_dataset, train_indices)
val_dataset = Subset(amfd_dataset, val_indices)
test_dataset = Subset(amfd_dataset, test_indices)

print(f"train_dataset is {train_dataset} and len is {len(train_dataset)}")
print(f"val_dataset is {val_dataset} and len is {len(val_dataset)}")
# Fixed: this line used to print train_dataset under the test_dataset label.
print(f"test_dataset is {test_dataset} and len is {len(test_dataset)}")

# Only the training loader shuffles; validation and test keep a fixed order.
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


train_indices is [0, 1, 3, 5, 6, 7, 8, 9, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 42, 43, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 80, 81, 82, 84, 85, 86, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 114, 115, 116, 117, 118, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 137, 138, 140, 141, 142, 143, 146, 147, 148, 149, 150, 151, 152, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 171, 172, 174, 175, 176, 178, 179, 180, 181, 182, 183, 184, 185, 186, 190, 191, 192, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218] and len is 175
val_indices is [2, 14, 27, 35, 36, 44, 46, 51, 61, 79, 87, 111, 123, 136, 144, 145, 153, 155, 160, 170, 189, 197] and len is 22
test_indices is [4, 10, 11, 30, 45, 64, 68, 77, 78

In [None]:
#@title collapse
# Sanity check: show the contents of the first training batch, then stop.
for i, batch in enumerate(train_loader):
    image_id_photo, image, image_features, face_expression, attract_score = batch
    print(f"batch {i}")
    for shown in (image_id_photo, image.shape, image_features.shape, face_expression, attract_score):
        print(shown)
    break

batch 0
('1093', '1003', '1055', '1001', '1039', '1080', '1076', '1084')
torch.Size([8, 3, 224, 224])
torch.Size([8, 71])
tensor([1, 1, 1, 1, 1, 0, 1, 1])
tensor([3.8500, 5.4259, 3.6829, 3.8654, 4.4510, 4.8302, 5.7000, 5.0806])


In [None]:
from torchvision import models
import torch
import torch.nn as nn
import torch.nn.functional as F



# Load torchvision's VGG16 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm the installed version before switching.
vgg = models.vgg16(pretrained=True)


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 190MB/s]


In [None]:
class CustomHead(nn.Module):
    """Head with a shared trunk feeding a 2-way classifier and a scalar regressor.

    Args:
        num_features: size of the flattened feature vector fed to the trunk.

    ``forward`` returns ``(classification_output, regression_output)`` with
    last dimensions 2 and 1 respectively.
    """

    def __init__(self, num_features):
        super().__init__()
        hidden = 4096
        # Two Linear -> ReLU -> Dropout stages shared by both outputs.
        trunk_layers = [
            nn.Linear(num_features, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(0.5),
        ]
        self.common_layer = nn.Sequential(*trunk_layers)
        # Binary classification branch: hidden -> 1000 -> 2 logits.
        self.classifier1 = nn.Linear(hidden, 1000)
        self.classifier2 = nn.Linear(1000, 2)
        # Regression branch: hidden -> single value.
        self.regressor = nn.Linear(hidden, 1)

    def forward(self, x):
        shared = self.common_layer(x)
        class_hidden = torch.relu(self.classifier1(shared))
        return self.classifier2(class_hidden), self.regressor(shared)

In [None]:
# facial_expressions = [a[3].item() for i, a in enumerate(train_dataset)]

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Read the input width of the first fully-connected layer of the current
# classifier. This works for the stock nn.Sequential classifier and for an
# already-installed CustomHead (via its common_layer), so the cell can be
# re-run without failing; every run installs a freshly initialised head.
first_fc = getattr(vgg.classifier, 'common_layer', vgg.classifier)[0]
num_features = first_fc.in_features
vgg.classifier = CustomHead(num_features)
vgg.to(device)

# Adam over all parameters; parameters that never receive a gradient are
# skipped by the optimizer step.
optimizer = torch.optim.Adam(vgg.parameters(), lr=1e-4, weight_decay=1e-5)

# criterion1: classification loss on the 2-way logits.
# criterion2: regression loss on the scalar output.
criterion1 = nn.CrossEntropyLoss().to(device)
criterion2 = nn.MSELoss().to(device)

MSELoss()

In [None]:
# NOTE(review): vgg2 is a second pretrained VGG16 that no visible cell uses;
# confirm whether it is still needed before keeping this load.
vgg2 = models.vgg16(pretrained=True)
# Bare expression: displays the current classifier module as the cell output.
vgg.classifier

CustomHead(
  (common_layer): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
  )
  (classifier1): Linear(in_features=4096, out_features=1000, bias=True)
  (classifier2): Linear(in_features=1000, out_features=2, bias=True)
  (regressor): Linear(in_features=4096, out_features=1, bias=True)
)

In [None]:
#@title Define Train function
def _find_head(model):
    """Return the sub-module to fine-tune: ``model.fc`` or ``model.classifier``."""
    for attr in ("fc", "classifier"):
        head = getattr(model, attr, None)
        if isinstance(head, torch.nn.Module):
            return head
    raise AttributeError("model has neither an 'fc' nor a 'classifier' sub-module to fine-tune")


def _run_epoch(model, loader, criterion1, criterion2, run_device, optimizer=None):
    """Run one pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns ``(mean loss per batch, accuracy over len(loader.dataset))`` as floats.
    """
    total_loss = 0.0
    corrects = 0
    for image_id_photo, image, image_features, face_type, att_score in loader:
        image = image.to(run_device)
        # CrossEntropyLoss expects integer class indices as the target.
        face_type = face_type.to(run_device).long()
        att_score = att_score.to(run_device)
        if optimizer is not None:
            optimizer.zero_grad()
        out_face, out_score = model(image)
        # Logits keep their (batch, classes) shape so a batch of one sample
        # still works; only the regression output is flattened to (batch,).
        loss = criterion1(out_face, face_type) + criterion2(out_score.reshape(-1), att_score.reshape(-1))
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
        corrects += (torch.argmax(out_face, dim=1) == face_type).sum().item()
    # max(..., 1) avoids a ZeroDivisionError on an empty loader.
    return total_loss / max(len(loader), 1), corrects / max(len(loader.dataset), 1)


def train(model, train_loader, val_loader, optimizer, criterion1, criterion2, num_epochs=10):
    """Fine-tune the head of ``model`` on the two-task objective and print metrics.

    All parameters are frozen except those of the head (``model.fc`` or
    ``model.classifier``). Each epoch runs one training pass and one validation
    pass, then prints loss and classification accuracy for both.

    Args:
        model: network whose forward returns ``(class_logits, score)``.
        train_loader, val_loader: loaders yielding 5-tuples
            ``(image_id, image, image_features, face_type, att_score)``.
        optimizer: optimizer used for the training pass.
        criterion1: classification loss applied to ``(class_logits, face_type)``.
        criterion2: regression loss applied to ``(score, att_score)``.
        num_epochs: number of epochs to run.

    Raises:
        AttributeError: if the model has neither ``fc`` nor ``classifier``.
    """
    # Freezing is loop-invariant, so it is done once before the first epoch.
    head = _find_head(model)
    for params in model.parameters():
        params.requires_grad = False
    for params in head.parameters():
        params.requires_grad = True

    # Run on whatever device the model already lives on.
    first_param = next(iter(model.parameters()), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()
        train_loss, train_acc = _run_epoch(model, train_loader, criterion1, criterion2, run_device, optimizer)

        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_epoch(model, val_loader, criterion1, criterion2, run_device)

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        # print(f"Epoch {epoch}/{num_epochs}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

In [None]:
model = vgg
epochs=30

# Fine-tune only the custom classifier head; everything else stays frozen.
# Nothing re-enables these gradients, so freezing once before the loop suffices.
for params in model.parameters():
    params.requires_grad = False
for params in model.classifier.parameters():
    params.requires_grad = True

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    train_corrects = 0
    for i, (image_id_photo, image, image_features, face_type, att_score) in enumerate(train_loader):
        image = image.to(device)
        face_type = face_type.to(device)
        att_score = att_score.to(device)
        optimizer.zero_grad()
        out_face, out_score = model(image)
        # Logits keep their (batch, 2) shape so a final batch holding a single
        # sample does not collapse; the regression output drops its size-1 column.
        # NOTE(review): the classification term is weighted 2x here but 1x in
        # validation below, so the two printed losses are not directly comparable.
        loss = 2 * criterion1(out_face, face_type) + criterion2(out_score.squeeze(1), att_score)
        pred_face = torch.argmax(out_face, dim=1)
        train_corrects += torch.sum(pred_face == face_type).item()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)
    train_acc = train_corrects / len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for i, (image_id_photo, image, image_features, face_type, att_score) in enumerate(val_loader):
            image = image.to(device)
            face_type = face_type.to(device)
            att_score = att_score.to(device)
            out_face, out_score = model(image)
            loss = criterion1(out_face, face_type) + criterion2(out_score.squeeze(1), att_score)
            pred_face = torch.argmax(out_face, dim=1)
            val_corrects += torch.sum(pred_face == face_type).item()
            val_loss += loss.item()
    val_loss = val_loss / len(val_loader)
    val_acc = val_corrects / len(val_loader.dataset)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")


Epoch 1/30, Train Loss: 1.3849, Train Acc: 0.6800 Val Loss: 1.1347, Val Acc: 0.5455
Epoch 2/30, Train Loss: 1.0967, Train Acc: 0.8686 Val Loss: 1.1214, Val Acc: 0.5455
Epoch 3/30, Train Loss: 0.9158, Train Acc: 0.8514 Val Loss: 1.4750, Val Acc: 0.5455
Epoch 4/30, Train Loss: 0.4906, Train Acc: 0.9429 Val Loss: 1.5060, Val Acc: 0.5909
Epoch 5/30, Train Loss: 0.5475, Train Acc: 0.9200 Val Loss: 2.5583, Val Acc: 0.5909
Epoch 6/30, Train Loss: 1.7173, Train Acc: 0.7543 Val Loss: 1.3248, Val Acc: 0.6364
Epoch 7/30, Train Loss: 0.9209, Train Acc: 0.8800 Val Loss: 1.1708, Val Acc: 0.5909
Epoch 8/30, Train Loss: 0.5728, Train Acc: 0.9429 Val Loss: 1.4763, Val Acc: 0.6364
Epoch 9/30, Train Loss: 0.4100, Train Acc: 0.9657 Val Loss: 1.5994, Val Acc: 0.5909
Epoch 10/30, Train Loss: 0.4664, Train Acc: 0.9486 Val Loss: 1.2952, Val Acc: 0.6818
Epoch 11/30, Train Loss: 0.3272, Train Acc: 0.9771 Val Loss: 1.4529, Val Acc: 0.6364
Epoch 12/30, Train Loss: 0.2857, Train Acc: 0.9886 Val Loss: 1.4433, Val A