In [2]:
#   Imports
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torchvision
from torchvision import transforms, utils, datasets, models
from torchvision.models import ResNet50_Weights
from torchvision.io import read_image
from sklearn.metrics import multilabel_confusion_matrix, f1_score, ConfusionMatrixDisplay, confusion_matrix
import math
import random
import wandb
from facenet_pytorch import InceptionResnetV1
from matplotlib import pyplot as plt

random.seed(2)

In [3]:
#   Variables
device = ("cuda" if torch.cuda.is_available() else "cpu") # Use GPU or CPU for training

In [4]:
wandb.init(
    # set the wandb project where this run will be logged
    project="my-awesome-project",
    name="sub rand car new_dl"
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


In [5]:
class AlteredNet(nn.Module):
    def __init__(self):
        super(AlteredNet, self).__init__()

        # self.softmax_layer = nn.Softmax(dim=1)
        self.sigmoid_layer = nn.Sigmoid()

        self.model = torchvision.models.resnet18(
            weights=torchvision.models.ResNet18_Weights.DEFAULT
        )
        self.model.fc = nn.Linear(in_features=self.model.fc.in_features, out_features=150)

    def forward(self, x):
        output = self.model(x)
        return output

In [None]:
class HierarchicalNet(nn.Module):
    def __init__(self):
        super(HierarchicalNet, self).__init__()

        # Shared backbone network (e.g., ResNet18)
        self.backbone = torchvision.models.resnet18(pretrained=True)
        num_features = self.backbone.fc.in_features

        # Level 1 prediction (groups)
        self.prominent_prediction = nn.Linear(num_features, 18)  # 18 prominent features

        # Level 2 prediction (subfeatures within groups) (150 total)
        self.cheekbones_subfeature_prediction = nn.Linear(num_features, 2)  # Two subfeatures for cheekbones
        self.cheeks_subfeature_prediction = nn.Linear(num_features, 3)  # Three subfeatures for cheeks
        self.chin_subfeature_prediction = nn.Linear(num_features, 10)  # Ten subfeatures for chin
        self.ears_subfeature_prediction = nn.Linear(num_features, 8)  # Eight subfeatures for ears
        self.eyebrows_subfeature_prediction = nn.Linear(num_features, 14)  # Fourteen subfeatures for eyebrows
        self.eyelids_subfeature_prediction = nn.Linear(num_features, 4)  # Four subfeatures for eyelids
        self.eyes_subfeature_prediction = nn.Linear(num_features, 18)  # Sixteen subfeatures for eyes
        self.facialhair_subfeature_prediction = nn.Linear(num_features, 11)  # Eleven subfeatures for facial hair
        self.forehead_subfeature_prediction = nn.Linear(num_features, 6)  # Six subfeatures for forehead
        self.hair_subfeature_prediction = nn.Linear(num_features, 16)  # Sixteen subfeatures for hair
        self.head_subfeature_prediction = nn.Linear(num_features, 6)  # Six subfeatures for head
        self.lips_subfeature_prediction = nn.Linear(num_features, 8)  # Seven subfeatures for lips
        self.mouth_subfeature_prediction = nn.Linear(num_features, 3)  # Three subfeatures for mouth
        self.neck_subfeature_prediction = nn.Linear(num_features, 4)  # Four subfeatures for neck
        self.nose_subfeature_prediction = nn.Linear(num_features, 23)  # Twenty-One subfeatures for nose        
        self.skin_subfeature_prediction = nn.Linear(num_features, 5)  # Five subfeatures for skin
        self.teeth_subfeature_prediction = nn.Linear(num_features, 8)  # Eight subfeatures for teeth
        self.upperlip_subfeature_prediction = nn.Linear(num_features, 1)  # One subfeatures for upper lip
        
    def forward(self, x):
        features = self.backbone(x)

        # Level 1 prediction
        prominent_output = self.prominent_prediction(features)

        # Level 2 predictions within each group
        cheekbones_output = self.cheekbones_subfeature_prediction(features)  # Predicts subfeatures for cheek bones
        cheeks_output = self.cheeks_subfeature_prediction(features)  # Predicts subfeatures for cheeks
        chin_output = self.chin_subfeature_prediction(features)  # Predicts subfeatures for chin
        ears_output = self.ears_subfeature_prediction(features)  # Predicts subfeatures for ears
        eyebrows_output = self.eyebrows_subfeature_prediction(features)  # Predicts subfeatures for eyebrows
        eyelids_output = self.eyelids_subfeature_prediction(features)  # Predicts subfeatures for eyelids
        eyes_output = self.eyes_subfeature_prediction(features)  # Predicts subfeatures for eyes
        facialhair_output = self.facialhair_subfeature_prediction(features)  # Predicts subfeatures for facial hair
        forehead_output = self.forehead_subfeature_prediction(features)  # Predicts subfeatures for forehead
        hair_output = self.hair_subfeature_prediction(features)  # Predicts subfeatures for hair
        head_output = self.head_subfeature_prediction(features)  # Predicts subfeatures for head
        lips_output = self.lips_subfeature_prediction(features)  # Predicts subfeatures for lips
        mouth_output = self.mouth_subfeature_prediction(features)  # Predicts subfeatures for mouth
        neck_output = self.neck_subfeature_prediction(features)  # Predicts subfeatures for neck
        nose_output = self.nose_subfeature_prediction(features)  # Predicts subfeatures for nose
        skin_output = self.skin_subfeature_prediction(features)  # Predicts subfeatures for skin
        teeth_output = self.teeth_subfeature_prediction(features)  # Predicts subfeatures for teeth
        upperlip_output = self.upperlip_subfeature_prediction(features)  # Predicts subfeatures for upper lip

        return prominent_output, cheekbones_output, cheeks_output, chin_output, ears_output, eyebrows_output, eyelids_output, eyes_output, facialhair_output, forehead_output, hair_output, head_output, lips_output, mouth_output, neck_output, nose_output, skin_output, teeth_output, upperlip_output

In [None]:
""" #   Create Dataloader
transform = transforms.Compose([
    transforms.Resize((178, 218))
])

class CaricatureDataset(Dataset):
    def __init__(self, labels_file, root_dir, transform=None):
        self.labels = pd.read_csv(labels_file)
        self.root_dir = root_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        class_index = math.floor(idx / 5)
        self_car = self.labels.iloc[class_index, 0].strip() + "_caricature"
        img_person = os.path.join(self.root_dir, self_car)
        labels = self.labels.iloc[class_index, 1].replace('[', '').replace(']', '').split('.')[:-1]
        labels = np.array([int(label) for label in labels])
        
        images = os.listdir(img_person)
        selected_images = random.sample(images, 5)
        
        samples = []
        for image_file in selected_images:
            image_path = os.path.join(img_person, image_file)
            img = read_image(image_path)
            if img.shape[0] == 1:
                img = np.repeat(img, 3, axis=0)
            sample = img, labels
            samples.append(sample)
        
        return samples[idx % 5] """

In [22]:
""" class CaricatureDataset(Dataset):
    def __init__(self, labels_file, root_dir, split, transform=None):
        self.annotations = pd.read_csv(labels_file, names=["id", "label"], sep=",")
        self.root_dir = root_dir

        self.image_paths = []
        self.identities = []
        for id in self.annotations["id"].values.tolist():
            folder_name = id + "_caricature"

            image_names = os.listdir(os.path.join(root_dir, folder_name))
            random.shuffle(image_names)
            for img_index, img in enumerate(image_names):
                if split=="Train" and img_index<4:
                    self.image_paths.append((os.path.join(root_dir, folder_name, img)))
                    self.identities.append(id)
                elif split=="Test" and img_index==4:
                    self.image_paths.append((os.path.join(root_dir, folder_name, img)))
                    self.identities.append(id)

        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        id = self.identities[idx]

        image = torchvision.io.read_image(image_path, mode=torchvision.io.ImageReadMode.RGB)
        if self.transform:
            image = self.transform(image)
        
        label = self.annotations[self.annotations["id"]==id]["label"].values[0]

        label = label.strip("[]").split()

        # Convert strings to float and create a list
        label = [float(val) for val in label]

        return image, torch.tensor(label) """

In [42]:
class CaricatureDataset(Dataset):
    def __init__(self, labels_file, root_dir, split, transform=None):
        self.annotations = pd.read_csv(labels_file, index_col=0)
        self.root_dir = root_dir

        self.image_paths = []
        self.identities = self.annotations.axes[0].tolist()

        for id in self.identities:
            folder_name = id + "_caricature"

            image_names = os.listdir(os.path.join(root_dir, folder_name))
            random.shuffle(image_names)
            for img_index, img in enumerate(image_names):
                if split=="Train" and img_index<4:
                    self.image_paths.append((os.path.join(root_dir, folder_name, img)))
                elif split=="Test" and img_index==4:
                    self.image_paths.append((os.path.join(root_dir, folder_name, img)))

        self.transform = transform
    
    def __len__(self):
        # print(len(self.image_paths))
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        if split=="Train":
            class_index = math.floor(idx/4)
        elif split=="Test":
            class_index = idx
        id = self.identities[class_index]
        image = torchvision.io.read_image(image_path, mode=torchvision.io.ImageReadMode.RGB)
        if self.transform:
            image = self.transform(image)
        
        label = self.annotations.loc[id]

        return image, torch.tensor(label)

In [43]:
#   Load the data
#   Create Dataloader
transform = transforms.Compose([
    transforms.Resize((218 , 178))
])

train_dataset = CaricatureDataset(labels_file='binary_sub_labels.csv', root_dir='/home/jsutariya/Desktop/Project/ourcar/', split="Train", transform=transform)
test_dataset = CaricatureDataset(labels_file='binary_sub_labels.csv', root_dir='/home/jsutariya/Desktop/Project/ourcar/', split="Test", transform=transform)

#   Create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [44]:
# # Visualize the images
# i=0
# for batch in test_loader:
#     images, labels, id = batch
    
#     # Display the images
#     grid = torchvision.utils.make_grid(images, nrow=5)
#     plt.imshow(grid.permute(1, 2, 0))
#     plt.axis('off')
#     plt.show()

#     # Get the subject names
#     # subject_names = [dataset.annotations.iloc[math.floor(i / 5), 0].strip() + "_caricature" for i in range(len(images))]
#     print("Subject Names:", id)
#     labels_list = labels.tolist()
#     for list in labels_list:
#         print(list)
    
#     i+=1
#     if i == 3:
#         break  # Only process the first batch for demonstration


In [7]:
model = AlteredNet().to(device)
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

def train_one_epoch(epoch_index, accp, totp):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(train_loader):
        # Every data instance is an input + label pair
        inputs, labels = data

        inputs, labels = inputs.to(device).float(), labels.to(device).float()
        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        output = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(output, labels)

        loss.backward()
        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        if i % 10 == 9:
            last_loss = running_loss / 10 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.

        output = model.sigmoid_layer(output)

        predictions = np.round(output.detach().cpu().numpy()).flatten()
        target = labels.detach().cpu().numpy().flatten()
        for index, prediction in enumerate(predictions):
            if prediction == target[index]:
                accp += 1
            totp += 1
        acc = accp/totp

        torch.cuda.empty_cache()

    return last_loss, acc

In [8]:
EPOCHS = 100
epoch_number = 0

for epoch in range(EPOCHS):
    accurate_predictions = 0
    total_predictions = 0

    vaccp = 0
    vtotp = 0
    vatl = np.array([])
    vapl = np.array([]) 

    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss, accuracy = train_one_epoch(epoch_number, accurate_predictions, total_predictions)
    #'''
    # We don't need gradients on to do reporting
    model.train(False)

    running_vloss = 0.0
    for i, vdata in enumerate(test_loader):
        vinputs, vlabels = vdata

        vinputs, vlabels = vinputs.to(device).float(), vlabels.to(device).float()
        voutputs = model(vinputs)
        
        vloss = loss_fn(voutputs, vlabels)

        running_vloss += vloss
        
    avg_vloss = running_vloss / (i + 1)

    voutputs = model.sigmoid_layer(voutputs)

    vpredictions = np.round(voutputs.detach().cpu().numpy()).flatten()
    #vpredictions = np.where(voutputs.detach().cpu().numpy() > 0.5, 1, 0)  # Apply thresholding for multi-label classificationv
    vtarget = vlabels.detach().cpu().numpy().flatten()
    for index, vprediction in enumerate(vpredictions):
        if vprediction == vtarget[index]:
            vaccp += 1
        vtotp += 1
    vacc = vaccp/vtotp

    vatl = np.append(vatl, vtarget)
    vapl = np.append(vapl, vpredictions)

    f1 = f1_score(vatl, vapl, average='weighted')

    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
    print('ACC train {} valid {}'.format(accuracy, vacc))
    print('F1 score: {}'.format(f1))

    #wandb.log({ 'Training' : avg_loss, 'Validation' : avg_vloss, 'Train Acc' : accuracy, 'Val Acc' : vacc, 'F1 Score': f1})
    #'''
    #print('ACC train {}'.format(accuracy))
    torch.cuda.empty_cache()

    epoch_number += 1


EPOCH 1:
  batch 10 loss: 0.7376927852630615
  batch 20 loss: 0.7033847332000732
  batch 30 loss: 0.6597188472747803
  batch 40 loss: 0.6117695689201355
  batch 50 loss: 0.5698795974254608
  batch 60 loss: 0.5446260452270508
  batch 70 loss: 0.5134212911128998
  batch 80 loss: 0.49137944877147677
  batch 90 loss: 0.4757650136947632
  batch 100 loss: 0.45795331001281736
  batch 110 loss: 0.443063884973526
  batch 120 loss: 0.42872370183467867
  batch 130 loss: 0.41806496381759645
  batch 140 loss: 0.4058567702770233
  batch 150 loss: 0.39989835023880005
  batch 160 loss: 0.38923689126968386
  batch 170 loss: 0.37518560588359834
  batch 180 loss: 0.3779471665620804
  batch 190 loss: 0.3765044003725052
  batch 200 loss: 0.36883204281330106
LOSS train 0.36883204281330106 valid 0.36852043867111206
ACC train 0.7993642769607843 valid 0.865234375
F1 score: 0.8363733033162881
EPOCH 2:
  batch 10 loss: 0.36421593725681306
  batch 20 loss: 0.35951915085315705
  batch 30 loss: 0.3559811353683472
 

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 7.79 GiB total capacity; 7.00 GiB already allocated; 30.31 MiB free; 7.09 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
wandb.finish()

In [None]:
model.eval()
correct = 0
all_true_labels = np.array([])
all_predictions = np.array([])
all_true_labels_itemized = np.zeros([1,128])
all_predictions_itemized = np.zeros([1,128])

running_vloss = 0.0
with torch.no_grad():
    for i, (data, target) in enumerate(test_loader):
        data, target = data.to(device).float(), target.to(device).float()
        output = model(data)
        
        vloss = loss_fn(output, target)
        running_vloss += vloss

        output = model.sigmoid_layer(output)

        predictions = np.where(output.detach().cpu().numpy() > 0.5, 1, 0)  # Apply thresholding for multi-label classification
        labels = target.detach().cpu().numpy()
        
        all_true_labels = np.append(all_true_labels, labels)
        all_predictions = np.append(all_predictions, predictions)
        
        predictions = predictions.transpose()

        all_true_labels_itemized = np.concatenate((all_true_labels_itemized, labels), axis=0)
        all_predictions_itemized = np.concatenate((all_predictions_itemized, predictions.T), axis=0)

avg_vloss = running_vloss / (i + 1)        
for index, prediction in enumerate(all_predictions):
    if prediction == all_true_labels[index]:
        correct += 1
print('Accuracy: ', correct/len(all_predictions))
print('Loss: ', avg_vloss)
print('Confusion Matrix: ', multilabel_confusion_matrix(all_true_labels, all_predictions))

In [None]:
transposed_labels = all_true_labels_itemized.transpose()
transposed_predictions = all_predictions_itemized.transpose()

In [None]:
features = np.array(["Adam's apple", 'almond', 'arched (v-shaped)', 'bags under eyes', 'bald', 'bangs', 'beard', 'big', 'big/wide', 'black', 'blond', 
                     'buck', 'bulbous', 'bushy', 'button', 'cheekbones', 'cheeks', 'chin', 'chubby/full', 'cleft', 'crooked', 'crows feet', 'curly', 
                     'curved down', 'deep-set', 'dimples', 'dorsal hump', 'double chin', 'downturned', 'dreads', 'drooping', 'ears', 'eyebrows', 'eyelids', 
                     'eyes', 'facial hair', 'far apart', 'flared nostrils', 'flat', 'forehead', 'forward', 'freckles', 'furrowed', 'gap', 'glasses', 
                     'goatee', 'hair', 'handlebar', 'hat', 'head', 'high', 'hooded', 'hooked', 'large', 'lazy eye', 'light', 'light-colored', 'lines', 
                     'lips', 'long', 'long eyelashes', 'low', 'medial cleft', 'messy', 'mole', 'mouth', 'mustache', 'narrow', 'narrow-set', 'neck', 'nose', 
                     'overbite', 'pale', 'pierced', 'pointed', 'pointy', 'pouty/full', 'puffy', 'receded', 'receding hairline', 'red', 'red lipstick', 
                     'rough', 'round', 'rounded', 'rounded tip', 'scar', 'sharp', 'short', 'sideburns', 'skin', 'slanted down', 'slanted up', 
                     'slicked back', 'small', 'small nostrils', 'smooth', 'soul patch', 'square', 'stick out', 'straight', 'strong jawline', 'stubble', 
                     'tattoos', 'teeth', 'thick', 'thick lower', 'thin', 'thin bridge', 'thin upper', 'thin/hollow', 'trimmed', 'unibrow', 'upper lip', 
                     'upturned', 'v-shaped', 'weak jawline', 'well-defined tip', 'white', 'white streaks', 'wide', 'wide bridge', 'wide nostrils', 
                     'wide tip', 'wide-set', 'wide-x', 'widows peak', 'wrinkled'])

'''features = np.array([
    'cheekbones', 
    'cheeks', 
    'chin', 
    'ears', 
    'eyebrows', 
    'eyelids', 
    'eyes', 
    'facial hair', 
    'forehead', 
    'hair', 
    'head', 
    'lips', 
    'mouth', 
    'neck', 
    'nose', 
    'skin', 
    'teeth', 
    'upper lip'
])'''

fig, axes = plt.subplots(nrows=64, ncols=2, figsize=(15, 150))

for index, (feature, ax) in enumerate(zip(features, axes.flatten())):
    cm = confusion_matrix(transposed_labels[index],
                         transposed_predictions[index])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(ax=ax)
    ax.title.set_text(feature)

plt.show()