---

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the project's source files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Make the project's source packages importable: take the first sys.path
# entry, swap 'notebooks' for 'src' in it, and append the result.
# NOTE(review): str.replace rewrites every occurrence of 'notebooks' in that
# path and leaves it unchanged when the substring is absent; this presumably
# relies on sys.path[0] being the notebooks directory — confirm.
sys.path.append(sys.path[0].replace('notebooks', 'src'))

# Project helper module; the rest of the notebook reaches torch and the
# dataset/training helpers through the `m` alias.
import modeling.modeling_utils as m

In [3]:
# Display the PyTorch version exposed through the helper module.
m.torch.__version__

'2.1.0+cu121'

In [4]:
# Display whether PyTorch can see a CUDA device; the same check picks the device in the next cell.
m.torch.cuda.is_available() # Check for GPU

True

In [5]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if m.torch.cuda.is_available():
    device = m.torch.device("cuda")
else:
    device = m.torch.device("cpu")

In [6]:
# Turn on cuDNN's benchmark mode (helps optimize training w/ GPU).
m.torch.backends.cudnn.benchmark = True

---

The WEBEmo dataset is too large to upload alongside this repository. The ```image-gather``` notebook can be used to download all of the images; this notebook contains the code to train a model on them using curriculum learning.

In [7]:
# Load the train and test tables through the project helper.
# NOTE(review): the data location and schema are decided inside m.load_data — confirm there.
train, test = m.load_data()

In [8]:
# Preview the first rows of the training table.
train.head()

Unnamed: 0.1,Unnamed: 0,file,lvl_three,lvl_one,lvl_two
0,0,1_220_F_83683073_O4yJOnarzTjKXuUBAgkAifmiC8d0I...,1,0,0
1,1,20_220_F_5292725_818KTy3xv82nEkNolcs2m37MOV86s...,20,1,1
2,2,20_220_F_47187567_lwYwc9UQtBK5Be6v4P7HNsCc4Hhr...,20,1,1
3,3,1_220_F_38932828_Osns7NBWCq8AhJonYpQArrToDLLhT...,1,0,0
4,4,1_220_F_97168737_y0VWy7kLMby9BO6lHDfpyfNpW9o0S...,1,0,0


In [9]:
# Number of rows in the training table.
len(train)

5001

# Level 1

<br>
For training, I split the data into 90/10 train/validation sets. 

In [10]:
# Hold out 10% of the data for validation, using the level-1 labels as the
# target. random_state is pinned so the split can be repeated (assuming the
# helper seeds its shuffle with it).
train_split, val_split = m.train_val_split(
    X=train['file'],
    y=train['lvl_one'],
    test_size=.1,
    random_state=713,
)

## Loading Data

Pytorch has some pretty neat classes to help load in data. First, I define the transforms that will be used as photos are iteratively loaded during training.

In [11]:
# Training pipeline: resize, take a random 224x224 crop (light augmentation),
# convert to a tensor and normalize with the original ImageNet means/stds.
train_transforms = m.transforms.Compose([
    m.transforms.Resize(256),
    m.transforms.RandomCrop(224),
    m.transforms.ToTensor(),
    m.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Validation pipeline: identical except for a deterministic center crop.
val_transforms = m.transforms.Compose([
    m.transforms.Resize(256),
    m.transforms.CenterCrop(224),
    m.transforms.ToTensor(),
    m.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [12]:
# Wrap the level-1 train/validation splits in the project's Dataset class.
# Both read images from the same directory; only the transform differs.
l1_train_dataset = m.ImgDataset(
    df=train_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=train_transforms,
)
l1_val_dataset = m.ImgDataset(
    df=val_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=val_transforms,
)

In [13]:
# DataLoaders feed minibatches of 32 during training. Only the training
# loader shuffles. pin_memory is left at its default here (it is only
# useful when training on a GPU).
l1_loaders = {
    'train': m.DataLoader(
        l1_train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
    ),
    'val': m.DataLoader(
        l1_val_dataset,
        batch_size=32,
        num_workers=4,
    ),
}

## Training

I used the ```resnet50``` model from PyTorch as the base for the first level. Although the pretrained weights were loaded to initialize training, I didn't freeze any layers. That is, I fine-tuned the whole ```resnet50``` model instead of using it purely for feature extraction.

The ```resnet50``` fully-connected classifier layer has the form ```(input features, output classes)```, where the original number of output classes is 1000 for ImageNet; this is simply changed to 2 for the first level.

In [14]:
from torch.optim import lr_scheduler
import torch.optim as optim


In [15]:
# Get everything set up
# Start from ImageNet-pretrained ResNet-50 weights. The `weights` argument
# replaces the `pretrained=True` flag that torchvision deprecated in 0.13;
# "IMAGENET1K_V1" is the checkpoint `pretrained=True` used to load.
# (Assumes m.models is torchvision.models.)
l1_model = m.models.resnet50(weights="IMAGENET1K_V1")

# Replace the 1000-class ImageNet head with a 2-class head for level 1.
num_ftrs = l1_model.fc.in_features
l1_model.fc = m.nn.Linear(num_ftrs, 2)
l1_model = l1_model.to(device)

l1_criterion = m.nn.CrossEntropyLoss().to(device)
l1_optim = m.torch.optim.SGD(l1_model.parameters(),
                             lr=0.01,
                             momentum=0.9,
                             weight_decay=0.0001)

# Lower the learning rate when the monitored metric has stopped improving
# for `patience` epochs (presumably the validation loss, as stepped inside
# m.train_model — confirm there).
l1_scheduler = lr_scheduler.ReduceLROnPlateau(l1_optim, patience=5)



In [21]:
# Train the level-1 model for 3 epochs, saving to ../models/l1model.tar.
# Early stopping is configured with a patience of 10.
l1_model_train = m.train_model(
    model=l1_model,
    dataloader=l1_loaders,
    criterion=l1_criterion,
    optimizer=l1_optim,
    save_path='../models/l1model.tar',
    num_epochs=3,
    scheduler=l1_scheduler,
    early_stopping=m.EarlyStopping(patience=10),
)

Epoch 0/2
----------
train Loss: 0.6025 Acc: 0.6784
val Loss: 0.6866 Acc: 0.5808
saving model - best loss
0.602476218117608
0.6866253315807579
tensor(0.6784, device='cuda:0', dtype=torch.float64)
tensor(0.5808, device='cuda:0', dtype=torch.float64)
logs.csv

Epoch 1/2
----------


  log_df = log_df.append(pd.DataFrame({


train Loss: 0.5632 Acc: 0.7047
val Loss: 0.7425 Acc: 0.5948
EarlyStopping counter: 1 out of 10
0.5632485968536801
0.7424675071548797
tensor(0.7047, device='cuda:0', dtype=torch.float64)
tensor(0.5948, device='cuda:0', dtype=torch.float64)
logs.csv

Epoch 2/2
----------


  log_df = log_df.append(pd.DataFrame({


train Loss: 0.5250 Acc: 0.7376
val Loss: 0.8201 Acc: 0.5888
EarlyStopping counter: 2 out of 10
0.5250270613034567
0.8201097339213251
tensor(0.7376, device='cuda:0', dtype=torch.float64)
tensor(0.5888, device='cuda:0', dtype=torch.float64)
logs.csv

Training complete in 3m 10s


  log_df = log_df.append(pd.DataFrame({


Best val Acc: 0.580838
Logs saved to ../models/l1model.tar_logs.csv


# Level 2

The idea behind curriculum learning is to expose the model to a sequence of discriminative tasks of increasing difficulty. Level 2 contains the exact same images, but now there are 6 classes to predict. Below, the model is initialized from the trained level 1 model, and only 2 things are different from the level 1 training:
1. The learning rate is 1/10 that of level 1 (i.e., 0.001 instead of 0.01)
2. The fully-connected layer is modified to classify level 2 labels (i.e., 6 instead of 2 possibilities)
<br>

First, I make new dataset/dataloader classes for the level-2 data.

In [22]:
# Re-use the level-1 split, but look up each file's level-2 label by joining
# back onto the full training table and keeping only the needed columns.
l2_train_split = train_split.merge(train, on='file')[['file', 'lvl_two']]
l2_val_split = val_split.merge(train, on='file')[['file', 'lvl_two']]

In [23]:
# Level-2 datasets: same images and transforms as level 1, new label column.
l2_train_dataset = m.ImgDataset(
    df=l2_train_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=train_transforms,
)
l2_val_dataset = m.ImgDataset(
    df=l2_val_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=val_transforms,
)

The level-2 and level-3 data also suffer from class imbalance. To deal with that during training, I use PyTorch's ```WeightedRandomSampler```. By assigning a weight to each class, the minibatches become approximately evenly distributed among the classes.

In [24]:
# Per-sample weights derived from the level-2 labels of the training split.
l2_samples_weights = m.weighted_sample(l2_train_split, 'lvl_two')

# Draw as many samples per epoch as there are weights (one per training row).
l2_weighted_sampler = m.WeightedRandomSampler(
    weights=l2_samples_weights,
    num_samples=len(l2_samples_weights),
)

In [25]:
# Level-2 loaders: the training loader draws through the weighted sampler
# instead of shuffling; the validation loader iterates in order.
l2_loaders = {
    'train': m.DataLoader(
        l2_train_dataset,
        batch_size=32,
        sampler=l2_weighted_sampler,
        num_workers=4,
    ),
    'val': m.DataLoader(
        l2_val_dataset,
        batch_size=32,
        num_workers=4,
    ),
}

In [28]:
# Initialize the level-2 model from the saved level-1 checkpoint.
# The base network is created without pretrained weights because the
# checkpoint supplies them; `weights=None` is the current spelling of the
# deprecated `pretrained=False` flag (assumes m.models is torchvision.models).
l2_model = m.load_model(path='../models/l1model.tar',
                        base=m.models.resnet50(weights=None),
                        old_classes=2,
                        new_classes=6,
                        device=device)
l2_criterion = m.nn.CrossEntropyLoss().to(device)

# Same optimizer settings as level 1, with a 10x smaller learning rate.
l2_optim = m.torch.optim.SGD(l2_model.parameters(),
                             lr=0.001,
                             momentum=0.9,
                             weight_decay=0.0001)
l2_scheduler = lr_scheduler.ReduceLROnPlateau(l2_optim, patience=5)

In [30]:
# Train the level-2 model for 2 epochs, saving to ../models/l2model.tar.
l2_model_train = m.train_model(
    model=l2_model,
    dataloader=l2_loaders,
    criterion=l2_criterion,
    optimizer=l2_optim,
    save_path='../models/l2model.tar',
    num_epochs=2,
    scheduler=l2_scheduler,
    early_stopping=m.EarlyStopping(patience=10),
)

Epoch 0/1
----------
train Loss: 1.7523 Acc: 0.2289
val Loss: 1.7779 Acc: 0.2196
saving model - best loss
1.7522641068564522
1.7779437163156901
tensor(0.2289, device='cuda:0', dtype=torch.float64)
tensor(0.2196, device='cuda:0', dtype=torch.float64)
logs.csv

Epoch 1/1
----------


  log_df = log_df.append(pd.DataFrame({


train Loss: 1.6312 Acc: 0.2871
val Loss: 1.7849 Acc: 0.2635
EarlyStopping counter: 1 out of 10
1.6312431082195706
1.784878307949759
tensor(0.2871, device='cuda:0', dtype=torch.float64)
tensor(0.2635, device='cuda:0', dtype=torch.float64)
logs.csv

Training complete in 2m 12s


  log_df = log_df.append(pd.DataFrame({


Best val Acc: 0.219561
Logs saved to ../models/l2model.tar_logs.csv


# Level 3

The procedural modifications from level 2 to level 3 are minimal. I still make new dataset/dataloader classes and a weighted sampler, and initialize from the best level-2 model. There are only 2 major changes:
1. The learning rate is lowered to 0.0001
2. The fully-connected layer now outputs 25 classes instead of 6

In [31]:
# Re-use the level-1 split, but look up each file's level-3 label by joining
# back onto the full training table and keeping only the needed columns.
l3_train_split = train_split.merge(train, on='file')[['file', 'lvl_three']]
l3_val_split = val_split.merge(train, on='file')[['file', 'lvl_three']]

In [32]:
# Level-3 datasets: same images and transforms again, level-3 label column.
l3_train_dataset = m.ImgDataset(
    df=l3_train_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=train_transforms,
)
l3_val_dataset = m.ImgDataset(
    df=l3_val_split,
    root_dir='../data/images/train',
    percent_sample=1,
    transform=val_transforms,
)

In [33]:
# Per-sample weights derived from the level-3 labels of the training split.
l3_samples_weights = m.weighted_sample(l3_train_split, 'lvl_three')

# Draw as many samples per epoch as there are weights (one per training row).
l3_weighted_sampler = m.WeightedRandomSampler(
    weights=l3_samples_weights,
    num_samples=len(l3_samples_weights),
)

In [34]:
# Level-3 loaders: weighted sampling for training, in-order validation.
l3_loaders = {
    'train': m.DataLoader(
        l3_train_dataset,
        batch_size=32,
        sampler=l3_weighted_sampler,
        num_workers=4,
    ),
    'val': m.DataLoader(
        l3_val_dataset,
        batch_size=32,
        num_workers=4,
    ),
}

In [35]:
# Initialize the level-3 model from the saved level-2 checkpoint.
# The base network is created without pretrained weights because the
# checkpoint supplies them; `weights=None` is the current spelling of the
# deprecated `pretrained=False` flag (assumes m.models is torchvision.models).
l3_model = m.load_model(path='../models/l2model.tar',
                        base=m.models.resnet50(weights=None),
                        old_classes=6,
                        new_classes=25,
                        device=device)
l3_criterion = m.nn.CrossEntropyLoss().to(device)

# Same optimizer settings as level 2, with a 10x smaller learning rate.
l3_optim = m.torch.optim.SGD(l3_model.parameters(),
                             lr=0.0001,
                             momentum=0.9,
                             weight_decay=0.0001)
l3_scheduler = lr_scheduler.ReduceLROnPlateau(l3_optim, patience=5)



In [36]:
# Train the level-3 model for 2 epochs, saving to ../models/l3model.tar.
l3_model_train = m.train_model(
    model=l3_model,
    dataloader=l3_loaders,
    criterion=l3_criterion,
    optimizer=l3_optim,
    save_path='../models/l3model.tar',
    num_epochs=2,
    scheduler=l3_scheduler,
    early_stopping=m.EarlyStopping(patience=10),
)

Epoch 0/1
----------
train Loss: 3.2189 Acc: 0.0422
val Loss: 3.2143 Acc: 0.0399
saving model - best loss
3.2189214600457086
3.2142922292926355
tensor(0.0422, device='cuda:0', dtype=torch.float64)
tensor(0.0399, device='cuda:0', dtype=torch.float64)
logs.csv

Epoch 1/1
----------


  log_df = log_df.append(pd.DataFrame({


train Loss: 3.2112 Acc: 0.0540
val Loss: 3.2095 Acc: 0.0339
saving model - best loss
3.211155111948649
3.2094635002151457
tensor(0.0540, device='cuda:0', dtype=torch.float64)
tensor(0.0339, device='cuda:0', dtype=torch.float64)
logs.csv

Training complete in 2m 6s


  log_df = log_df.append(pd.DataFrame({


Best val Acc: 0.033932
Logs saved to ../models/l3model.tar_logs.csv


In [40]:
from PIL import Image
import torch
import torchvision.transforms as transforms

# Define a function to predict an image using the trained model
def predict_image(model, image_path, transforms, device):
    """
    Predict the class of a single image.

    The model is switched to evaluation mode and is left in that mode when
    the function returns. The model itself is not moved; it is expected to
    already live on ``device``.

    Args:
        model (torch.nn.Module): Trained PyTorch model.
        image_path (str): Path to the image file.
        transforms (callable): Transformations to apply to the image; must
            return a tensor without a batch dimension.
        device (torch.device): Device the input tensor is moved to.

    Returns:
        int: Predicted class label (index of the highest probability).
        torch.Tensor: Class probabilities with a leading batch dimension of
            1, located on ``device``.
    """
    # Open the file in a context manager so the underlying file handle is
    # released deterministically; convert() returns an independent RGB copy
    # that stays valid after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Apply transformations, add the batch dimension, and move to the device
    image_tensor = transforms(image).unsqueeze(0).to(device)

    # Evaluation mode disables training-only behaviour (dropout, batch-norm updates)
    model.eval()

    # Disable gradient tracking for inference
    with torch.no_grad():
        outputs = model(image_tensor)

        # Convert logits to probabilities over the class dimension
        probabilities = torch.softmax(outputs, dim=1)

        # Index of the most probable class, as a plain Python int
        predicted_class = torch.argmax(probabilities, dim=1).item()

    return predicted_class, probabilities

# Specify the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Path to the image, relative to the notebook like every other data path
# used above (the previous value was an absolute path on one developer's
# machine and would not resolve anywhere else).
image_path = "../data/images/test/0_220_F_24870_DFd91XhrsOYD0OXCMWgLV9ggisNCrs.jpg"

# Predict the image with the level-1 model, using the deterministic
# validation transforms
predicted_class, probabilities = predict_image(
    model=l1_model,
    image_path=image_path,
    transforms=val_transforms,
    device=device
)

# Display the result
print(f"Predicted class: {predicted_class}")
print(f"Class probabilities: {probabilities}")


Predicted class: 0
Class probabilities: tensor([[0.7555, 0.2445]], device='cuda:0')
