# Ergonomic pose fine-tuning: 

## Seed Hyperparameter Optimization

In [142]:
from mmpose.apis import MMPoseInferencer
from ultralytics import YOLO
import numpy as np
from PIL import Image
import torch
from functools import partial
from torch.utils.data import random_split, DataLoader, Subset, TensorDataset
from torchvision import datasets, transforms
import cv2
import time

data_path_scores = 'E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset Complete'
data_path_grouped = 'E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset_binned'

yolo_kp_params_classifier = {'lr': 0.0001, 'h1': 512, 'h2': 1024, 'batch_size': 4, 'num_epochs': 300}
yolo_kp_params_scorer = {'lr': 0.001, 'h1': 256, 'h2': 256, 'batch_size': 16, 'num_epochs': 200}
mmpose_kp_params_classifier = {'lr': 0.001, 'h1': 1024, 'h2': 512, 'batch_size': 16, 'num_epochs': 100}
mmpose_kp_params_scorer = {'lr': 5e-05, 'h1': 512, 'h2': 1024, 'batch_size': 4, 'num_epochs': 200}

def set_seed(seed):
    torch.manual_seed(seed) 
    np.random.seed(seed)
    torch.use_deterministic_algorithms(True)
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True

In [143]:
def get_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    elif torch.backends.mps.is_available():
        return torch.device("mps")
    else:
        return torch.device("cpu")
    
get_device()

device(type='cuda')

In [144]:
class PILToNumpyTransform:
    def __call__(self, pil_img):
        """
        Convert a PIL Image to an OpenCV Image / Numpy Array.

        Parameters:
            pil_img (PIL.Image): The PIL Image to be converted.

        Returns:
            np.ndarray: The converted OpenCV Image in RGB format.
        """
        # Convert PIL Image to NumPy array (RGB)
        img_array = np.array(pil_img)
        img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)

        return img_array

data_transforms = transforms.Compose([
    transforms.Resize((480, 640)), # Resize images to (480, 640)
    transforms.ToTensor(), # Convert images to PyTorch tensors
])

own_dataset_scores = datasets.ImageFolder(root=data_path_scores, transform=data_transforms)
display(own_dataset_scores)
own_dataset_grouped = datasets.ImageFolder(root=data_path_grouped, transform=data_transforms)
display(own_dataset_grouped)

Dataset ImageFolder
    Number of datapoints: 118
    Root location: E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset Complete
    StandardTransform
Transform: Compose(
               Resize(size=(480, 640), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

Dataset ImageFolder
    Number of datapoints: 118
    Root location: E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset_binned
    StandardTransform
Transform: Compose(
               Resize(size=(480, 640), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

In [145]:
generator1 = torch.Generator().manual_seed(13)  # set seed for reproducibility of the split
train_and_val_dataset_scores, test_dataset_scores = random_split(own_dataset_scores, [0.8, 0.2], generator=generator1)  # 80% training and evaluation, 20% testing

train_and_val_dataset_grouped, test_dataset_grouped = random_split(own_dataset_grouped, [0.8, 0.2], generator=generator1)  # 80% training and evaluation, 20% testing

### Define the models

In [146]:
import torch
import torch.nn as nn

class KeypointClassifier(nn.Module):
    def __init__(self, num_keypoints, num_classes, h1, h2):
        super(KeypointClassifier, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(num_keypoints*2, h1),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(h1, h2),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.layer3 = nn.Sequential(
            nn.Linear(h2, h2 // 2),
            nn.ReLU()
        )
        self.dense = nn.Sequential(
            nn.Linear(h2 // 2, num_classes)
        )
    
    def forward(self, keypoints_flattened):
        x = self.layer1(keypoints_flattened)
        x = self.layer2(x)
        x = self.layer3(x)
        output = self.dense(x)
        
        return output

class KeypointScorer(nn.Module):
    def __init__(self, num_keypoints, h1, h2):
        super(KeypointScorer, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(num_keypoints*2, h1),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(h1, h2),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.layer3 = nn.Sequential(
            nn.Linear(h2, h2 // 2),
            nn.ReLU()
        )
        self.dense = nn.Sequential(
            nn.Linear(h2 // 2, 1)
        )
    
    def forward(self, keypoints_flattened):
        x = self.layer1(keypoints_flattened)
        x = self.layer2(x)
        x = self.layer3(x)
        output = self.dense(x)
        
        return output

In [147]:
def tensor_to_cv2_image(tensor):
    """
    Converts a PyTorch tensor to an OpenCV image.
    
    Parameters:
    - tensor: A PyTorch tensor, in the format CxHxW with values normalized to [0, 1].
    
    Returns:
    - An OpenCV image, in BGR format.
    """
    
    # Denormalize the tensor and convert it to a numpy array
    image = tensor.mul(255).byte().permute(2, 1, 0).cpu().numpy()
    image = np.fliplr(image)
    
    # Convert the color space from RGB to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    return image


def get_keypoints_from_mmpose(model, inputs):
    with torch.no_grad(): # gradients are not computed for the frozen model
        results = []
        for input in inputs:
            start_time = time.perf_counter()
            
            cv2_img = tensor_to_cv2_image(input)
            result = list(next(model(cv2_img, show=False)).values())
            keypoints = result[1][0][0]['keypoints']
            
            ellapsed_time_ms = (time.perf_counter() - start_time) * 1000
            print(f"Image processed in {ellapsed_time_ms:.2f} ms")
            results.append(torch.tensor(keypoints))
    return torch.stack(results)

def get_keypoints_from_yolo(yolo_model, inputs):
    with torch.no_grad():
        results = []
        for input in inputs:
            result = yolo_model(input.unsqueeze(0))[0].keypoints.xy[0].to(input.device)
            if result.shape[0] < 17:
                result = torch.cat([result, torch.zeros(17 - result.shape[0], 2).to(input.device)], dim = 0)
            
            results.append(result)
    return torch.stack(results)

processing keypoints:

In [148]:
# z-Value
def process_keypoints_for_model(keypoints):
    # Calculate mean and standard deviation with keepdim=True to preserve broadcasting compatibility
    mean_vals = keypoints.mean(dim=1, keepdim=True)
    std_vals = keypoints.std(dim=1, keepdim=True) + 1e-6  # Adding a small epsilon to prevent division by zero

    # Perform standardization
    kp_tensor_standardized = (keypoints - mean_vals) / std_vals
    
    # Flatten the last two dimensions while keeping the batch dimension
    batch_flattened = kp_tensor_standardized.view(keypoints.size(0), -1)
    
    return batch_flattened

#### training the model:

- with cross-entropy loss function (fits our classification task)
- Adam optimizer

HPO:

In [149]:
import optuna
from sklearn.metrics import f1_score, confusion_matrix
import numpy as np

In [150]:
def preprocess_dataset(kp_detection_model, kp_detection_func, dataset, device):
    processed_keypoints_list = []
    labels_list = []
    
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
    
    # Assuming `dataset` is an iterable of (input, label) pairs
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        keypoints = kp_detection_func(kp_detection_model, inputs)  # Extract keypoints        
        processed_kps = process_keypoints_for_model(keypoints)  # Process keypoints        
        processed_keypoints_list.append(processed_kps)
        labels_list.append(labels)

    # Convert lists to tensors
    processed_keypoints_tensor = torch.cat(processed_keypoints_list, dim=0)
    labels_tensor = torch.cat(labels_list, dim=0)
    
    # Create a new TensorDataset and DataLoader
    preprocessed_dataset = TensorDataset(processed_keypoints_tensor, labels_tensor)
    
    return preprocessed_dataset
    
def train_and_eval_model(kp_model, optimizer, loss_fn, num_epochs, train_loader, val_loader, device):
    
    kp_model = kp_model.to(device)  # move model to device
    kp_model.train()  # set model to training mode
    
    print("type of train_loader", type(train_loader))

    for epoch in range(num_epochs):
        for batch in train_loader:
            # access images and labels
            inputs = batch[0].to(device)
            labels = batch[1].to(device)
            
            optimizer.zero_grad()  # Zero the parameter gradients
            classification_output = kp_model(inputs) # get results for the classification
            
            # Use this for classification
            # loss = loss_fn(classification_output, labels)
            
            # Use this for scoring
            loss = loss_fn(classification_output.float(), labels.float())
            
            loss.backward()  # Backpropagate the loss
            optimizer.step()  # Update weights

            # todo: further processing, such as calculating accuracy or loss, goes here

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
            
        
        kp_model.eval()  # model to evaluation mode

        total_loss = 0
        all_predictions = []
        all_labels = []

        with torch.no_grad():  # no need to compute gradients, because we are in evaluation mode
            for inputs, labels in val_loader:  # iterate over validation dataset
                inputs, labels = inputs.to(device), labels.to(device)  # move data to device
                
                # Not necessary because already preprocessed
                # keypoints = get_keypoints_from_yolo(yolo_model, inputs) # get keypoints from the YOLO model
                # processed_kps = process_keypoints_for_classifier(keypoints) # prepare the keypoints for the classifier
                
                classification_output = kp_model(inputs) # get results for the classification 
                
                # Use this for classification
                # loss = loss_fn(classification_output, labels)
                
                # Use this for scoring
                loss = loss_fn(classification_output.float(), labels.float())
                
                total_loss += loss.item()  # accumulate the loss
                # get predictions for output
                _, predicted = torch.max(classification_output.data, 1)
                # collect the predictions and labels
                all_predictions.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
            # calculate the validation metrics:
            avg_loss = total_loss / len(val_loader)  # get the average loss
            # accuracy = (np.array(all_predictions) == np.array(all_labels)).mean()
            # f1 = f1_score(all_labels, all_predictions, average='weighted')  
            # conf_matrix = confusion_matrix(all_labels, all_predictions)
            
            print(f"Validation Loss: {avg_loss:.4f}")
            # print(f"Validation Accuracy: {accuracy:.4f}")
            # print(f"Validation F1 Score: {f1:.4f}")
            # print("Confusion Matrix:")
            # print(conf_matrix)
            
            # here: F1 score chosen as the metric to optimize
            # other options: - combining metrics like accuracy and F1 score to maximize on both
            #                        - or multi-objective optimization on F1 score and accuracy
    return avg_loss 

In [151]:
# this function was generated by gpt-4

def get_k_fold_indices(n, k=5, random_seed=None):
    """
    Generate indices for k-fold cross-validation.

    Parameters:
    - n: Total number of samples in the dataset.
    - k: Number of folds.
    - random_seed: Optional seed for reproducibility.

    Returns:
    - A list of tuples, each containing (train_indices, val_indices) for a fold.
    """
    # Initialize the random generator
    g = torch.Generator()
    if random_seed is not None:
        g.manual_seed(random_seed)
    
    # Generate a random permutation of indices
    indices = torch.randperm(n, generator=g).tolist()
    
    # Calculate fold sizes
    fold_sizes = [n // k for _ in range(k)]
    for i in range(n % k):
        fold_sizes[i] += 1
    
    # Generate train and validation indices for each fold
    current = 0
    folds_indices = []
    for fold_size in fold_sizes:
        start, end = current, current + fold_size
        val_indices = indices[start:end]
        train_indices = indices[:start] + indices[end:]
        folds_indices.append((train_indices, val_indices))
        current = end
    
    return folds_indices


In [152]:
def objective(trial, params, dataset, num_keypoints, num_classes):
    # Define hyperparameters to optimize
    lr = params["lr"]
    h1 = params["h1"]
    h2 = params["h2"]
    batch_size = params["batch_size"]
    num_epochs = params["num_epochs"]
    seed = trial.suggest_categorical("seed", [13, 23, 42, 2000, 1986])

    validation_scores = []

    n = len(dataset)
    k = 5
    folds_indices = get_k_fold_indices(n, k, random_seed=13)
    device = get_device()
    
    set_seed(seed)
    
    for fold, (train_idx, val_idx) in enumerate(folds_indices, start=1):
        print(f"Trial {trial.number}, Fold {fold}/{k}")
        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)
        
        # Create data loaders for training and validation
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
        
        # Initializing the model and optimizer with the chosen hyperparameters
        kp_model = KeypointScorer(num_keypoints, h1, h2).to(device)
        # kp_model = KeypointClassifier(num_keypoints=num_keypoints, num_classes=num_classes, h1=h1, h2=h2).to(device)
        optimizer = torch.optim.Adam(kp_model.parameters(), lr=lr, weight_decay=1e-5)
    
        # training and evaluating the model - Watch out for the Loss - CrossEntropyLoss for classification and MSELoss for scoring
        validation_score = train_and_eval_model(kp_model, optimizer, nn.MSELoss(), num_epochs, train_loader, val_loader, device)
        validation_scores.append(validation_score)

    return np.mean(validation_scores)


In [153]:
print(yolo_kp_params_classifier)
print(yolo_kp_params_scorer)
print(mmpose_kp_params_classifier)
print(mmpose_kp_params_scorer)

{'lr': 0.0001, 'h1': 512, 'h2': 1024, 'batch_size': 4, 'num_epochs': 300}
{'lr': 0.001, 'h1': 256, 'h2': 256, 'batch_size': 16, 'num_epochs': 200}
{'lr': 0.001, 'h1': 1024, 'h2': 512, 'batch_size': 16, 'num_epochs': 100}
{'lr': 5e-05, 'h1': 512, 'h2': 1024, 'batch_size': 4, 'num_epochs': 200}


In [154]:
mmpose_model = MMPoseInferencer('rtmw-m_8xb1024-270e_cocktail14-256x192')
yolo_model = YOLO('yolov8n-pose.pt')
preprocessed_dataset = preprocess_dataset(mmpose_model, get_keypoints_from_mmpose, train_and_val_dataset_grouped, get_device())
optimize = partial(objective, params = mmpose_kp_params_scorer, dataset = preprocessed_dataset, num_keypoints = 133, num_classes = 3)
search_space = {
    "seed" : [13, 23, 42, 2000, 1986]
}

study = optuna.create_study(sampler=optuna.samplers.GridSampler(search_space), direction='minimize')
study.optimize(optimize, n_trials=5)

# Best hyperparameters
print("Best hyperparameters:", study.best_params)

Loads checkpoint by http backend from path: https://download.openmmlab.com/mmpose/v1/projects/rtmw/rtmw-dw-l-m_simcc-cocktail14_270e-256x192-20231122.pth


Loads checkpoint by http backend from path: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth
Image processed in 57.21 ms
Image processed in 52.74 ms
Image processed in 58.17 ms
Image processed in 58.81 ms
Image processed in 57.61 ms
Image processed in 56.05 ms
Image processed in 56.89 ms
Image processed in 54.54 ms
Image processed in 56.29 ms
Image processed in 57.44 ms
Image processed in 58.57 ms
Image processed in 58.83 ms
Image processed in 61.39 ms
Image processed in 58.46 ms
Image processed in 58.16 ms
Image processed in 58.24 ms
Image processed in 59.02 ms
Image processed in 54.46 ms
Image processed in 59.20 ms
Image processed in 58.30 ms
Image processed in 55.78 ms
Image processed in 54.65 ms
Image processed in 54.29 ms
Image processed in 55.82 ms
Image processed in 63.05 ms
Image processed in 56.82 ms
Image processed in 59.20 ms
Image processed in 57.62 ms
Image processed in 56.20 ms
Image processed in 54.70 ms
Imag

[I 2024-03-24 14:56:04,509] A new study created in memory with name: no-name-24fc9785-f352-4752-a81b-f2a52f3ad604
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Image processed in 60.59 ms
Trial 0, Fold 1/5
type of train_loader <class 'torch.utils.data.dataloader.DataLoader'>
Epoch [1/200], Loss: 0.2181
Validation Loss: 1.1818
Epoch [2/200], Loss: 1.0941
Validation Loss: 0.5936
Epoch [3/200], Loss: 0.7305
Validation Loss: 0.5773
Epoch [4/200], Loss: 0.9800
Validation Loss: 0.6427
Epoch [5/200], Loss: 0.8264
Validation Loss: 0.5756
Epoch [6/200], Loss: 1.1656
Validation Loss: 0.5655
Epoch [7/200], Loss: 0.5080
Validation Loss: 0.6152
Epoch [8/200], Loss: 0.7013
Validation Loss: 0.5827
Epoch [9/200], Loss: 0.6209
Validation Loss: 0.6121
Epoch [10/200], Loss: 0.4793
Validation Loss: 0.5794
Epoch [11/200], Loss: 0.2412
Validation Loss: 0.5849
Epoch [12/200], Loss: 1.0139
Validation Loss: 0.6128
Epoch [13/200], Loss: 0.3620
Validation Loss: 0.5961
Epoch [14/200], Loss: 0.2910
Validation Loss: 0.5842
Epoch [15/200], Loss: 1.0111
Validation Loss: 0.5994
Epoch [16/200], Loss: 0.7277
Validation Loss: 0.6241
Epoch [17/200], Loss: 0.8474
Validation Loss:

[I 2024-03-24 14:56:40,837] Trial 0 finished with value: 0.6419476521015166 and parameters: {'seed': 23}. Best is trial 0 with value: 0.6419476521015166.


Validation Loss: 0.7142
Trial 1, Fold 1/5
type of train_loader <class 'torch.utils.data.dataloader.DataLoader'>
Epoch [1/200], Loss: 0.7589
Validation Loss: 1.1801
Epoch [2/200], Loss: 0.0183
Validation Loss: 0.5899
Epoch [3/200], Loss: 0.2099
Validation Loss: 0.5896
Epoch [4/200], Loss: 0.5096
Validation Loss: 0.5903
Epoch [5/200], Loss: 0.4292
Validation Loss: 0.5835
Epoch [6/200], Loss: 0.7653
Validation Loss: 0.6521
Epoch [7/200], Loss: 0.6864
Validation Loss: 0.6096
Epoch [8/200], Loss: 0.2842
Validation Loss: 0.5806
Epoch [9/200], Loss: 0.5164
Validation Loss: 0.5990
Epoch [10/200], Loss: 0.4444
Validation Loss: 0.6199
Epoch [11/200], Loss: 0.8534
Validation Loss: 0.6112
Epoch [12/200], Loss: 0.5188
Validation Loss: 0.5774
Epoch [13/200], Loss: 0.0160
Validation Loss: 0.5947
Epoch [14/200], Loss: 0.5183
Validation Loss: 0.5854
Epoch [15/200], Loss: 0.7456
Validation Loss: 0.5673
Epoch [16/200], Loss: 0.3490
Validation Loss: 0.5663
Epoch [17/200], Loss: 0.4464
Validation Loss: 0.5

[I 2024-03-24 14:57:17,142] Trial 1 finished with value: 0.6378273200988769 and parameters: {'seed': 2000}. Best is trial 1 with value: 0.6378273200988769.


Trial 2, Fold 1/5
type of train_loader <class 'torch.utils.data.dataloader.DataLoader'>
Epoch [1/200], Loss: 0.7544
Validation Loss: 1.4823
Epoch [2/200], Loss: 0.7858
Validation Loss: 0.8014
Epoch [3/200], Loss: 1.2242
Validation Loss: 0.5614
Epoch [4/200], Loss: 0.7171
Validation Loss: 0.6035
Epoch [5/200], Loss: 1.0102
Validation Loss: 0.6020
Epoch [6/200], Loss: 1.0119
Validation Loss: 0.5967
Epoch [7/200], Loss: 0.5356
Validation Loss: 0.5824
Epoch [8/200], Loss: 0.9809
Validation Loss: 0.6210
Epoch [9/200], Loss: 0.5067
Validation Loss: 0.5908
Epoch [10/200], Loss: 0.6810
Validation Loss: 0.6115
Epoch [11/200], Loss: 0.2137
Validation Loss: 0.5642
Epoch [12/200], Loss: 0.5245
Validation Loss: 0.6000
Epoch [13/200], Loss: 0.7570
Validation Loss: 0.6469
Epoch [14/200], Loss: 0.3786
Validation Loss: 0.5754
Epoch [15/200], Loss: 0.5179
Validation Loss: 0.5928
Epoch [16/200], Loss: 0.7519
Validation Loss: 0.5984
Epoch [17/200], Loss: 0.5133
Validation Loss: 0.5823
Epoch [18/200], Loss

[I 2024-03-24 14:57:53,557] Trial 2 finished with value: 0.6394465655088425 and parameters: {'seed': 42}. Best is trial 1 with value: 0.6378273200988769.


Trial 3, Fold 1/5
type of train_loader <class 'torch.utils.data.dataloader.DataLoader'>
Epoch [1/200], Loss: 0.1863
Validation Loss: 1.2265
Epoch [2/200], Loss: 1.0102
Validation Loss: 0.6030
Epoch [3/200], Loss: 0.6361
Validation Loss: 0.6007
Epoch [4/200], Loss: 0.7181
Validation Loss: 0.6078
Epoch [5/200], Loss: 0.7864
Validation Loss: 0.5980
Epoch [6/200], Loss: 0.4150
Validation Loss: 0.6262
Epoch [7/200], Loss: 0.2817
Validation Loss: 0.6238
Epoch [8/200], Loss: 0.4085
Validation Loss: 0.6240
Epoch [9/200], Loss: 0.5068
Validation Loss: 0.6229
Epoch [10/200], Loss: 0.2996
Validation Loss: 0.6209
Epoch [11/200], Loss: 0.2733
Validation Loss: 0.6297
Epoch [12/200], Loss: 0.4404
Validation Loss: 0.6072
Epoch [13/200], Loss: 0.2746
Validation Loss: 0.6501
Epoch [14/200], Loss: 0.8495
Validation Loss: 0.6035
Epoch [15/200], Loss: 0.7611
Validation Loss: 0.6330
Epoch [16/200], Loss: 0.6606
Validation Loss: 0.6494
Epoch [17/200], Loss: 0.5289
Validation Loss: 0.5905
Epoch [18/200], Loss

[I 2024-03-24 14:58:30,114] Trial 3 finished with value: 0.6268693971633912 and parameters: {'seed': 1986}. Best is trial 3 with value: 0.6268693971633912.


Epoch [199/200], Loss: 1.0349
Validation Loss: 0.6950
Epoch [200/200], Loss: 0.3874
Validation Loss: 0.6998
Trial 4, Fold 1/5
type of train_loader <class 'torch.utils.data.dataloader.DataLoader'>
Epoch [1/200], Loss: 0.7388
Validation Loss: 1.1845
Epoch [2/200], Loss: 0.7277
Validation Loss: 0.6033
Epoch [3/200], Loss: 0.7744
Validation Loss: 0.5493
Epoch [4/200], Loss: 1.0019
Validation Loss: 0.6202
Epoch [5/200], Loss: 0.5095
Validation Loss: 0.6073
Epoch [6/200], Loss: 1.0067
Validation Loss: 0.6057
Epoch [7/200], Loss: 0.4307
Validation Loss: 0.5892
Epoch [8/200], Loss: 0.8197
Validation Loss: 0.6441
Epoch [9/200], Loss: 0.5334
Validation Loss: 0.5795
Epoch [10/200], Loss: 0.7221
Validation Loss: 0.5905
Epoch [11/200], Loss: 0.7214
Validation Loss: 0.6064
Epoch [12/200], Loss: 0.5729
Validation Loss: 0.5830
Epoch [13/200], Loss: 0.7743
Validation Loss: 0.5971
Epoch [14/200], Loss: 0.9830
Validation Loss: 0.5682
Epoch [15/200], Loss: 0.4790
Validation Loss: 0.6105
Epoch [16/200], Lo

[I 2024-03-24 14:59:06,352] Trial 4 finished with value: 0.6461048281192779 and parameters: {'seed': 13}. Best is trial 3 with value: 0.6268693971633912.


Epoch [199/200], Loss: 1.0794
Validation Loss: 0.7282
Epoch [200/200], Loss: 0.2101
Validation Loss: 0.6955
Best hyperparameters: {'seed': 1986}


In [155]:
best_params = study.best_params
best_params

{'seed': 1986}

In [156]:
best_params = study.best_params
# extract best_parameters
batch_size = best_params['batch_size']
lr = best_params['lr']
h1 = best_params['h1']
h2 = best_params['h2']
num_epochs = best_params['num_epochs']

KeyError: 'batch_size'

#### train and evaluate final model on test set:

Unfortunately Optuna cannot output the best model, so we train again on the combined train and validation set with the best parameters found.

dataloader for final evaluation:

In [None]:
train_and_eval_loader = preprocess_dataset(mmpose_model, train_and_val_dataset, get_device())
test_loader = preprocess_dataset(mmpose_model, test_dataset, get_device())

Image processed in 112.85 ms
Image processed in 62.68 ms
Image processed in 61.85 ms
Image processed in 58.51 ms
Image processed in 62.82 ms
Image processed in 63.22 ms
Image processed in 60.42 ms
Image processed in 60.82 ms
Image processed in 54.75 ms
Image processed in 57.09 ms
Image processed in 58.36 ms
Image processed in 58.68 ms
Image processed in 63.09 ms
Image processed in 62.29 ms
Image processed in 63.19 ms
Image processed in 60.65 ms
Image processed in 58.50 ms
Image processed in 64.25 ms
Image processed in 66.18 ms
Image processed in 63.61 ms
Image processed in 60.31 ms
Image processed in 61.46 ms
Image processed in 61.57 ms
Image processed in 61.95 ms
Image processed in 61.38 ms
Image processed in 62.43 ms
Image processed in 58.98 ms
Image processed in 57.29 ms
Image processed in 58.04 ms
Image processed in 53.93 ms
Image processed in 61.89 ms
Image processed in 60.12 ms
Image processed in 60.72 ms
Image processed in 52.80 ms
Image processed in 62.11 ms
Image processed in 

final evaluation on test set:

In [None]:
# new instance of the model:
kp_model = KeypointScorer(num_keypoints, h1, h2)
# kp_model = KeypointClassifier(num_keypoints, num_classes, h1, h2)
kp_model = kp_model.to(get_device())  # move model to device
kp_model.train()  # set model to training mode

# loss function and optimizer
criterion = nn.MSELoss() # nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(kp_model.parameters(), lr=lr, weight_decay=1e-5)

for epoch in range(num_epochs):
    for inputs, labels in train_and_eval_loader:  # Assuming data_loader is your DataLoader instance for the dataset
        inputs, labels = inputs.to(get_device()), labels.to(get_device())  # move data to device
        optimizer.zero_grad()  # Zero the parameter gradients
        classification_output = kp_model(inputs) # get results for the classification 
        
        # Use this for classification
        # loss = criterion(classification_output, labels)
        
        # Use this for scoring
        loss = criterion(classification_output.float(), labels.float())
        
        loss.backward()  # Backpropagate the loss
        optimizer.step()  # Update weights

        # todo: further processing, such as calculating accuracy or loss, goes here

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
    
kp_model.eval()  # model to evaluation mode

total_loss = 0
all_predictions = []
all_labels = []
with torch.no_grad():  # no need to compute gradients, because we are in evaluation mode
    for inputs, labels in test_loader:  # iterate over validation dataset
        inputs, labels = inputs.to(get_device()), labels.to(get_device())  # move data to device
        classification_output = kp_model(inputs) # get results for the classification 
        
        # Use this for classification
        loss = criterion(classification_output, labels)
        
        # Use this for scoring
        # loss = criterion(classification_output.float(), labels.float())
        
        total_loss += loss.item()  # accumulate the loss
        
        if classification_output.data.dim() == 1:
            classification_output.data = classification_output.data.unsqueeze(0)
        
        _, predicted = torch.max(classification_output.data, 1)
        # collect the predictions and labels
        all_predictions.extend(predicted.cpu().numpy())
        
        if labels.dim() == 0:
            labels = labels.unsqueeze(0)  # Add a dimension to make it iterable
        all_labels.extend(labels.cpu().numpy())
    # calculate the validation metrics:
    avg_loss = total_loss / len(test_loader)  # get the average loss
    # accuracy = (np.array(all_predictions) == np.array(all_labels)).mean()
    # f1 = f1_score(all_labels, all_predictions, average='weighted')  
    # conf_matrix = confusion_matrix(all_labels, all_predictions)
    
    print(f"Test Loss: {avg_loss:.4f}")
    # print(f"Test Accuracy: {accuracy:.4f}")
    # print(f"Test F1 Score: {f1:.4f}")
    # print("Confusion Matrix:")
    # print(conf_matrix)

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/200], Loss: 11.8278
Epoch [2/200], Loss: 3.8881
Epoch [3/200], Loss: 3.2793
Epoch [4/200], Loss: 3.0183
Epoch [5/200], Loss: 0.8485
Epoch [6/200], Loss: 2.2129
Epoch [7/200], Loss: 3.3966
Epoch [8/200], Loss: 0.5521
Epoch [9/200], Loss: 3.3730
Epoch [10/200], Loss: 1.7887
Epoch [11/200], Loss: 0.3325
Epoch [12/200], Loss: 0.4270
Epoch [13/200], Loss: 0.4965
Epoch [14/200], Loss: 0.6571
Epoch [15/200], Loss: 0.5159
Epoch [16/200], Loss: 0.5595
Epoch [17/200], Loss: 0.4413
Epoch [18/200], Loss: 0.6999
Epoch [19/200], Loss: 1.2832
Epoch [20/200], Loss: 0.2218
Epoch [21/200], Loss: 0.2286
Epoch [22/200], Loss: 0.0025
Epoch [23/200], Loss: 0.1119
Epoch [24/200], Loss: 0.0659
Epoch [25/200], Loss: 0.4963
Epoch [26/200], Loss: 0.0802
Epoch [27/200], Loss: 0.0000
Epoch [28/200], Loss: 1.3841
Epoch [29/200], Loss: 0.6992
Epoch [30/200], Loss: 0.3252
Epoch [31/200], Loss: 0.0203
Epoch [32/200], Loss: 1.6401
Epoch [33/200], Loss: 1.0623
Epoch [34/200], Loss: 1.3181
Epoch [35/200], Loss: 