# Main File for all Models
In this notebook we train all models using the best hyperparameters that were previously found in the HPO notebooks (Optuna grid search).

## Setup (Imports and Helper Functions)

In [1]:
from mmpose.apis import MMPoseInferencer
import numpy as np
from PIL import Image
import torch
from torch.utils.data import random_split, DataLoader, Subset, TensorDataset
from torchvision import datasets, transforms
import cv2
import time

In [2]:
def get_device():
    """
    Pick the torch device to run on.

    Preference order: CUDA if available, otherwise Apple's MPS backend if
    available, otherwise the CPU.

    Returns:
    - A torch.device for the selected backend.
    """
    if torch.cuda.is_available():
        backend = "cuda"
    elif torch.backends.mps.is_available():
        backend = "mps"
    else:
        backend = "cpu"
    return torch.device(backend)


# Show which device was selected for this session
get_device()

device(type='cuda')

In [3]:
def tensor_to_cv2_image(tensor):
    """
    Turn an image tensor into an array that OpenCV can draw on.

    Parameters:
    - tensor: A PyTorch tensor laid out as CxHxW, expected to hold values in [0, 1].

    Returns:
    - The image as a NumPy array in HxWxC layout with BGR channel order.
    """
    # Scale to the 0-255 range and cast to unsigned 8-bit values
    scaled = tensor.mul(255).byte()

    # Move the channel axis last (CxHxW -> HxWxC) and hand the data to NumPy
    rgb_array = scaled.permute(1, 2, 0).cpu().numpy()

    # OpenCV works with BGR channel order, so swap from RGB
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)

def visualize_output(image_tensor, keypoints_tensor, tup=(), show_neck_angle=False, show_trunk_angle=False, show_upper_arm_angle=False, show_lower_arm_angle=False):
    """
    Draw keypoints and selected body segments onto an image.

    The image tensor is converted with ``tensor_to_cv2_image`` and everything is
    drawn onto the resulting array. Colour tuples are written to the array in
    its own channel order (``tensor_to_cv2_image`` returns BGR).

    Parameters:
    image_tensor (torch.Tensor): The original image, as accepted by ``tensor_to_cv2_image``.
    keypoints_tensor (torch.Tensor): Keypoints as rows of (x, y) pixel coordinates.
        Indices 0 and 5-12 are read as nose, left/right shoulder, left/right elbow,
        left/right wrist and left/right hip (this looks like the COCO-17 ordering,
        TODO confirm against the pose model used). At least 13 rows are required.
    tup (collection of int): Indices of keypoints to draw in (255, 0, 0) instead of
        the default (0, 255, 0).
    show_neck_angle (bool): Draw nose - shoulder midpoint - hip midpoint.
    show_trunk_angle (bool): Draw right hip - left hip - left shoulder.
    show_upper_arm_angle (bool): Draw left hip - left shoulder - left elbow.
    show_lower_arm_angle (bool): Draw left shoulder - left elbow - left wrist.

    If none of the ``show_*`` flags is set, a simple upper-body skeleton
    (shoulders, hips, torso sides, neck line and both arms) is drawn instead.

    Returns:
    numpy.ndarray: The annotated image.
    """

    result_image = tensor_to_cv2_image(image_tensor)
    keypoints = keypoints_tensor.cpu().numpy()

    marker_radius = 20
    line_thickness = 10
    highlight_color = (255, 0, 0)
    keypoint_color = (0, 255, 0)
    skeleton_color = (255, 255, 191)
    angle_line_color = (0, 255, 255)
    vertex_color = (255, 0, 0)      # marks the joint at which the angle is measured
    endpoint_color = (0, 0, 255)    # marks the two outer points of the angle

    def to_pixel(point):
        # OpenCV drawing functions need integer pixel coordinates
        return (int(point[0]), int(point[1]))

    def draw_line(start, end, line_color):
        cv2.line(result_image, to_pixel(start), to_pixel(end), line_color, line_thickness)

    def draw_dot(center, dot_color):
        # Thickness -1 draws a filled circle
        cv2.circle(result_image, to_pixel(center), marker_radius, dot_color, -1)

    def draw_angle(first_line, second_line, end_a, vertex, end_b):
        # Two legs of the angle first, then the three markers on top of them
        draw_line(*first_line, angle_line_color)
        draw_line(*second_line, angle_line_color)
        draw_dot(end_a, endpoint_color)
        draw_dot(vertex, vertex_color)
        draw_dot(end_b, endpoint_color)

    # Draw keypoints
    for index, (x, y) in enumerate(keypoints):
        draw_dot((x, y), highlight_color if index in tup else keypoint_color)

    # Named keypoints
    nose_point = keypoints[0]
    left_shoulder, right_shoulder = keypoints[5], keypoints[6]
    left_elbow, right_elbow = keypoints[7], keypoints[8]
    left_wrist, right_wrist = keypoints[9], keypoints[10]
    left_hip, right_hip = keypoints[11], keypoints[12]

    # Helper points
    hip_midpoint = left_hip + ((right_hip - left_hip) / 2)
    shoulder_midpoint = left_shoulder + ((right_shoulder - left_shoulder) / 2)

    if show_neck_angle:
        draw_angle(
            (shoulder_midpoint, nose_point),
            (hip_midpoint, shoulder_midpoint),
            hip_midpoint, shoulder_midpoint, nose_point,
        )
    if show_trunk_angle:
        draw_angle(
            (left_hip, right_hip),
            (left_shoulder, left_hip),
            right_hip, left_hip, left_shoulder,
        )
    if show_upper_arm_angle:
        draw_angle(
            (left_shoulder, left_hip),
            (left_shoulder, left_elbow),
            left_hip, left_shoulder, left_elbow,
        )
    if show_lower_arm_angle:
        draw_angle(
            (left_shoulder, left_elbow),
            (left_elbow, left_wrist),
            left_shoulder, left_elbow, left_wrist,
        )

    if not (show_neck_angle or show_trunk_angle or show_upper_arm_angle or show_lower_arm_angle):
        skeleton_segments = [
            (left_shoulder, right_shoulder),    # Left - Right Shoulders
            (left_hip, right_hip),              # Left - Right Hips
            (left_shoulder, left_hip),          # Left Shoulder - Left Hip
            (right_shoulder, right_hip),        # Right Shoulder - Right Hip
            (shoulder_midpoint, nose_point),    # Mid shoulders - Nose
            (left_shoulder, left_elbow),        # Left shoulder - Left elbow
            (right_shoulder, right_elbow),      # Right shoulder - Right elbow
            (left_elbow, left_wrist),           # Left elbow - Left wrist
            (right_elbow, right_wrist),         # Right elbow - Right wrist
        ]
        for start, end in skeleton_segments:
            draw_line(start, end, skeleton_color)

    return result_image

def plot_results(image, mode="inline", scale=1):
    """
    Displays an image using either a popup window or inline display.

    Parameters
    ----------
    image : numpy.ndarray or PIL.Image.Image
        The image to display. A NumPy array is treated as an OpenCV image
        (BGR / BGRA channel order for 3 / 4 channels); a PIL image is treated
        as RGB.
    mode : str, optional
        The display mode. Must be either "popup" or "inline". If "popup", the image is displayed in an OpenCV window and the call blocks until a key is pressed. If "inline", the image is displayed inline in the notebook. Default is "inline".
    scale : float, optional
        The scaling factor for the image. Default is 1.

    Raises
    ------
    ValueError
        If the mode is not "popup" or "inline".
    """

    # Validate first so an invalid mode fails before any conversion work
    if mode not in ("popup", "inline"):
        raise ValueError("Mode must be either 'popup' or 'inline'")

    if mode == "popup":
        if isinstance(image, np.ndarray):
            frame = image
        else:
            # cv2.imshow expects BGR, PIL images are RGB
            frame = cv2.cvtColor(np.asarray(image.convert("RGB")), cv2.COLOR_RGB2BGR)

        if scale != 1:
            height, width = frame.shape[:2]
            # cv2.resize takes the target size as (width, height)
            frame = cv2.resize(frame, (int(width * scale), int(height * scale)))

        cv2.imshow('Image', frame)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        return

    # mode == "inline"
    if isinstance(image, np.ndarray):
        # Convert from BGR to RGB (because OpenCV uses BGR order for color channels, whereas PIL uses RGB.)
        # Single-channel arrays have no channel order and are passed through unchanged.
        if image.ndim == 3 and image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif image.ndim == 3 and image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
        image = Image.fromarray(image)

    width, height = image.size
    display(image.resize((int(width*scale), int(height*scale))))

## Loading the images

Images with binned labels (green, yellow, red):

In [4]:
# Root folder of the binned dataset; ImageFolder reads the class labels from its sub-folders
data_path = 'E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset_binned'

# Preprocessing applied to every image on load.
# Note: Resize takes (height, width), so images end up 640 px high and 480 px wide.
data_transforms = transforms.Compose([
    transforms.Resize(size=(640, 480)),
    transforms.ToTensor(),
])

dataset_binned = datasets.ImageFolder(data_path, transform=data_transforms)
display(dataset_binned)

# Fixed seed so the 80/20 train/test split is reproducible
seed_generator = torch.Generator()
seed_generator.manual_seed(13)
trainset_binned, testset_binned = random_split(dataset_binned, lengths=[0.8, 0.2], generator=seed_generator)

Dataset ImageFolder
    Number of datapoints: 118
    Root location: E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset_binned
    StandardTransform
Transform: Compose(
               Resize(size=(640, 480), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

Images labeled with their RULA scores:

In [5]:
# Root folder of the score-labelled dataset; ImageFolder reads the class labels from its sub-folders
data_path = 'E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset Complete'

# Preprocessing applied to every image on load.
# Note: Resize takes (height, width), so images end up 640 px high and 480 px wide.
data_transforms = transforms.Compose([
    transforms.Resize(size=(640, 480)),
    transforms.ToTensor(),
])

dataset_scores = datasets.ImageFolder(data_path, transform=data_transforms)
display(dataset_scores)

# Fixed seed so the 80/20 train/test split is reproducible
seed_generator = torch.Generator()
seed_generator.manual_seed(13)
trainset_scores, testset_scores = random_split(dataset_scores, lengths=[0.8, 0.2], generator=seed_generator)

Dataset ImageFolder
    Number of datapoints: 118
    Root location: E:/Users/Vipin/Documents/BHT/3. Semester/Learning from images/Pose Dataset Complete
    StandardTransform
Transform: Compose(
               Resize(size=(640, 480), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

## Model training
We are using the following pretrained models:
- YOLOv8 Pose Estimation for keypoint detection
- RTMPose (through mmpose) for keypoint detection

For these models we take these approaches:
- Using the keypoints as input for our models
- Using the keypoints and calculating angles between the keypoints that are useful for pose quality estimation
- Removing the last layers of the pretrained model and adding our own layer for the quality estimation instead of keypoint detection

For the quality estimation we use two types of models:
- ScoringModel that predicts the pose quality scores (1-7, given by us through the RULA sheet) on a continuous scale
- ClassificationModel that predicts the pose quality class (green, yellow, red) based on the scores

The binning for the classification is as follows:
- Green: 1, 2
- Yellow: 3, 4, 5
- Red: 6, 7

### YOLOv8 for Keypoint Detection

### RTMPose for Keypoint Detection

### YOLOv8 for Keypoint Detection with our calculated angles

### Finetuned YOLOv8 model