In [1]:
import os
import shutil
import pandas as pd
import numpy as np
from PIL import Image, ImageOps
import cv2
import matplotlib.pyplot as plt
from datetime import datetime
import time
import json
import random

import imgaug.augmenters as iaa
import numpy as np
from imgaug.augmentables.kps import Keypoint, KeypointsOnImage

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import DataLoader, TensorDataset

# get the FLOPs
from fvcore.nn import FlopCountAnalysis, flop_count_table, parameter_count, flop_count_str
import torchprofile
# # decrease Cuda memory usage
# from torch.cuda.amp import GradScaler, autocast # use gradscaler amd mixed precision training

ModuleNotFoundError: No module named 'torchsummary'

# checking torch and tf

In [1]:
import tensorflow as tf
#from tensorflow.python.platform import build_info as tf_build_info
#print(tf.__version__)
#print("CUDA Version:", tf_build_info.cuda_version)
#print("cuDNN Version:", tf_build_info.cudnn_version)
#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [2]:
print('TensorFlow version:',tf.__version__)

TensorFlow version: 2.7.0


In [3]:
print("CUDA version: ", tf.sysconfig.get_build_info()["cuda_version"])

CUDA version:  11.2


In [4]:
print("cuDNN version: ", tf.sysconfig.get_build_info()["cudnn_version"])

cuDNN version:  8


In [5]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


2024-09-27 16:17:05.048293: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-27 16:17:05.083014: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-27 16:17:05.086604: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [6]:
import torch
print(torch.__version__)
print('CUDA version:',torch.version.cuda)
print('cuDNN version:',torch.backends.cudnn.version())
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

ModuleNotFoundError: No module named 'torch'

In [3]:
import torch
print("Is CUDA available: ", torch.cuda.is_available())
print("Number of GPUs available: ", torch.cuda.device_count())
print("CUDA device name: ", torch.cuda.get_device_name(0))


Is CUDA available:  True
Number of GPUs available:  1
CUDA device name:  NVIDIA GeForce RTX 3050 Laptop GPU


# DLC imports

In [1]:
import deeplabcut
print(deeplabcut.__version__)

Loading DLC 3.0.0rc4...
3.0.0rc4


In [4]:
print(deeplabcut.__file__)

/home/matthew/anaconda3/envs/DEEPLABCUT_PYTORCH_v2/lib/python3.10/site-packages/deeplabcut/__init__.py


In [13]:
# DLC imports (used with DEEPLABCUT env)
import pandas as pd
import os
import pickle
import json
import numpy as np

# SLEAP imports

In [7]:
import sleap

In [8]:
sleap.versions()

SLEAP: 1.4.1a2
TensorFlow: 2.7.0
Numpy: 1.21.5
Python: 3.7.12
OS: Linux-6.1.0-0.deb11.17-amd64-x86_64-with-debian-11.0


In [9]:
sleap.system_summary()

GPUs: 1/1 available
  Device: /physical_device:GPU:0
         Available: True
       Initialized: False
     Memory growth: None


# 0.0 Important Functions

In [None]:
# load ids into list 
def load_file_to_list(file_path):
    """
    loads a text file to a list with each entry on a new line becoming a new entry in the list.

    :param file_path: Path to the file where the list should be saved.
    :return list of data from file
    """
    # Open the file for writing
    lst = []
    with open(file_path, 'r') as file:
        # Write each item on a new line
        for line in file:
            lst.append(line.strip())
    return lst

In [None]:
# function to load the image data into an arr
# in the same order as the annotations and ids are stored (use id list for this)

# The load image data function may take a while to run

def load_image_data(ids_to_load, image_folder, crop_ext):

  # list for loading image data
  selected_imgs = []

  # for loop for loading image data that is present in the list of ids
  for i, img_id in enumerate(ids_to_load):

    # load the image
    img_path = os.path.join(image_folder, img_id+crop_ext)
    #print(img_path)
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    #print(img)

    # change the img to RGB from BGR as plt uses RGB colour scale
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # scaling the pixel values to [0, 1] (you don't need to scal them back)
    img = img/255

    selected_imgs.append(img)

  # Convert the list of images to a NumPy array
  selected_imgs_array = np.array(selected_imgs)
  
  return selected_imgs_array

In [None]:
def df_to_json(df, path):
    """
    Converts a DataFrame to a .json file.

    Parameters:
    df (pd.DataFrame): The DataFrame to be converted to JSON.
    path (str): The path (including file name) where the .json file will be saved.
    """
    df.to_json(path, orient='records', indent=4)

In [None]:

def json_to_df(path):
    """
    Converts a .json file to a DataFrame.

    Parameters:
    path (str): The path to the .json file that will be read.

    Returns:
    pd.DataFrame: The DataFrame created from the JSON file.
    """
    #print(path)
    df = pd.read_json(path, orient='records')
    #print(f"JSON file has been successfully converted to DataFrame.")
    return df

In [None]:
def create_data_lists(df_to_list, list_of_cols):

  # create temp lists
  keypoints_temp = []

  # step through the rows and
  for _, row in df_to_list.iterrows():

    # extract the data arrays
    keypoints_data = row[list_of_cols].values

    # adding data to the list
    keypoints_temp.append(keypoints_data)

  # Convert the list to a NumPy array and make sure that they are float32
  keypoints_array = np.array(keypoints_temp, dtype=np.float32)
  
  return keypoints_array

In [None]:
def set_dtypes_df_full_annotation_abs(df):
    df['vid_id'] = df['vid_id'].astype(str)
    df['img_id'] = df['img_id'].astype(str)
    df['bbox_id'] = df['bbox_id'].astype(str)
    df['bbox_c_x'] = df['bbox_c_x'].astype('float32')
    df['bbox_c_y'] = df['bbox_c_y'].astype('float32')
    df['bbox_w'] = df['bbox_w'].astype('float32')
    df['bbox_h'] = df['bbox_h'].astype('float32')
    df['Head_x'] = df['Head_x'].astype('float32')
    df['Head_y'] = df['Head_y'].astype('float32')
    df['Beak_x'] = df['Beak_x'].astype('float32')
    df['Beak_y'] = df['Beak_y'].astype('float32')
    df['Body_top_x'] = df['Body_top_x'].astype('float32')
    df['Body_top_y'] = df['Body_top_y'].astype('float32')
    df['RFlipper_mid_x'] = df['RFlipper_mid_x'].astype('float32')
    df['RFlipper_mid_y'] = df['RFlipper_mid_y'].astype('float32')
    df['LFlipper_mid_x'] = df['LFlipper_mid_x'].astype('float32')
    df['LFlipper_mid_y'] = df['LFlipper_mid_y'].astype('float32')
    df['Body_bottom_x'] = df['Body_bottom_x'].astype('float32')
    df['Body_bottom_y'] = df['Body_bottom_y'].astype('float32')
    df['RFoot_x'] = df['RFoot_x'].astype('float32')
    df['RFoot_y'] = df['RFoot_y'].astype('float32')
    df['LFoot_x'] = df['LFoot_x'].astype('float32')
    df['LFoot_y'] = df['LFoot_y'].astype('float32')
    df['kp_outside_best_bbox'] = df['kp_outside_best_bbox'].astype('float32')
    df['kp_missing'] = df['kp_missing'].astype('float32')
    df['kp_primary_missing'] = df['kp_primary_missing'].astype(bool)
    df['img_width'] = df['img_width'].astype('float32')
    df['img_height'] = df['img_height'].astype('float32')
    df['bbox_max_h_w'] = df['bbox_max_h_w'].astype('float32')
    return df

In [None]:
def unnorm_keypoints(img_size, keypoints, kp_to_null=None):
    """
    De-normalizes keypoints based on image size and returns the de-normalized keypoints along with 
    the positions of any missing or nullified keypoints.

    Parameters:
    - img_size: Tuple of the image dimensions (height, width).
    - keypoints: List of normalized keypoints (with values between -1 and 1).
    - kp_to_null: Optional. List of indices where the keypoints should be nulled (set to NaN).

    Returns:
    - new_keypoints: List of de-normalized keypoints where each coordinate is scaled back to the 
                     image's pixel dimensions.
    - missing_kp: List of indices where the keypoints were either originally set to -10 (indicating 
                  missing keypoints) or explicitly nullified by the kp_to_null list.
    """
    
    # Extract image width and height
    readjust_x = img_size[0]  # width of the image
    readjust_y = img_size[1]  # height of the image

    new_keypoints = []  # List to store the de-normalized keypoints
    missing_kp = []     # List to store the indices of missing or nullified keypoints

    # Iterate through each keypoint
    for i, keypoint in enumerate(keypoints):
        # Null keypoints if they are -10 or if they are specified in kp_to_null
        if keypoint == -10 or (kp_to_null and i in kp_to_null):
            keypoint = np.nan  # Set keypoint to NaN
            missing_kp.append(i)  # Record the index of the missing or nullified keypoint

        # De-normalize the x-coordinates
        if i % 2 == 0:  # Even indices are x-coordinates
            keypoint = keypoint * readjust_x + readjust_x / 2
        # De-normalize the y-coordinates
        else:  # Odd indices are y-coordinates
            keypoint = keypoint * readjust_y + readjust_y / 2

        new_keypoints.append(keypoint)  # Append the de-normalized keypoint to the list

    return new_keypoints, missing_kp

In [None]:
def norm_keypoints(img_size, keypoints):
    """
    Normalizes keypoints based on image size and replaces any NaN values with -10.

    Parameters:
    - img_size: Tuple of the image dimensions (width, height).
    - keypoints: List of de-normalized keypoints where each coordinate is in pixel dimensions.

    Returns:
    - norm_keypoints: List of normalized keypoints where each coordinate is scaled to the range 
                      [-1, 1] relative to the image size, with NaNs replaced by -10.
    """
    
    # Extract image width and height
    readjust_x = img_size[0]  # width of the image
    readjust_y = img_size[1]  # height of the image

    norm_keypoints = []  # List to store the normalized keypoints

    # Iterate through each keypoint
    for i, keypoint in enumerate(keypoints):
        # Replace NaN values with -10
        if np.isnan(keypoint):
            keypoint = -10.0
        else:
            # Normalize the x-coordinates
            if i % 2 == 0:  # Even indices are x-coordinates
                keypoint = (keypoint - readjust_x / 2) / readjust_x
            # Normalize the y-coordinates
            else:  # Odd indices are y-coordinates
                keypoint = (keypoint - readjust_y / 2) / readjust_y

        norm_keypoints.append(keypoint)  # Append the normalized keypoint to the list

    return norm_keypoints

In [None]:
# Denormalize keypoints for an array of images
def unnorm_keypoints_arr(kp_arr, img_arr):
    """
    Denormalizes keypoints for each image in the array based on the corresponding image size.
    It converts normalized keypoints (range [-1, 1]) back to pixel coordinates.

    Parameters:
    - kp_arr: Array of normalized keypoints, where each entry is a list of keypoints for an image.
              The keypoints are expected to be in the format [x1, y1, x2, y2, ...].
    - img_arr: Array of images. The size of each image is used to scale the keypoints back 
               to their pixel coordinates.

    Returns:
    - kp_abs_arr: Array of denormalized keypoints where each entry corresponds to the denormalized 
                  keypoints for the corresponding image in `img_arr`.
    """

    kp_abs_list = []  # List to store the denormalized keypoints for each image

    # Iterate through each set of keypoints and corresponding image
    for i, kp in enumerate(kp_arr):
        img_size = img_arr[i].shape  # Get the size of the current image (height, width, channels)

        # Denormalize the keypoints based on the image size
        kp_abs, missing_kp = unnorm_keypoints(img_size, kp_arr[i])

        # Save the denormalized keypoints to the list
        kp_abs_list.append(kp_abs)
    
    # Convert the list of denormalized keypoints to a NumPy array
    kp_abs_arr = np.array(kp_abs_list)

    return kp_abs_arr


In [None]:
# Normalize keypoints for an array of images
def norm_keypoints_arr(kp_arr, img_arr):
    """
    Normalizes keypoints for each image in the array based on the corresponding image size.
    It converts keypoints from pixel coordinates back to normalized coordinates (range [-1, 1]).

    Parameters:
    - kp_arr: Array of keypoints, where each entry is a list of keypoints for an image.
              The keypoints are expected to be in the format [x1, y1, x2, y2, ...] 
              with pixel coordinates.
    - img_arr: Array of images. The size of each image is used to scale the keypoints 
               to normalized coordinates.

    Returns:
    - kp_norm_arr: Array of normalized keypoints where each entry corresponds to the normalized 
                   keypoints for the corresponding image in `img_arr`.
    """
        
    kp_norm_list = []  # List to store the normalized keypoints for each image

    # Iterate through each set of keypoints and corresponding image
    for i, kp in enumerate(kp_arr):
        img_size = img_arr[i].shape  # Get the size of the current image (height, width, channels)

        # Normalize the keypoints based on the image size
        kp_norm = norm_keypoints(img_size, kp_arr[i])

        # Save the normalized keypoints to the list
        kp_norm_list.append(kp_norm)
    
    # Convert the list of normalized keypoints to a NumPy array
    kp_norm_arr = np.array(kp_norm_list)  

    return kp_norm_arr

In [None]:
# Apply augmentation to images and keypoints
def apply_aug(img_arr_orig, kp_arr_orig, aug, num_of_kp=8):
    """
    Applies augmentation to a batch of images and their corresponding keypoints.

    Parameters:
    - img_arr_orig: Original array of images. Shape should be (num_imgs, height, width, channels).
    - kp_arr_orig: Original array of keypoints. Shape should be (num_imgs, num_of_kp*2), where each 
                   keypoint is represented by its x and y coordinates in pixel values.
    - aug: An imgaug augmentation sequence or augmenter to apply to the images and keypoints.
    - num_of_kp: Optional. Number of keypoints per image (default is 8).

    Returns:
    - img_arr_aug: Augmented array of images. Same shape as `img_arr_orig`.
    - kp_arr_aug: Augmented array of keypoints. Same shape as `kp_arr_orig`.
    """
    # print(img_arr_orig.shape)
    #print(kp_arr_orig.shape)
    
    # Initialize lists to store augmented images and keypoints
    aug_img = []  # List for augmented images
    aug_kp = []   # List for augmented keypoints

    # Get the number of images in the batch
    num_imgs = img_arr_orig.shape[0]
    #print(num_imgs)

    # Loop over each image and its corresponding keypoints
    for i in range(num_imgs):
        image = img_arr_orig[i]  # Extract the i-th image
        #print(image.shape)
        
        # Convert keypoints to KeypointsOnImage format for imgaug
        keypoints = kp_arr_orig[i]
        #print(keypoints)
        kps = [Keypoint(x=keypoints[j*2], y=keypoints[j*2+1]) for j in range(num_of_kp)]
        kps_on_image = KeypointsOnImage(kps, shape=image.shape)
        
        # Apply the augmentation to the image and keypoints
        image_aug, kps_aug = aug(image=image, keypoints=kps_on_image)
        
        # Convert augmented keypoints back to the original flattened format [x1, y1, x2, y2, ...]
        keypoints_aug = []
        for kp in kps_aug.keypoints:
            keypoints_aug.extend([kp.x, kp.y])
        
        # Append the augmented image and keypoints to their respective lists
        aug_img.append(image_aug)
        aug_kp.append(keypoints_aug)

    # Convert the lists of augmented images and keypoints back to NumPy arrays
    img_arr_aug = np.array(aug_img)
    kp_arr_aug = np.array(aug_kp)

    return img_arr_aug, kp_arr_aug


In [None]:
def detect_padding(image):
    """
    Detects if padding is on the x-axis (left and right) or y-axis (top and bottom)
    of the image and calculates the padding size on one side.

    Parameters:
    - image: A NumPy array representing the image. The shape should be (width, height, channels).

    Returns:
    - is_padding_x: True if padding is on the x-axis, False if padding is on the y-axis.
    - padding_size: The size of the padding on one side in pixels.
    """

    width, height, _ = image.shape
    
    # Check for padding along the x-axis (left and right)
    left_column = image[:, 0, :]#image[0, :, :]  # The first column (left side)
    right_column = image[:, -1, :] #image[-1, :, :]  # The last column (right side)

    # Check for padding along the y-axis (top and bottom)
    top_row = image[:, 0, :]  # The first row (top side)
    bottom_row = image[:, -1, :]  # The last row (bottom side)
    #print(image[:, 5, :] *255)
    #print(left_column*255)
    
    # Check if the columns are fully black (indicating padding)
    if np.all(left_column*255 < 30) and np.all(right_column*255 < 30):
        # Padding is along the x-axis
        is_padding_x = True
        #plot_img(image)
        # Calculate padding size
        #padding_size = np.sum(image[0, :, 0]*255 < 30) // 2  # Count black pixels on one side
        # if padding_size > 60:
        sum1 = np.sum(image[5, :, 0]*255 < 20) // 2
        sum2 = np.sum(image[10, :, 0]*255 < 20) // 2
        sum3 = np.sum(image[60, :, 0]*255 < 20) // 2
        sum4 = np.sum(image[110, :, 0]*255 < 20) // 2
        sum5 = np.sum(image[-60, :, 0]*255 < 20) // 2
        sum6 = np.sum(image[-10, :, 0]*255 < 20) // 2
        sum7 = np.sum(image[-5, :, 0]*255 < 20) // 2
        padding_size = min(sum1, sum2, sum3, sum4, sum5, sum6, sum7)
        #     padding_size = max(average - 5, 1)
        # if padding_size > 60:
        #     sum1 = np.sum(image[5, :, 0]*255 < 10) // 2
        #     sum2 = np.sum(image[10, :, 0]*255 < 10) // 2
        #     sum3 = np.sum(image[60, :, 0]*255 < 10) // 2
        #     sum4 = np.sum(image[110, :, 0]*255 < 10) // 2
        #     sum5 = np.sum(image[-60, :, 0]*255 < 10) // 2
        #     sum6 = np.sum(image[-10, :, 0]*255 < 10) // 2
        #     sum7 = np.sum(image[-5, :, 0]*255 < 10) // 2
        #     average = (sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7) // 7  # Floor division for rounding down
        #     padding_size = max(average - 5, 1)
        if padding_size > 20: 
            padding_size = 20

    else:
        # Padding is along the y-axis (top and bottom)
        is_padding_x = False
        # Calculate padding size
        padding_size = np.sum(image[:, 0, 0]*255 < 30) // 2  # Count black pixels on one side
        # if padding_size > 60:
        sum1 = np.sum(image[:, 5, 0]*255 < 20) // 2
        sum2 = np.sum(image[:, 10, 0]*255 < 20) // 2
        sum3 = np.sum(image[:, 60, 0]*255 < 20) // 2
        sum4 = np.sum(image[:, 110, 0]*255 < 20) // 2
        sum5 = np.sum(image[:, -60, 0]*255 < 20) // 2
        sum6 = np.sum(image[:, -10, 0]*255 < 20) // 2
        sum7 = np.sum(image[:, -5, 0]*255 < 20) // 2
        padding_size = min(sum1, sum2, sum3, sum4, sum5, sum6, sum7)
        #     padding_size = max(average - 5, 1)
        # if padding_size > 60:
        #     sum1 = np.sum(image[:, 5, 0]*255 < 10) // 2
        #     sum2 = np.sum(image[:, 10, 0]*255 < 10) // 2
        #     sum3 = np.sum(image[:, 60, 0]*255 < 10) // 2
        #     sum4 = np.sum(image[:, 110, 0]*255 < 10) // 2
        #     sum5 = np.sum(image[:, -60, 0]*255 < 10) // 2
        #     sum6 = np.sum(image[:, -10, 0]*255 < 10) // 2
        #     sum7 = np.sum(image[:, -5, 0]*255 < 10) // 2
        #     average = (sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7) // 7  # Floor division for rounding down
        #     padding_size = max(average - 5, 1)
        if padding_size > 20: 
            padding_size = 20

    return is_padding_x, padding_size

In [None]:
def apply_aug_translate(train_imgs_array, train_kp_array_abs):

    
    # Get the number of images in the batch
    num_imgs = train_imgs_array.shape[0]
    # print(num_imgs)

    # creat empty arrays
    train_imgs_array_aug_trans = np.empty((0, train_imgs_array.shape[1], train_imgs_array.shape[2], train_imgs_array.shape[3]), dtype=train_imgs_array.dtype)
    train_kp_array_aug_trans = np.empty((0, train_kp_array_abs.shape[1]), dtype=train_kp_array_abs.dtype)

    # print(train_imgs_array_aug_trans.shape)
    # print(train_kp_array_aug_trans.shape)

    # Loop over each image and its corresponding keypoints
    for i in range(num_imgs):
        image = train_imgs_array[i]  # Extract the i-th image
        kp = train_kp_array_abs[i]
        # print(i)
        # print(image.shape)
        # print(kp.shape)

        is_padding_x, padding_size = detect_padding(image)
        # print(f'this: {i}')
        # print(is_padding_x)
        # print(padding_size)

        if is_padding_x:
            seq_trans_x_left = iaa.Sequential([
                iaa.TranslateX(px=(-padding_size, -padding_size)),
            ])
            seq_trans_x_right = iaa.Sequential([
                iaa.TranslateX(px=(padding_size, padding_size)),
            ])

            # Convert to shape (1, 220, 220, 3) and (1, 16)
            image = np.expand_dims(image, axis=0)
            #print(is_padding_x)
            #print(image.shape)
            #print(i)
            kp = np.expand_dims(kp, axis=0)

            # apply augmentations
            single_trans_x_left_img_arr, single_trans_x_left_kp_arr = apply_aug(image, kp, seq_trans_x_left)
            single_trans_x_right_img_arr, single_trans_x_right_kp_arr = apply_aug(image, kp, seq_trans_x_right)

            #save to image array
            train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_x_left_img_arr), axis=0)
            train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_x_right_img_arr), axis=0)
            #save to kp array
            train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_x_left_kp_arr), axis=0)
            train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_x_right_kp_arr), axis=0)

        else :
            seq_trans_y_up = iaa.Sequential([
                iaa.TranslateY(px=(-padding_size, -padding_size)),
            ])
            seq_trans_y_down = iaa.Sequential([
                iaa.TranslateY(px=(padding_size, padding_size)),
            ])

            # Convert to shape (1, 220, 220, 3) and (1, 16)
            image = np.expand_dims(image, axis=0)
            #print(is_padding_x)
            #print(image.shape)
            #print(i)
            kp = np.expand_dims(kp, axis=0)

            # apply augmentations
            single_trans_y_up_img_arr, single_trans_y_up_kp_arr = apply_aug(image, kp, seq_trans_y_up)
            single_trans_y_down_img_arr, single_trans_y_down_kp_arr = apply_aug(image, kp, seq_trans_y_down)

            #save to image array
            train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_y_up_img_arr), axis=0)
            train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_y_down_img_arr), axis=0)
            #save to kp array
            train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_y_up_kp_arr), axis=0)
            train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_y_down_kp_arr), axis=0)


    return train_imgs_array_aug_trans, train_kp_array_aug_trans

In [None]:
def replace_out_of_img_kp(arr):
    """
    Finds and replaces the elements in the array that are outside the frame but not the missing ones.
    Specifically, elements greater than 0.5 are replaced with 0.5, and elements less than -0.5 but 
    greater than -9.0 are replaced with -0.5.

    Parameters:
    - arr: A NumPy array of shape (n, 16).

    Returns:
    - modified_arr: The modified NumPy array with replaced values.
    - count_replacements: The number of elements that were replaced.
    """
    # Make a copy of the array to avoid modifying the original array
    modified_arr = arr.copy()

    # Replace elements greater than 0.5 with 0.5
    count_pos_replacements = np.sum(modified_arr > 0.5)
    modified_arr[modified_arr > 0.5] = 0.49

    # Replace elements less than -0.5 but greater than -9.0 with -0.5
    count_neg_replacements = np.sum((modified_arr < -0.5) & (modified_arr > -9.0))
    modified_arr[(modified_arr < -0.5) & (modified_arr > -9.0)] = -0.49

    # Total count of replacements
    count_replacements = count_pos_replacements + count_neg_replacements

    return modified_arr, count_replacements

In [None]:
def replace_out_of_img_kp(arr):
    """
    Finds and replaces the elements in the array that are outside the frame but not the missing ones.
    Specifically, elements greater than 0.5 are replaced with 0.5, and elements less than -0.5 but 
    greater than -9.0 are replaced with -0.5.

    Parameters:
    - arr: A NumPy array of shape (n, 16).

    Returns:
    - modified_arr: The modified NumPy array with replaced values.
    - count_replacements: The number of elements that were replaced.
    """
    # Make a copy of the array to avoid modifying the original array
    modified_arr = arr.copy()

    # Replace elements greater than 0.5 with 0.5
    count_pos_replacements = np.sum(modified_arr > 0.5)
    modified_arr[modified_arr > 0.5] = 0.49

    # Replace elements less than -0.5 but greater than -9.0 with -0.5
    count_neg_replacements = np.sum((modified_arr < -0.5) & (modified_arr > -9.0))
    modified_arr[(modified_arr < -0.5) & (modified_arr > -9.0)] = -0.49

    # Total count of replacements
    count_replacements = count_pos_replacements + count_neg_replacements

    return modified_arr, count_replacements

In [None]:
def masked_mse(y_true, y_pred):
    """
    Computes the mean squared error, ignoring the invisible keypoints.
    Assuming that -10.0 indicates an invisible keypoint.
    """
    # Create a mask where keypoints are visible
    mask = (y_true != -10.0).float().to(y_true.device)

    # Apply the mask to filter out invisible keypoints from both
    # the predictions and the true values
    y_true_masked = y_true * mask
    y_pred_masked = y_pred * mask

    # Compute the Mean Squared Error only on the visible keypoints
    mse = F.mse_loss(y_pred_masked, y_true_masked, reduction='sum') / mask.sum()
    return mse

In [None]:
# PCK
# put in a function that will use the max bbox if primary kp is missing
def pck_metric(y_true, y_pred, threshold=0.2):
    """
    Computes the Percentage of Correct Keypoints (PCK) metric.
    
    Parameters:
    y_true (torch.Tensor): The ground truth keypoints (batch_size, num_keypoints*2).
    y_pred (torch.Tensor): The predicted keypoints (batch_size, num_keypoints*2).
    threshold (float): The distance threshold for a keypoint to be considered correct.
                       Typically set relative to the size of the bounding box (e.g., 0.2).
    
    Returns:
    float: The percentage of correct keypoints.
    """
    # Create a mask where keypoints are visible (not equal to -10)
    mask = (y_true != -10.0).float().to(y_true.device)
    #print(mask)

    # Apply the mask to filter out invisible keypoints
    y_true_masked = y_true * mask
    y_pred_masked = y_pred * mask

    # print(y_true_masked)
    # print(y_pred_masked)

    # Compute the Euclidean distance between the predicted and true keypoints
    distances = torch.sqrt((y_pred_masked[:, ::2] - y_true_masked[:, ::2]) ** 2 +
                           (y_pred_masked[:, 1::2] - y_true_masked[:, 1::2]) ** 2)
    
    #print(distances)
    
    # Normalize the distances (relative to the max and min y coord)
    Norm_max_min_kp = torch.max(y_true_masked[:, 1::2], dim=1)[0] - torch.min(y_true_masked[:, 1::2], dim=1)[0]
    # Normalise based on the distance between the head and the bottom of the body (position 0, 1 and )
    #print(y_true[:, 0],y_true[:,10],y_true[:, 1],y_true[:, 11])
    #print((y_true[:, 0] - y_true[:,10]) ** 2)
    #print((y_true[:, 1] - y_true[:, 11]) ** 2)
    Norm_head_lowerbody = torch.sqrt((y_true[:, 0] - y_true[:,10]) ** 2 +
                        (y_true[:, 1] - y_true[:, 11]) ** 2)
    #print(Norm_head_lowerbody)
    normalized_distances = distances / Norm_head_lowerbody[:, None]
    #print(distances)
    #print(normalized_distances)

    # Count the correct keypoints (distance <= threshold)
    correct_keypoints = (normalized_distances <= threshold).float() * mask[:, ::2]
    #print(correct_keypoints)

    # Calculate the PCK as the percentage of correct keypoints
    pck = correct_keypoints.sum() / mask[:, ::2].sum()
    return pck#.item()


In [None]:
class DeepPoseModel(nn.Module):
    def __init__(self, nkeypoints=8):
        # Initializes the DeepPoseModel with the dataset and training configuration.
        super(DeepPoseModel, self).__init__()
        
        # The feature extractor part of the model, composed of several convolutional layers.
        self.features = nn.Sequential(
            # Conv2d: Input channels = 3 (RGB image), Output channels = 96, kernel size = 11x11,
            # stride = 4, padding = 4. 
            # Input: (batch_size, 3, 220, 220)
            # Output: (batch_size, 96, 55, 55)
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=4),
            
            # Local Response Normalization (LRN) over 5 neighboring channels
            nn.LocalResponseNorm(5),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),

            # Max pooling with 3x3 kernel and stride 2
            # output size: (batch_size, 96, 27, 27)
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            # Conv2d: Input channels = 96, Output channels = 256, kernel size = 5x5,
            # stride = 2, padding = 2.
            # Input: (batch_size, 96, 27, 27)
            # Output: (batch_size, 256, 27, 27)
            nn.Conv2d(48, 128, kernel_size=5, stride=1, padding=2),
            
            # Local Response Normalization (LRN) over 5 neighboring channels
            nn.LocalResponseNorm(5),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),

            # Max pooling with 3x3 kernel and stride 2
            # output size: (batch_size, 96, 13, 13)
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            # Conv2d: Input channels = 256, Output channels = 384, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 256, 13, 13)
            # Output: (batch_size, 384, 13, 13)
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Conv2d: Input channels = 384, Output channels = 384, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 384, 13, 13)
            # Output: (batch_size, 384, 13, 13)
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Conv2d: Input channels = 384, Output channels = 256, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 384, 13, 13)
            # Output: (batch_size, 256, 13, 13)
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # MaxPool2d: Kernel size = 3x3, stride = 2.
            # Input: (batch_size, 256, 13, 13)
            # Output: (batch_size, 256, 6, 6)
            # Max pooling with 3x3 kernel and stride 2
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        
        # The classifier part of the model, composed of fully connected layers.
        self.classifier = nn.Sequential(
            # Flatten the input tensor
            # Input: (batch_size, 256, 6, 6)
            # Output: (batch_size, 256 * 6 * 6) = (batch_size, 9216)
            nn.Flatten(),
            
            # Linear layer with input size 6400 and output size 4096
            # Input: (batch_size, 6400)
            # Output: (batch_size, 4096)
            nn.Linear(128 * 6 * 6, 4096),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Dropout layer with 60% dropout rate
            nn.Dropout(0.6),
            
            # Linear layer with input size 4096 and output size 4096
            # Input: (batch_size, 4096)
            # Output: (batch_size, 4096)
            nn.Linear(4096, 4096),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Dropout layer with 60% dropout rate
            nn.Dropout(0.6),
            
            # Final linear layer with input size 4096 and output size nkeypoints * 2
            # Output is (nkeypoints * 2) coordinates (x, y) for each keypoint
            # Input: (batch_size, 4096)
            # Output: (batch_size, nkeypoints * 2)
            nn.Linear(4096, nkeypoints * 2)
        )
        
    def forward(self, x):
        # Define the forward pass through the network.
        # Pass input `x` through the feature extractor
        x = self.features(x)
        # Pass the result through the classifier to get the final output
        x = self.classifier(x)
        return x

In [None]:
# def load_data_PyTorch(img_arr, kp_arr, batch_size, train_flag=True):
#     '''
#     conLoad data into PT dataLoader in specified batch size
    
#     Params
#     img_arr: images loaded into an array (i,255,255,3) and are converted to (i,3,255,255)
#     kp_arr: array of keypoints (i, num_kp*2)
#     batch_size: batch size 

#     Return:
#     PT_Dataset: containing input (x) and groundtruth (y)
#     PT_DataLoader: Dataloader containing dataset and batch size

#     '''

#     # create tensors from arrays and load them to the GPU
#     img_tensor = torch.tensor(img_arr, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
#     kp_tensor = torch.tensor(kp_arr, dtype=torch.float32)#.to('cuda')

#     # Create a TensorDataset and DataLoader for training data
#     dataset = TensorDataset(img_tensor, kp_tensor)
#     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=train_flag)

#     return dataloader

In [None]:
def create_timestamped_dir(descriptor, base_dir='/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/'):
    """
    Creates a directory with a timestamp appended to the base directory name.
    Returns the path to the created directory.
    
    Parameters:
    descriptor: string describing the run generally model_dataDescriptor
    base_dir (str): The base directory name. Default is './training_results'.
    
    Returns:
    str: The path to the created directory.
    """
    # Get the current datetime and format it as YYYY-MM-DD_HH-MM-SS
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    base_dir_descriptor = f"{base_dir}{descriptor}"
    
    # Create the final directory name with the timestamp
    final_dir = f"{base_dir_descriptor}_{timestamp}"
    
    # Create the directory
    os.makedirs(final_dir, exist_ok=True)
    
    return final_dir


In [None]:
def plot_training_curves(train_data, val_data, save_dir, data_descriptor='Loss', show_plot=False):
    # Plot the loss curves
    plt.figure(figsize=(10, 5))
    plt.plot(train_data, label=f'Training {data_descriptor}')
    plt.plot(val_data, label=f'Validation {data_descriptor}')
    plt.xlabel('Epoch')
    plt.ylabel(f'{data_descriptor}')
    plt.title(f'Training and Validation {data_descriptor} Over Epochs')
    plt.legend()

    # Save the plot
    plot_path = os.path.join(save_dir, f'{data_descriptor}_plot.png')
    plt.savefig(plot_path)
    #print(f'{data_descriptor} plot saved to {plot_path}')

    # Optionally, display the plot
    if show_plot == True:
        plt.show()

In [None]:
def save_stats_and_models(model, epoch, val_loss, val_pck, save_dir, 
                     best_val_loss=None, best_val_pck=None, 
                     final_model=False, train_loss_list=None, val_loss_list=None, train_pck_list=None, val_pck_list=None):
    """
    Saves the best models based on validation loss, PCK value, and final model.
    Saves the train and val curves and results for training
    
    Parameters:
    - model (torch.nn.Module): The PyTorch model to be saved.
    - epoch (int): The current epoch number.
    - val_loss (float): The current validation loss.
    - val_pck (float): The current validation PCK value.
    - save_dir (str): The directory where the models will be saved.
    - best_val_loss (float): The best validation loss seen so far.
    - best_val_pck (float): The best validation PCK value seen so far.
    - final_model (bool): If True, saves the final model after all epochs.
    - train_loss_list (list): List of all the loss values from each epoch
    
    Returns:
    - best_val_loss (float): Updated best validation loss.
    - best_val_pck (float): Updated best validation PCK value.
    - model_save_path_best_val_loss
    - model_save_path_best_val_pck
    - final_model_path
    """
    model_save_path_best_val_loss = None
    model_save_path_best_val_pck = None
    
    # Check if the current model has the lowest validation loss
    if best_val_loss is None or val_loss < best_val_loss:
        best_val_loss = val_loss
        model_name = f'best_val_loss_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth'
        model_save_path_best_val_loss = os.path.join(save_dir, model_name)
        torch.save(model.state_dict(), model_save_path_best_val_loss)
        print(f'New best model saved with lowest validation loss to {model_save_path_best_val_loss}')
    
    # Check if the current model has the highest validation PCK
    if best_val_pck is None or val_pck > best_val_pck:
        best_val_pck = val_pck
        model_save_path_best_val_pck = os.path.join(save_dir, f'best_val_pck_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth')
        torch.save(model.state_dict(), model_save_path_best_val_pck)
        print(f'New best model saved with highest validation PCK to {model_save_path_best_val_pck}')
    
    # Save the final model and perform final stats evaluation and save
    if final_model:
        final_model_path = os.path.join(save_dir, f'final_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth')
        torch.save(model.state_dict(), final_model_path)
        print(f'Final model saved to {final_model_path}')
        plot_training_curves(train_loss_list, val_loss_list, save_dir, 'Loss', show_plot=True)
        plot_training_curves(train_pck_list, val_pck_list, save_dir, data_descriptor='PCK@0.1', show_plot=True)
        return best_val_loss, best_val_pck, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path
    
    return best_val_loss, best_val_pck, model_save_path_best_val_loss, model_save_path_best_val_pck, None

In [None]:
def augment_data(aug, train_imgs_array, train_kp_array): 
    #
    print('augmenting data...')
    
    # unnorm kp
    train_kp_array_abs = unnorm_keypoints_arr(train_kp_array, train_imgs_array)

    # specify augmentations
    # lrflip
    seq_lrflip = iaa.Sequential([
        iaa.Fliplr(1.0)
    ])
    # rotate clock
    seq_rotate_clock = iaa.Sequential([
        iaa.Affine(rotate=(5, 20)),
    ])
    #rotate anticlock
    seq_rotate_anticlock = iaa.Sequential([
        iaa.Affine(rotate=(-20, -5)),
    ])

    # apply augmentation
    #lrflip
    train_imgs_array_aug_lrflip, train_kp_array_aug_lrflip_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_lrflip)
    # rotate clock
    train_imgs_array_aug_rclock, train_kp_array_aug_rclock_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_rotate_clock)
    #rotate anticlock
    train_imgs_array_aug_ranticlock, train_kp_array_aug_ranticlock_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_rotate_anticlock)
    #translat
    train_imgs_array_aug_trans, train_kp_array_aug_trans = apply_aug_translate(train_imgs_array, train_kp_array_abs)

    # norm the aug kp
    #lrflip
    train_kp_array_aug_lrflip_norm = norm_keypoints_arr(train_kp_array_aug_lrflip_abs, train_imgs_array_aug_lrflip)  
    # rotate clock
    train_kp_array_aug_rclock_norm = norm_keypoints_arr(train_kp_array_aug_rclock_abs, train_imgs_array_aug_rclock)
    #rotate anticlock
    train_kp_array_aug_ranticlock_norm = norm_keypoints_arr(train_kp_array_aug_ranticlock_abs, train_imgs_array_aug_ranticlock)
    #translat
    train_kp_array_aug_trans_norm = norm_keypoints_arr(train_kp_array_aug_trans, train_imgs_array_aug_trans)

    # combine augmented arrays to original array
    #save to image array
    train_imgs_array_aug = np.concatenate((train_imgs_array, train_imgs_array_aug_lrflip), axis=0)
    train_imgs_array_aug = np.concatenate((train_imgs_array_aug, train_imgs_array_aug_rclock), axis=0)
    train_imgs_array_aug = np.concatenate((train_imgs_array_aug, train_imgs_array_aug_ranticlock), axis=0)
    train_imgs_array_aug = np.concatenate((train_imgs_array_aug, train_imgs_array_aug_trans), axis=0)
    #save to kp array
    train_kp_array_aug = np.concatenate((train_kp_array, train_kp_array_aug_lrflip_norm), axis=0)
    train_kp_array_aug = np.concatenate((train_kp_array_aug, train_kp_array_aug_rclock_norm), axis=0)
    train_kp_array_aug = np.concatenate((train_kp_array_aug, train_kp_array_aug_ranticlock_norm), axis=0)
    train_kp_array_aug = np.concatenate((train_kp_array_aug, train_kp_array_aug_trans_norm), axis=0)

    if aug == 2:
        #put additional augmentations here and then concat the arrays
        pass

    return train_imgs_array_aug, train_kp_array_aug


In [None]:
def load_data(dataset, augmentation, crop_extension):

    print('laoding data ...')

    DATA_PARENT_PATH = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/'

    if dataset == 1: # Simple dataset 

        # variables
        dataset_name = 'PE_Simple'
        crop_extension = '_crop_220x220.jpg'# cropsize extension

        # loading ids to a list
        path = DATA_PARENT_PATH + dataset_name + '/ids_test_bbox.txt'
        ids_test_bbox = load_file_to_list(path)
        path = DATA_PARENT_PATH + dataset_name + '/ids_val_bbox.txt'
        ids_val_bbox=load_file_to_list(path)
        path = DATA_PARENT_PATH + dataset_name + '/ids_train_bbox.txt'
        ids_train_bbox=load_file_to_list(path)
        path = DATA_PARENT_PATH + dataset_name + '/ids_test.txt'
        ids_test=load_file_to_list(path)
        path = DATA_PARENT_PATH + dataset_name + '/ids_val.txt'
        ids_val=load_file_to_list(path)
        path = DATA_PARENT_PATH + dataset_name + '/ids_train.txt'
        ids_train=load_file_to_list(path)

        # load image data to array 
        img_dir = '/images'
        path = DATA_PARENT_PATH + dataset_name + img_dir + '/test'
        test_imgs_array = load_image_data(ids_test_bbox, path, crop_extension)
        path = DATA_PARENT_PATH + dataset_name + img_dir + '/val'
        val_imgs_array = load_image_data(ids_val_bbox, path, crop_extension)
        path = DATA_PARENT_PATH + dataset_name + img_dir + '/train'
        train_imgs_array = load_image_data(ids_train_bbox, path, crop_extension)

        # load annoation to df and set datatyoes
        anno_dir = '/annotation'
        path = DATA_PARENT_PATH + dataset_name + anno_dir + '/test_annotation_simple.json'
        df_full_annotation_norm_test = json_to_df(path)
        df_full_annotation_norm_test = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_test)
        path = DATA_PARENT_PATH + dataset_name + anno_dir + '/val_annotation_simple.json'
        df_full_annotation_norm_val = json_to_df(path)
        df_full_annotation_norm_val = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_val)
        path = DATA_PARENT_PATH + dataset_name + anno_dir + '/train_annotation_simple.json'
        df_full_annotation_norm_train = json_to_df(path)
        df_full_annotation_norm_train = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_train)

        # create lists with col names (potentially remove)
        id_cols = df_full_annotation_norm_test.iloc[:, :3].columns.to_list()
        bbox_cols = df_full_annotation_norm_test.iloc[:, 3:7].columns.to_list()
        kp_cols = df_full_annotation_norm_test.iloc[:, 7:23].columns.to_list()

        # load the kps to arrays
        test_kp_array = create_data_lists(df_full_annotation_norm_test, kp_cols)
        val_kp_array = create_data_lists(df_full_annotation_norm_val, kp_cols)
        train_kp_array = create_data_lists(df_full_annotation_norm_train, kp_cols)

        if augmentation > 1: 
            train_imgs_array, train_kp_array = augment_data(augmentation, train_imgs_array, train_kp_array)
        
        train_kp_array, _ = replace_out_of_img_kp(train_kp_array)

        return train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array
    


In [None]:
def run_train(model, dataset, aug, optimizer, lr, batch_size, num_epochs, crop_extension):

    print('training ...')

    # load data
    train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array = load_data(dataset, aug, crop_extension)

    # define model
    if model == 1:
        model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU
        descriptor = 'DeepPose'
    
    if dataset == 1:
        descriptor = descriptor + '_Simple'
    
    if aug == 1:
        descriptor = descriptor + '_noAug'
    if aug == 2:
        descriptor = descriptor + '_simpleAug'
    if aug == 3:
        descriptor = descriptor + '_largeAug'

    if optimizer == 1:
        # Define your optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        descriptor = descriptor + '_Adam'
    
    # get naming convention
    descriptor = descriptor + '_' + str(lr) + '_' + str(batch_size) + '_' + str(num_epochs)
    # create save dir
    save_dir = create_timestamped_dir(descriptor)
    
    # create tensors from arrays and load to a PT dataloader
    #train
    train_imgs_tensor = torch.tensor(train_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
    train_kp_tensor = torch.tensor(train_kp_array, dtype=torch.float32)
    train_dataset = TensorDataset(train_imgs_tensor, train_kp_tensor)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    #val
    val_imgs_tensor = torch.tensor(val_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
    val_kp_tensor = torch.tensor(val_kp_array, dtype=torch.float32)#.to('cuda')
    val_dataset = TensorDataset(val_imgs_tensor, val_kp_tensor)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)# shuffle omly needs to be true for traing
    #test
    test_imgs_tensor = torch.tensor(test_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
    test_kp_tensor = torch.tensor(test_kp_array, dtype=torch.float32)#.to('cuda')
    test_dataset = TensorDataset(test_imgs_tensor, test_kp_tensor)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)# shuffle omly needs to be true for traing
        
    # Lists to store the training and validation loss for each epoch
    train_losses = []
    val_losses = []
    train_pck_list = []
    val_pck_list = []
    best_val_loss = None
    best_val_pck = None
    model_save_path_best_val_loss = None
    model_save_path_best_val_pck = None

    print('start training loop ...')
    # training loop
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_train_loss = 0.0
        running_pck_01 = 0.0
        running_pck_val_01 = 0.0

        # loop for a single batch
        for batch_images, batch_keypoints in train_dataloader:

            # Move the data to the GPU
            batch_images = batch_images.to('cuda')
            batch_keypoints = batch_keypoints.to('cuda')

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch_images)
            # Compute the loss
            loss = masked_mse(batch_keypoints, outputs)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Accumulate the loss
            running_train_loss += loss.item()

            # compute metrics
            pck_01 = pck_metric(batch_keypoints, outputs, 0.1)

            # accumulate metrics
            running_pck_01 += pck_01.item()

        # calculate average loss for epoch
        avg_train_loss = running_train_loss / len(train_dataloader)
        # calculate average pck for epoch
        avg_pck_01 = running_pck_01 / len(train_dataloader)
    
        # populate train losses list for evaluation
        train_losses.append(avg_train_loss)
        # populate train pck list for evaluation
        train_pck_list.append(avg_pck_01)

        # evalution for training phase
        model.eval()
        running_val_loss = 0.0

        with torch.no_grad(): # dont update weights

            # evaluation loop for a single batch
            for batch_images, batch_keypoints in val_dataloader:

                # Move the data to the GPU
                batch_images = batch_images.to('cuda')
                batch_keypoints = batch_keypoints.to('cuda')
                
                # forward pass
                outputs = model(batch_images)
                # Compute the loss
                loss = masked_mse(batch_keypoints, outputs)

                # Accumulate the loss
                running_val_loss += loss.item()

                # compute metrics
                pck_01_val = pck_metric(batch_keypoints, outputs, 0.1)

                # accumulate metrics
                running_pck_val_01 += pck_01_val.item()

        # calculate average loss for epoch
        avg_val_loss = running_val_loss / len(val_dataloader)
        # calculate average pck for epoch
        avg_val_pck_01 = running_pck_val_01 / len(val_dataloader)

        # populate train losses list for evaluation
        val_losses.append(avg_val_loss)
        # populate train pck list for evaluation
        val_pck_list.append(avg_val_pck_01)

        # save best performing models based on the PCK and loss as well as the stats
        best_val_loss, best_val_pck, model_save_path_best_val_loss_temp, model_save_path_best_val_pck_temp, _ = save_stats_and_models(
        model, epoch + 1, avg_val_loss, avg_val_pck_01, save_dir, 
        best_val_loss, best_val_pck)

        if model_save_path_best_val_loss_temp:
            model_save_path_best_val_loss = model_save_path_best_val_loss_temp
        
        if model_save_path_best_val_pck_temp:
            model_save_path_best_val_pck = model_save_path_best_val_pck_temp

        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Train PCK0.1: {avg_pck_01:.4f}, Val PCK0.1: {avg_val_pck_01:.4f}')
        
    best_val_loss, best_val_pck, _, _, final_model_path = save_stats_and_models(model, num_epochs, avg_val_loss, avg_val_pck_01, save_dir, 
                    best_val_loss, best_val_pck, final_model=True, train_loss_list=train_losses, 
                    val_loss_list=val_losses, train_pck_list=train_pck_list, val_pck_list=val_pck_list)
    
    return save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, val_dataloader, test_dataloader,\
        train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array



In [None]:
def load_model_eval(model_path, model_class, device='cuda'):
    """
    Loads a PyTorch model from a .pth file.

    Parameters:
    - model_path (str): The path to the .pth model file.
    - model_class (torch.nn.Module): The class of the model to instantiate.
    - device (str): The device to load the model onto ('cuda' or 'cpu').

    Returns:
    - model (torch.nn.Module): The loaded PyTorch model.
    """
    # Instantiate the model class
    model = model_class().to(device)
    
    # Load the state dictionary into the model
    model.load_state_dict(torch.load(model_path, map_location=device))
    
    # Set the model to evaluation mode
    model.eval()
    
    return model

In [None]:
def evaluate_pck(model, dataloader, threshold=0.2, device='cuda'):
    """
    Evaluates the average PCK over an entire dataset.

    Parameters:
    - model: The trained model to evaluate.
    - dataloader: A DataLoader providing the data to evaluate on.
    - threshold: The PCK threshold distance (default is 0.2).
    - device: The device to perform computations on (default is 'cuda').

    Returns:
    - average_pck: The average PCK over the entire dataset.
    """
    total_pck = 0.0
    num_batches = 0

    with torch.no_grad():  # Disable gradient computation
        for batch_images, batch_keypoints in dataloader:
            batch_images = batch_images.to(device)
            batch_keypoints = batch_keypoints.to(device)

            # Get model predictions
            outputs = model(batch_images)

            # Compute PCK for the current batch
            pck = pck_metric(batch_keypoints, outputs, threshold)
            total_pck += pck.item()
            num_batches += 1

    average_pck = total_pck / num_batches
    
    return average_pck


In [None]:
def evaluate_pck_per_keypoint(model, dataloader, num_keypoints=8, threshold=0.2, device='cuda'):
    """
    Evaluates the average PCK for each keypoint individually.

    Parameters:
    - model: The trained model to evaluate.
    - dataloader: A DataLoader providing the data to evaluate on.
    - num_keypoints: The number of keypoints in the dataset.
    - threshold: The PCK threshold distance (default is 0.2).
    - device: The device to perform computations on (default is 'cuda').

    Returns:
    - keypoint_pcks: A list of average PCK values for each keypoint.
    """
    #model.eval()  # Set the model to evaluation mode
    total_pck_per_keypoint = torch.zeros(num_keypoints, device=device)
    total_visable_kp = torch.zeros(num_keypoints, device=device)
    num_batches = 0

    with torch.no_grad():  # Disable gradient computation
        for batch_images, batch_keypoints in dataloader:
            batch_images = batch_images.to(device)
            batch_keypoints = batch_keypoints.to(device)

            # Get model predictions
            outputs = model(batch_images)

            # Create a mask for visible keypoints
            mask = (batch_keypoints != -10.0).float().to(device)

            # Compute the Euclidean distances for each keypoint
            distances = torch.sqrt((outputs[:, ::2] - batch_keypoints[:, ::2]) ** 2 +
                                   (outputs[:, 1::2] - batch_keypoints[:, 1::2]) ** 2)

            # Normalize the distances
            Norm_head_lowerbody = torch.sqrt((batch_keypoints[:, 0] - batch_keypoints[:,10]) ** 2 +
                                             (batch_keypoints[:, 1] - batch_keypoints[:, 11]) ** 2)
            normalized_distances = distances / Norm_head_lowerbody[:, None]

            # Compute correct keypoints (distance <= threshold) for each keypoint
            correct_keypoints_per_keypoint = (normalized_distances <= threshold).float() * mask[:, ::2]

            # Accumulate PCK per keypoint
            total_pck_per_keypoint += correct_keypoints_per_keypoint.sum(dim=0)
            total_visable_kp += mask[:, ::2].sum(dim=0)
            num_batches += 1

    # Average PCK per keypoint
    #keypoint_pcks = (total_pck_per_keypoint / mask[:, ::2].sum(dim=0)).cpu().numpy()
    keypoint_pcks = (total_pck_per_keypoint / total_visable_kp).cpu().numpy()
    return keypoint_pcks

In [None]:
def full_pck_evaluation(model, val_dataloader, test_dataloader):

    print('calculating PCK ...')

    # create lists for pck at different thresholds
    avg_pck_test_list = []
    avg_pck_val_list = []
    avg_pck_per_kp_val_list = []
    avg_pck_per_kp_test_list = []
    
    # create a for loop to get PCK at 0.01 to 0.2
    for i in range (1, 21):

        # get pck threshold
        pck_threshold = (i/100)

        # calculate average pck
        avg_pck_val = evaluate_pck(model, val_dataloader, threshold=pck_threshold)
        avg_pck_test = evaluate_pck(model, test_dataloader, threshold=pck_threshold)

        # calculate average pck per kp
        avg_pck_per_kp_val = evaluate_pck_per_keypoint(model, val_dataloader, threshold=pck_threshold)
        avg_pck_per_kp_test = evaluate_pck_per_keypoint(model, test_dataloader, threshold=pck_threshold)

        if i == 5:
            # capture pck@0.05
            avg_pck_val_005 = avg_pck_val
            avg_pck_test_005 = avg_pck_test

        if i == 10:
            # capture pck@0.1
            avg_pck_val_01 = avg_pck_val
            avg_pck_test_01 = avg_pck_test

        if i == 20:
            # capture pck@0.2
            avg_pck_val_02 = avg_pck_val
            avg_pck_test_02 = avg_pck_test

        # save to lists
        avg_pck_test_list.append(avg_pck_test)
        avg_pck_val_list.append(avg_pck_val)
        avg_pck_per_kp_val_list.append(avg_pck_per_kp_val)
        avg_pck_per_kp_test_list.append(avg_pck_per_kp_test)

    return avg_pck_val_list, avg_pck_test_list, avg_pck_per_kp_val_list, avg_pck_per_kp_test_list, \
        avg_pck_val_005, avg_pck_test_005, avg_pck_val_01, avg_pck_test_01, avg_pck_val_02, avg_pck_test_02


In [None]:
def gpu_inference_time(model, dummy_input):
        
    # Warm up GPU to avoid initial overheads
    with torch.no_grad():
        for _ in range(10):
            _ = model(dummy_input)

    # Synchronize GPU and measure the time
    torch.cuda.synchronize()  # Ensure all previous CUDA operations are complete
    start_time = time.time()
    with torch.no_grad():
        _ = model(dummy_input)
    torch.cuda.synchronize()  # Wait for all CUDA operations to finish
    end_time = time.time()

    # Calculate elapsed time
    return (end_time - start_time)


In [None]:
def cpu_inference_time(model, dummy_input):

    # move model and dummy data to cpu
    model.to('cpu')
    dummy_input = dummy_input.to('cpu')

    # Warm up to avoid initial overheads affecting the time
    with torch.no_grad():
        for _ in range(10):
            _ = model(dummy_input)

    # Time the forward pass
    start_time = time.time()
    with torch.no_grad():
        _ = model(dummy_input)
    end_time = time.time()

    # move model and dummy iput back to gpu
    model.to('cuda')
    dummy_input.to('cuda')

    # Calculate elapsed time
    return (end_time - start_time)

In [None]:
def load_pck_to_dict(arr_list):
        
    # Define the keys for the dictionary
    keys = ['head', 'beak', 'body_top', 'rflipper', 'lflipper', 'body_bottom', 'rfoot', 'lfoot']

    # Initialize the dictionary with empty lists for each key
    results_dict = {key: [] for key in keys}

    # Populate the dictionary with values from the arrays
    for array in arr_list:
        for i, key in enumerate(keys):
            results_dict[key].append(array[i])

    return results_dict

In [None]:
def load_results_to_dict(save_dir, avg_pck_val_005, avg_pck_test_005, avg_pck_val_01, avg_pck_test_01, avg_pck_val_02, avg_pck_test_02,
                         total_params, total_flops, gpu_inf_time, cpu_inf_time, param_dict, flops_extend, avg_pck_test_list, 
                         avg_pck_per_kp_test_list, avg_pck_val_list, avg_pck_per_kp_val_list, num_train_imgs, num_val_imgs, num_test_imgs):

    description = save_dir.split('/')[-1]
    #print(description)

    #avg_pck_test_dict = load_pck_to_dict(avg_pck_test_list)
    avg_pck_per_kp_test_dict = load_pck_to_dict(avg_pck_per_kp_test_list)
    #avg_pck_val_dict = load_pck_to_dict(avg_pck_val_list)
    avg_pck_per_kp_val_dict = load_pck_to_dict(avg_pck_per_kp_val_list)

    results_dict = {
    'description': '',  # Placeholder for a string description
    'pck005': None,  # Placeholder for PCK@0.05 variable
    'pck01': None,  # Placeholder for PCK@0.1 variable
    'pck02': None,  # Placeholder for PCK@0.2 variable
    'total_params': None,  # Placeholder for total parameters variable
    'GFLOPs': None,  # Placeholder for GFLOPs variable
    'GPU_inf(ms)': None,  # Placeholder for GPU inference time variable
    'CPU_inf(ms)': None,  # Placeholder for CPU inference time variable
    'param_dict': {},  # Placeholder for parameter dictionary
    'flops_dict': {},  # Placeholder for FLOPs dictionary
    'PCK001-02': [],  # Placeholder for PCK@0.01-0.2 list
    'PCK001-02_per_kp': {},  # Placeholder for PCK per joint dictionary
    'val_PCK001-02': [],  # Placeholder for PCK@0.01-0.2 list
    'val_PCK001-02_per_kp': {},  # Placeholder for PCK per joint dictionary
    'val_pck005': None,  # Placeholder for PCK@0.05 variable
    'val_pck01': None,  # Placeholder for PCK@0.1 variable
    'val_pck02': None,  # Placeholder for PCK@0.2 variable
    'num_train_imgs': None, # number of train imgs
    'num_val_imgs': None, # number of train imgs
    'num_test_imgs': None, # number of train imgs
}
    
    results_dict['description'] = description
    results_dict['pck005'] = avg_pck_test_005 
    results_dict['pck01'] = avg_pck_test_01  
    results_dict['pck02'] = avg_pck_test_02 
    results_dict['total_params'] = total_params  
    results_dict['GFLOPs'] = (total_flops/1e9)
    results_dict['GPU_inf(ms)'] = gpu_inf_time*1000  
    results_dict['CPU_inf(ms)'] = cpu_inf_time*1000  
    results_dict['param_dict'] = param_dict  
    results_dict['flops_dict'] = flops_extend 
    results_dict['PCK001-02'] = avg_pck_test_list
    results_dict['PCK001-02_per_kp'] = avg_pck_per_kp_test_dict
    results_dict['val_PCK001-02'] = avg_pck_val_list
    results_dict['val_PCK001-02_per_kp'] = avg_pck_per_kp_val_dict
    results_dict['val_pck005'] = avg_pck_val_005 
    results_dict['val_pck01'] = avg_pck_val_01  
    results_dict['val_pck02'] = avg_pck_val_02
    results_dict['num_train_imgs'] = num_train_imgs
    results_dict['num_val_imgs'] = num_val_imgs
    results_dict['num_test_imgs'] = num_test_imgs

    return results_dict

In [None]:
def convert_numpy_types(obj):
    """
    Convert numpy types in an object to their native Python equivalents.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()  # Convert numpy arrays to lists
    elif isinstance(obj, np.generic):
        return obj.item()  # Convert numpy scalars to native Python types
    elif isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_types(element) for element in obj]
    else:
        return obj

In [None]:
def save_dict_to_json(data_dict, save_dir):
    """
    Saves a dictionary to a JSON file.

    Parameters:
    - data_dict: The dictionary to save.
    - save_dir: The directory path where the JSON file will be saved.

    Returns:
    - None
    """
    # Convert any numpy types in the dictionary to native Python types
    data_dict = convert_numpy_types(data_dict)

    # Ensure the save directory exists
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    # json name
    results_json = save_dir+'/results.json'
    
    # Save the dictionary to a JSON file
    with open(results_json, 'w') as json_file:
        json.dump(data_dict, json_file, indent=4)

In [None]:
# def plot_comparison(img, pred_keypoints, true_keypoints, save_dir, img_num, nkeypoints=8, keypoint_labels=None, connections = [(0, 1), (0, 2), (2, 3), (2, 4), (2, 5), (5, 6), (5, 7)]):
#     """
#     Plots predicted keypoints vs. ground truth keypoints on the same image.

#     Parameters:
#     - img: The image on which to plot the keypoints.
#     - pred_keypoints: The predicted keypoints (flattened x, y coordinates).
#     - true_keypoints: The ground truth keypoints (flattened x, y coordinates).
#     - save_dir: Directory to save the result to
#     - img_num: image number that is getting compared
#     - nkeypoints:  Optional The number of keypoints (default=8).
#     - keypoint_labels: Optional list of keypoint labels to display next to the keypoints.
#     - connections: OPtional list of tupels defining the connections between kps
#     """

#     fig = plt.figure(figsize=(8, 8), dpi=100)
#     plt.imshow(img)
    
#     # Extract x and y coordinates for predicted keypoints
#     pred_x_keypoints = pred_keypoints[::2]
#     pred_y_keypoints = pred_keypoints[1::2]
    
#     # Extract x and y coordinates for ground truth keypoints
#     true_x_keypoints = true_keypoints[::2]
#     true_y_keypoints = true_keypoints[1::2]

#     # Plot skeleton for true keypoints
#     for (i, j) in connections:
#         plt.plot([true_x_keypoints[i], true_x_keypoints[j]], 
#                  [true_y_keypoints[i], true_y_keypoints[j]], 
#                  'r-', linewidth=1)

#     # Plot skeleton for predicted keypoints
#     for (i, j) in connections:
#         plt.plot([pred_x_keypoints[i], pred_x_keypoints[j]], 
#                  [pred_y_keypoints[i], pred_y_keypoints[j]], 
#                  'g-', linewidth=1)
    
#     # Plot predicted keypoints
#     plt.scatter(pred_x_keypoints, pred_y_keypoints, marker='o', c='g', s=100, label='Predicted', edgecolor='black')
    
#     # Plot ground truth keypoints
#     plt.scatter(true_x_keypoints, true_y_keypoints, marker='x', c='r', s=100, label='Ground Truth')
    
#     # If labels are provided, add them to the plot
#     if keypoint_labels is not None:
#         for i, (x, y) in enumerate(zip(true_x_keypoints, true_y_keypoints)):
#             plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
#                      bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

#     # If labels are provided, add them to the plot
#     if keypoint_labels is not None:
#         for i, (x, y) in enumerate(zip(pred_x_keypoints, pred_y_keypoints)):
#             plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
#                      bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

#     # Add a legend to differentiate between predicted and ground truth keypoints
#     plt.legend()

#     # Save the plot
#     plot_path = os.path.join(save_dir, f'Comparison of predicted and ground truth for img {img_num}.png')
#     plt.savefig(plot_path)
#     #print(f'{data_descriptor} plot saved to {plot_path}')

#     plt.show()


In [None]:
# def predict_and_plot(model_path, start_img, end_img, model_class=DeepPoseModel, device='cuda'):
#     """
#     Loads a model, predicts keypoints for a range of images, and plots the predicted keypoints 
#     versus ground truth keypoints on the same image. The images with plotted keypoints are then 
#     saved to a specified directory.

#     Parameters:
#     - model_path: The file path to the saved model's .pth file.
#     - start_img: The starting index of the images in the validation set to process.
#     - end_img: The ending index of the images in the validation set to process (exclusive).
#     - model_class: Optional. The class of the model architecture to instantiate and load 
#                    with the saved weights (default=DeepPoseModel).
#     - device: Optional. The device to run the model on ('cuda' for GPU, 'cpu' for CPU; default='cuda').
    
#     Returns:
#     - None. The function saves the images with plotted keypoints to the directory derived from the 
#             model path.
#     """

#     # get img lists
#     img_arr = val_imgs_array[start_img:end_img,:,:,:]
#     true_kp_arr = val_kp_array[start_img:end_img,:]

#     # Load the model
#     model = load_model(model_path, model_class, device=device)

#     # Get predictions
#     predictions = predict(model, img_arr, device=device)
#     #print(predictions)

#     # DeNorm predictions 
#     predictions_abs = []
#     true_kp_arr_abs = []
#     for i, kp in enumerate(predictions):

#         img_size = img_arr[i].shape
#         #print(img_size)

#         #unNorm each prediction
#         true_kp_abs, missing_kp = unnorm_keypoints(img_size, true_kp_arr[i])
#         #print(missing_kp)
#         kp_abs, missing_kp = unnorm_keypoints(img_size, kp, kp_to_null=missing_kp)
#         #print(missing_kp)
        

#         # save result to new list
#         predictions_abs.append(kp_abs)
#         true_kp_arr_abs.append(true_kp_abs)

#     #print(predictions_abs)

#     # get the save directory parent (where the images will be saved)
#     save_dir = model_path.rsplit('/',1)[0]

#     # labels
#     labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']

#     for i, kp in enumerate(predictions_abs):

#         plot_comparison(img_arr[i], predictions_abs[i], true_kp_arr_abs[i], save_dir, img_num=i+start_img)#, keypoint_labels=labels)

In [None]:
# def predict(model, images, img_is_tensor=False, device='cuda'):
#     """
#     Generates predictions from a PyTorch model given an array of images.

#     Parameters:
#     - model (torch.nn.Module): The PyTorch model to use for predictions.
#     - images (np.array): Array of images (e.g., shape: (num_images, 220, 220, 3)).
#     - device (str): The device to run the model on ('cuda' or 'cpu').

#     Returns:
#     - predictions (np.array): Array of predictions (e.g., keypoints for each image).
#     """
#     # Convert images to PyTorch tensor and move to the specified device
#     if not img_is_tensor:
#         images_tensor = torch.tensor(images, dtype=torch.float32).permute(0, 3, 1, 2).to(device)
    
#     # Forward pass through the model to get predictions
#     with torch.no_grad():
#         predictions = model(images_tensor)
    
#     # Convert predictions back to a NumPy array and move to CPU if necessary
#     predictions = predictions.cpu().numpy() if device == 'cuda' else predictions.numpy()
    
#     return predictions

In [None]:
def predict(model, images, img_is_tensor=False, device='cuda'):
    """
    Generates predictions from a PyTorch model given an array of images.

    Parameters:
    - model (torch.nn.Module): The PyTorch model to use for predictions.
    - images (np.array): Array of images (e.g., shape: (num_images, 220, 220, 3)).
    - device (str): The device to run the model on ('cuda' or 'cpu').

    Returns:
    - predictions (np.array): Array of predictions (e.g., keypoints for each image).
    """
    # Convert images to PyTorch tensor and move to the specified device
    if not img_is_tensor:
        images_tensor = torch.tensor(images, dtype=torch.float32).permute(0, 3, 1, 2).to(device)
    
    # Forward pass through the model to get predictions
    with torch.no_grad():
        predictions = model(images_tensor)
    
    # Convert predictions back to a NumPy array and move to CPU if necessary
    predictions = predictions.cpu().numpy() if device == 'cuda' else predictions.numpy()
    
    return predictions

In [None]:
def plot_comparison(img, pred_keypoints, true_keypoints, save_dir, img_num, nkeypoints=8, keypoint_labels=None, connections = [(0, 1), (0, 2), (2, 3), (2, 4), (2, 5), (5, 6), (5, 7)]):
    """
    Plots predicted keypoints vs. ground truth keypoints on the same image.

    Parameters:
    - img: The image on which to plot the keypoints.
    - pred_keypoints: The predicted keypoints (flattened x, y coordinates).
    - true_keypoints: The ground truth keypoints (flattened x, y coordinates).
    - save_dir: Directory to save the result to
    - img_num: image number that is getting compared
    - nkeypoints:  Optional The number of keypoints (default=8).
    - keypoint_labels: Optional list of keypoint labels to display next to the keypoints.
    - connections: OPtional list of tupels defining the connections between kps
    """

    fig = plt.figure(figsize=(8, 8), dpi=100)
    plt.imshow(img)
    
    # Extract x and y coordinates for predicted keypoints
    pred_x_keypoints = pred_keypoints[::2]
    pred_y_keypoints = pred_keypoints[1::2]
    
    # Extract x and y coordinates for ground truth keypoints
    true_x_keypoints = true_keypoints[::2]
    true_y_keypoints = true_keypoints[1::2]

    # Plot skeleton for true keypoints
    for (i, j) in connections:
        plt.plot([true_x_keypoints[i], true_x_keypoints[j]], 
                 [true_y_keypoints[i], true_y_keypoints[j]], 
                 'r-', linewidth=1)

    # Plot skeleton for predicted keypoints
    for (i, j) in connections:
        plt.plot([pred_x_keypoints[i], pred_x_keypoints[j]], 
                 [pred_y_keypoints[i], pred_y_keypoints[j]], 
                 'g-', linewidth=1)
    
    # Plot predicted keypoints
    plt.scatter(pred_x_keypoints, pred_y_keypoints, marker='o', c='g', s=100, label='Predicted', edgecolor='black')
    
    # Plot ground truth keypoints
    plt.scatter(true_x_keypoints, true_y_keypoints, marker='x', c='r', s=100, label='Ground Truth')
    
    # If labels are provided, add them to the plot
    if keypoint_labels is not None:
        for i, (x, y) in enumerate(zip(true_x_keypoints, true_y_keypoints)):
            plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
                     bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

    # If labels are provided, add them to the plot
    if keypoint_labels is not None:
        for i, (x, y) in enumerate(zip(pred_x_keypoints, pred_y_keypoints)):
            plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
                     bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

    # Add a legend to differentiate between predicted and ground truth keypoints
    plt.legend()

    
    # Ensure the save directory exists
    os.makedirs(save_dir, exist_ok=True)

    # Save the plot
    part = save_dir.split('/')[-1]
    plot_path = os.path.join(save_dir, f'{part}_vs_GT_img_{img_num}.png')
    plt.savefig(plot_path)
    #print(f'{data_descriptor} plot saved to {plot_path}')

    plt.show()


In [None]:
def predict_and_plot(model, img_arr, kp_arr, start_img, end_img, save_dir, keypoint_display=False, device='cuda'):
    """
    Loads a model, predicts keypoints for a range of images, and plots the predicted keypoints 
    versus ground truth keypoints on the same image. The images with plotted keypoints are then 
    saved to a specified directory.

    Parameters:
    - model_path: The file path to the saved model's .pth file.
    - start_img: The starting index of the images in the validation set to process.
    - end_img: The ending index of the images in the validation set to process (exclusive).
    - model_class: Optional. The class of the model architecture to instantiate and load 
                   with the saved weights (default=DeepPoseModel).
    - device: Optional. The device to run the model on ('cuda' for GPU, 'cpu' for CPU; default='cuda').
    
    Returns:
    - None. The function saves the images with plotted keypoints to the directory derived from the 
            model path.
    """

    # get img lists
    img_arr = img_arr[start_img:end_img,:,:,:]
    true_kp_arr = kp_arr[start_img:end_img,:]

    # Get predictions
    predictions = predict(model, img_arr, device=device)
    #print(predictions)

    # DeNorm predictions 
    predictions_abs = []
    true_kp_arr_abs = []
    for i, kp in enumerate(predictions):

        img_size = img_arr[i].shape
        #print(img_size)

        #unNorm each prediction
        true_kp_abs, missing_kp = unnorm_keypoints(img_size, true_kp_arr[i])
        #print(missing_kp)
        kp_abs, missing_kp = unnorm_keypoints(img_size, kp, kp_to_null=missing_kp)
        #print(missing_kp)
        

        # save result to new list
        predictions_abs.append(kp_abs)
        true_kp_arr_abs.append(true_kp_abs)

    # labels
    labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']

    for i, kp in enumerate(predictions_abs):

        if keypoint_display:
            plot_comparison(img_arr[i], predictions_abs[i], true_kp_arr_abs[i], save_dir, img_num=i+start_img, keypoint_labels=labels)
        else:
            plot_comparison(img_arr[i], predictions_abs[i], true_kp_arr_abs[i], save_dir, img_num=i+start_img)#, keypoint_labels=labels)

In [None]:
# if not running directly off the back of training then set this to true
# straight_eval = True
def run_eval(save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, model=None, val_dataloader=None, \
             test_dataloader=None, train_imgs_array=None, val_imgs_array=None, test_imgs_array=None, val_kp_array=None, \
                test_kp_array=None, straight_eval=True):
    if straight_eval:
        # run just evalutation

        # set model paths
        save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/DeepPose_Simple_SimpleAug_batch16_2024-08-22_16-34-02'
        model_save_path_best_val_loss = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/DeepPose_Simple_SimpleAug_batch16_2024-08-22_16-34-02/best_val_loss_model_epoch_22_PCK_0.6433_loss_0.0053.pth'
        model_save_path_best_val_pck = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/DeepPose_Simple_SimpleAug_batch16_2024-08-22_16-34-02/best_val_loss_model_epoch_22_PCK_0.6433_loss_0.0053.pth'
        final_model_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/DeepPose_Simple_SimpleAug_batch16_2024-08-22_16-34-02/final_model_epoch_30_PCK_0.5835_loss_0.0076.pth'

        # load the data
        train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array = load_data(1, 1, crop_extension)
        #val
        val_imgs_tensor = torch.tensor(val_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
        val_kp_tensor = torch.tensor(val_kp_array, dtype=torch.float32)#.to('cuda')
        val_dataset = TensorDataset(val_imgs_tensor, val_kp_tensor)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)# shuffle omly needs to be true for traing
        #test
        test_imgs_tensor = torch.tensor(test_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
        test_kp_tensor = torch.tensor(test_kp_array, dtype=torch.float32)#.to('cuda')
        test_dataset = TensorDataset(test_imgs_tensor, test_kp_tensor)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)# shuffle omly needs to be true for traing

    # create evaluation dir
    save_dir_eval = create_timestamped_dir('/evaluation', save_dir)

    if model == 1:
        model_class = DeepPoseModel
        input_size = (1, 3, 220, 220)#(torch.randn((1,3,220,220)),)

    # load model for evaluation
    model = load_model_eval(model_save_path_best_val_pck, model_class)

    avg_pck_val_list, avg_pck_test_list, avg_pck_per_kp_val_list, avg_pck_per_kp_test_list,\
        avg_pck_val_005, avg_pck_test_005, avg_pck_val_01, avg_pck_test_01, avg_pck_val_02, avg_pck_test_02 \
        = full_pck_evaluation(model, val_dataloader, test_dataloader)

    print('Finding other metrics ...')

    # Calculate number of FLOPs
    dummy_input = torch.randn(input_size).to('cuda') # move the dummy input to GPU
    flops = FlopCountAnalysis(model, dummy_input)
    total_flops = flops.total()
    flops_extend = flops.by_module_and_operator() * 2
    flops_2 = torchprofile.profile_macs(model, dummy_input) * 2
    print(f'Total FLOPs: {flops_2}')
    print(flop_count_table(flops))

    # Calculate the number of params
    param_dict = parameter_count(model)
    total_params = param_dict['']
    # print(total_params)

    # Calculate inference time GPU
    gpu_inf_time = gpu_inference_time(model, dummy_input)
    # print(gpu_inf_time)
    # print(gpu_inf_time*1e3)

    # Calculate inference time CPU
    cpu_inf_time = cpu_inference_time(model, dummy_input)
    # print(cpu_inf_time)
    # print(cpu_inf_time*1e3)

    # load results to a dict
    results = load_results_to_dict(save_dir, avg_pck_val_005, avg_pck_test_005, avg_pck_val_01, avg_pck_test_01, avg_pck_val_02, avg_pck_test_02,\
                        total_params, total_flops, gpu_inf_time, cpu_inf_time, param_dict, flops_extend, avg_pck_test_list, \
                        avg_pck_per_kp_test_list, avg_pck_val_list, avg_pck_per_kp_val_list, train_imgs_array.shape[0], \
                        val_imgs_array.shape[0], test_imgs_array.shape[0])

    # save 
    print('saving metrics ...')
    save_dict_to_json(results, save_dir_eval)

    # plot and save images
    print('plotting and saving some result images ...')
    # get random images with seed so that it is consistant
    # Set a fixed seed for reproducibility
    fixed_seed = 42
    random.seed(fixed_seed)

    # val - create loop to produce and save 5 random images to the save dir
    # Generate unique random numbers for validation
    val_random_nums = random.sample(range(val_imgs_array.shape[0]), 5)

    for i, random_num in enumerate(val_random_nums):

        print('VALIDATION', i)
        predict_and_plot(model, val_imgs_array, val_kp_array, random_num, random_num+1, save_dir_eval+'/val_predictions')

    # test - create loop to produce and save 5 random images to the save dir
    # Generate unique random numbers for testing
    test_random_nums = random.sample(range(test_imgs_array.shape[0]), 15)
    for i, random_num in enumerate(test_random_nums):
        print(f'TEST {i}')
        # get a random image in the list
        predict_and_plot(model, test_imgs_array, test_kp_array, random_num, random_num+1, save_dir_eval+'/test_predictions')

    print('done!')

In [None]:
def plot_img_and_keypoint(img, keypoints, nkeypoints=8, keypoint_labels=None):
  fig = plt.figure(figsize=(8, 8), dpi=100)
  plt.imshow(img)
  #print(keypoints)
  x_keypoints = keypoints[::2]
  y_keypoints = keypoints[1::2]
  #print(x_keypoints)
  #print(y_keypoints)
  plt.scatter(x_keypoints, y_keypoints, marker='.', c=np.arange(nkeypoints), cmap='jet')

    # If labels are provided, add them to the plot
  if keypoint_labels is not None:
      for i, (x, y) in enumerate(zip(x_keypoints, y_keypoints)):
          plt.text(x, y, keypoint_labels[i], fontsize=12, color='white', 
                    bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

  plt.show()

In [14]:
def display_all_cols(df):
    with pd.option_context('display.max_columns', None):
        print(df.head())


# 0.1. Model input and run train and evalutaion

In [None]:
# Run a model
model = 1 # 1. DeepPose
dataset = 1 # 1. Simple - need to think how to handle crop image size - this should be moved to the model rather maybe
augmentation = 2 # 1. no aug, 2. simple aug, 3. large aug*
batch_size = 16
num_epochs = 300
learning_rate = 0.00005
optimizer = 1 # 1. Adam
crop_extension = '_crop_220x220.jpg'# cropsize extension

In [None]:
# run just train
save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, val_dataloader, test_dataloader, \
    train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array\
        = run_train(model, dataset, augmentation, optimizer, learning_rate, batch_size, num_epochs, crop_extension)

In [None]:
# run just evaluation
run_eval(save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, model=model)

In [None]:
# run both train and evaluation

# train
save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, val_dataloader, test_dataloader, \
    train_imgs_array, val_imgs_array, test_imgs_array, train_kp_array, val_kp_array, test_kp_array\
        = run_train(model, dataset, augmentation, optimizer, learning_rate, batch_size, num_epochs, crop_extension)

# evaluate
run_eval(save_dir, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path, model=model, val_dataloader=val_dataloader, \
             test_dataloader=test_dataloader, train_imgs_array=train_imgs_array, val_imgs_array=val_imgs_array, test_imgs_array=test_imgs_array,\
                 val_kp_array=val_kp_array, test_kp_array=test_kp_array, straight_eval=False)

# 4. Create a SLEAP model

## 4.1. Get the data ready for use

# 3. Create a DLC model 

## 3.1. create project

In [12]:
# create new project
config_path = deeplabcut.create_new_project('DLC_simple_dataset','model1', ['/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_videos/flap1.mp4', '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_videos/flap2.mp4'],
              copy_videos=False, multianimal=True, working_directory = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model')

Created "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/videos"
Created "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/labeled-data"
Created "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/training-datasets"
Created "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/dlc-models"
Attempting to create a symbolic link of the video ...
Created the symlink of /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_videos/flap1.mp4 to /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/videos/flap1.mp4
Created the symlink of /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDet

In [13]:
print(config_path)

/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/config.yaml


## 3.2. load the annotations previously annotated to the labelled data directory

In [6]:
#config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/config.yaml'
config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/config.yaml'

In [16]:
#df = pd.read_hdf('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/CollectedData_Matt.h5')
#df2 = pd.read_hdf('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/labeled-data/flap2/CollectedData_model1.h5')
df2 = pd.read_hdf('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/training-datasets/iteration-0/UnaugmentedDataSet_DLC_simple_datasetSep15/CollectedData_model1.h5')

In [18]:
# df = pd.read_hdf('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/labeled-data/flap1/Filtered_CollectedData_model1.h5')

In [17]:
display_all_cols(df2)

NameError: name 'display_all_cols' is not defined

In [15]:
display_all_cols(df)

scorer                                                                                                     Matt  \
individuals                                                                                                 ID1   
bodyparts                                                                                                  Head   
coords                                                                                                        x   
frames_to_label PenguinPi1_video_2024-02-26_15-15 frame_PenguinPi1_video_2024-02-26_15-15.mp4_0.jpg   59.451922   
                                                  frame_PenguinPi1_video_2024-02-26_15-15.mp4_1.jpg   56.044090   
                                                  frame_PenguinPi1_video_2024-02-26_15-15.mp4_10.jpg  58.547084   
                                                  frame_PenguinPi1_video_2024-02-26_15-15.mp4_11.jpg  60.695114   
                                                  frame_PenguinPi1_video_2024-02

In [21]:
# Example usage
# Assuming your DataFrame is `df`
df_updated = add_missing_ids(df2)

In [22]:
display_all_cols(df_updated)

scorer                              model1                          \
individuals                            ID1                           
bodyparts                             Head                    Beak   
coords                                   x          y            x   
labeled-data flap2 img001.png  1273.169432  -0.474729  1413.080089   
                   img024.png  1414.688258  65.460179  1541.733567   
                   img034.png  1528.868220  86.366369  1646.264518   
                   img054.png  1580.329611  92.799043  1713.807594   
                   img074.png  1638.223676  67.068347  1776.526165   

scorer                                                              \
individuals                                                          
bodyparts                                     Body_top               
coords                                  y            x           y   
labeled-data flap2 img001.png   30.080472  1266.736758  198.938162   
                   

In [18]:
df2.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,scorer,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1
Unnamed: 0_level_1,Unnamed: 1_level_1,individuals,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,...,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20
Unnamed: 0_level_2,Unnamed: 1_level_2,bodyparts,Head,Head,Beak,Beak,Body_top,Body_top,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,...,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,Body_bottom,Body_bottom,RFoot,RFoot,LFoot,LFoot
Unnamed: 0_level_3,Unnamed: 1_level_3,coords,x,y,x,y,x,y,x,y,x,y,...,x,y,x,y,x,y,x,y,x,y
labeled-data,flap12,img002.png,1232.96522,388.702042,1318.198149,430.514422,1083.405552,486.800319,1072.148373,618.670134,,,...,,,,,,,,,,
labeled-data,flap12,img007.png,1245.830568,422.47358,1319.806318,467.502297,1072.148373,501.273835,1113.960753,684.605041,,,...,,,,,,,,,,
labeled-data,flap12,img028.png,1236.181557,424.081749,1282.818443,470.718634,1072.148373,504.490172,1212.05903,660.482514,970.833759,575.249585,...,,,,,,,,,,
labeled-data,flap12,img051.png,1160.597639,411.216401,,,1096.2709,506.098341,1097.879068,586.506765,956.360243,597.763944,...,,,,,,,,,,
labeled-data,flap12,img058.png,1160.597639,408.000064,,,1068.932036,502.882004,1094.662731,528.612699,948.3194,502.882004,...,,,,,,,,,,
labeled-data,flap12,img065.png,1168.638481,406.391895,1189.544671,435.338928,1062.499362,490.016656,1205.626356,610.629292,912.939694,586.506765,...,,,,,,,,,,
labeled-data,flap12,img074.png,1179.89566,404.783727,1208.842693,438.555265,1078.581047,494.841161,1202.410019,650.833504,974.050096,607.412955,...,,,,,,,,,,
labeled-data,flap12,img088.png,1154.164965,412.824569,1189.544671,453.028781,1057.674856,501.273835,1165.422144,554.343395,941.886726,491.624825,...,,,,,,,,,,
labeled-data,flap12,img112.png,1096.2709,401.56739,,,1086.621889,493.232993,1210.450862,654.04984,951.535737,628.319145,...,,,,,,,,,,
labeled-data,flap12,img145.png,1060.891193,390.310211,,,1056.066688,488.408488,1195.977345,522.180026,892.033504,535.045373,...,,,,,,,,,,


In [23]:
df_updated.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,scorer,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1,model1
Unnamed: 0_level_1,Unnamed: 1_level_1,individuals,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,...,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20
Unnamed: 0_level_2,Unnamed: 1_level_2,bodyparts,Head,Head,Beak,Beak,Body_top,Body_top,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,...,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,Body_bottom,Body_bottom,RFoot,RFoot,LFoot,LFoot
Unnamed: 0_level_3,Unnamed: 1_level_3,coords,x,y,x,y,x,y,x,y,x,y,...,x,y,x,y,x,y,x,y,x,y
labeled-data,flap2,img001.png,1273.169432,-0.474729,1413.080089,30.080472,1266.736758,198.938162,1094.662731,399.959221,1430.769943,380.6612,...,,,,,,,,,,
labeled-data,flap2,img024.png,1414.688258,65.460179,1541.733567,161.950287,1347.145182,245.575048,1067.323867,324.375303,1474.190491,408.000064,...,,,,,,,,,,
labeled-data,flap2,img034.png,1528.86822,86.366369,1646.264518,173.207466,1316.589981,195.721825,1139.691449,309.901787,,,...,,,,,,,,,,
labeled-data,flap2,img054.png,1580.329611,92.799043,1713.807594,160.342119,1384.133057,184.464646,1252.263242,356.538673,1318.198149,182.856477,...,,,,,,,,,,
labeled-data,flap2,img074.png,1638.223676,67.068347,1776.526165,126.570581,1413.080089,153.909445,1400.214742,367.795852,,,...,,,,,,,,,,
labeled-data,flap2,img097.png,1670.387045,86.366369,1805.473197,147.476771,1438.810785,171.599298,1327.84716,232.7097,,,...,,,,,,,,,,
labeled-data,flap2,img105.png,1676.819719,87.974537,1810.297703,145.868602,1469.365986,169.99113,1380.91672,269.697575,,,...,,,,,,,,,,
labeled-data,flap2,img121.png,1675.211551,70.284684,1789.391512,105.664391,1499.921187,161.950287,1462.933312,263.264901,1329.455329,178.031972,...,,,,,,,,,,
labeled-data,flap2,img135.png,1720.240268,62.243842,1810.297703,89.582706,1535.300893,134.611423,1610.884812,408.000064,,,...,,,,,,,,,,
labeled-data,flap2,img149.png,1737.930121,52.594831,1811.905871,86.366369,1559.423421,157.125782,1517.61104,263.264901,1388.957562,250.399553,...,,,,,,,,,,


In [10]:
#print(df)

scorer                                  Ro                           \
individuals                            ID1                            
bodyparts                             Head                     Beak   
coords                                   x           y            x   
labeled-data flap1 img010.png  1232.965220   84.758200  1326.238992   
                   img014.png  1232.965220   78.325526  1331.063497   
                   img024.png  1232.965220   75.109190  1331.063497   
                   img048.png  1226.532546   84.758200  1326.238992   
                   img071.png  1183.111997  116.921570  1200.801851   
                   img082.png  1160.597639   96.015380  1167.030313   
                   img086.png  1168.638481   79.933695  1179.895660   
                   img097.png          NaN         NaN  1247.438736   
                   img120.png  1339.104340    4.349777  1461.325144   
                   img136.png  1329.455329   73.501021  1388.957562   

score

In [None]:
# # Define the body parts to retain
# body_parts_to_keep = ['Head', 'Beak', 'Body_top', 'RFlipper_mid', 'LFlipper_mid', 'Body_bottom', 'RFoot', 'LFoot']

# # Filter the dataframe by selecting only the relevant body parts
# filtered_df = df.loc[:, (slice(None), slice(None), body_parts_to_keep)]

# # Save the filtered dataframe back to an HDF5 file
# filtered_file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/labeled-data/flap1/CollectedData_model1_edit.h5'
# filtered_df.to_hdf(filtered_file_path, key='df', mode='w')

In [21]:
# df.to_hdf('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/labeled-data/flap1/CollectedData_model1_edit.h5',key='df', mode='w')

In [11]:
# df = pd.read_csv('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/labeled-data/flap1/CollectedData_model1.csv')

In [16]:
df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,scorer,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt,Matt
Unnamed: 0_level_1,Unnamed: 1_level_1,individuals,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,ID1,...,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20,ID20
Unnamed: 0_level_2,Unnamed: 1_level_2,bodyparts,Head,Head,Beak,Beak,Body_top,Body_top,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,...,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,Body_bottom,Body_bottom,RFoot,RFoot,LFoot,LFoot
Unnamed: 0_level_3,Unnamed: 1_level_3,coords,x,y,x,y,x,y,x,y,x,y,...,x,y,x,y,x,y,x,y,x,y
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_0.jpg,59.451922,294.394441,,,40.93857,291.127378,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_1.jpg,56.04409,289.686473,,,43.629254,285.765999,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_10.jpg,58.547084,289.329461,,,48.343945,287.181432,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_11.jpg,60.695114,290.940483,,,47.538434,287.986943,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_12.jpg,56.667559,286.912928,,,48.343945,287.181432,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_13.jpg,59.621099,290.67198,,,49.417959,288.255446,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_14.jpg,552.335913,90.637598,,,535.711135,80.175453,,,538.577476,95.510378,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_15.jpg,57.271329,292.683434,,,48.341359,287.23833,,,,,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_16.jpg,551.04606,89.777695,,,533.704696,78.885599,,,538.434159,94.507158,...,,,,,,,,,,
frames_to_label,PenguinPi1_video_2024-02-26_15-15,frame_PenguinPi1_video_2024-02-26_15-15.mp4_17.jpg,550.329474,90.494281,,,536.42772,79.745502,,,540.29728,95.653695,...,,,,,,,,,,


In [7]:
def add_missing_ids(df, total_ids=20):
    """
    Adds missing individual IDs to the DataFrame until it reaches a specified number of total IDs.
    
    Parameters:
    - df (pd.DataFrame): The DataFrame with the labeled data and existing IDs.
    - total_ids (int): The total number of IDs that should be in the DataFrame. Defaults to 20.
    
    Returns:
    - pd.DataFrame: A new DataFrame with missing IDs added as NaNs.
    """
    # Get the existing IDs
    existing_ids = df.columns.get_level_values('individuals').unique()

    # Check how many IDs need to be added
    missing_ids_count = total_ids - len(existing_ids)
    
    if missing_ids_count <= 0:
        # No missing IDs to add
        return df
    
    # Create missing IDs
    new_ids = [f'ID{i}' for i in range(len(existing_ids) + 1, total_ids + 1)]
    
    # Create empty DataFrame with NaNs for new IDs
    new_columns = pd.MultiIndex.from_product([df.columns.get_level_values('scorer').unique(),
                                              new_ids, 
                                              df.columns.get_level_values('bodyparts').unique(),
                                              df.columns.get_level_values('coords').unique()],
                                             names=df.columns.names)
    
    # Create DataFrame with NaNs for new IDs
    new_df = pd.DataFrame(np.nan, index=df.index, columns=new_columns)
    
    # Concatenate the old DataFrame with the new one
    result_df = pd.concat([df, new_df], axis=1)
    
    return result_df


In [8]:
def replace_scorer(df, old_scorer, new_scorer):
    """
    Replaces the scorer in the MultiIndex of the DataFrame.

    Parameters:
    - df: The DataFrame with a MultiIndex.
    - old_scorer: The scorer value to be replaced.
    - new_scorer: The new scorer value.

    Returns:
    - df: The DataFrame with the scorer replaced.
    """

    # Rename the 'scorer' level of the index
    df.columns = df.columns.set_levels(
        [new_scorer if scorer == old_scorer else scorer for scorer in df.columns.levels[0]], level=0
    )
    
    return df

In [12]:
def process_labelled_images(parent_dir, model_name, base_dir, kp_to_keep):
    """
    adjust data have correct naming convention and to have only relevant keypoints, load the first .h5 file from each, and save as new .h5 and .csv.
    Also, generate a list of video paths with crop information and save it to a .txt file in base_dir. (to be put in the config file later).
    
    NOTES: Adjusted csvs are not it the same format... But that's not really an issue, just something to note 
    
    Parameters:
    - parent_dir (str): Path to the parent directory containing nested directories.
    - model_name (str): The model name to use for renaming the saved files.
    - base_dir (str): Path to the base directory where the video_paths.txt will be saved.
    - kp_to_keep (list): list of kp names that need to be kept (filter others out)
    """
    # Create a list to store video paths and crop information
    video_paths_list = []
    
    # Walk through the parent directory and its subdirectories
    for root, dirs, files in os.walk(parent_dir):
        # If there are any .h5 files in the current directory
        h5_files = [f for f in files if f.endswith('.h5')]
        if h5_files:
            # Load the first .h5 file
            h5_file_path = os.path.join(root, h5_files[0])
            
            # Load .h5 into DataFrame
            df = pd.read_hdf(h5_file_path)

            # Filter the dataframe by selecting only the relevant body parts
            #filtered_df = df.loc[:, (slice(None), slice(None), kp_to_keep)]

            # replace scorer with new scorer
            #filtered_df = replace_scorer(filtered_df, old_scorer='Ro', new_scorer='model1')
            #filtered_df = replace_scorer(df, old_scorer='Ro', new_scorer='model1')

            # add ids until there are 20 ids 
            filtered_df = add_missing_ids(df)
            
            # Save to new .h5 file
            new_h5_filename = f"CollectedData_{model_name}.h5"
            new_h5_path = os.path.join(root, new_h5_filename)
            filtered_df.to_hdf(new_h5_path, key='df', mode='w')

            # Save to .csv file
            new_csv_filename = f"CollectedData_{model_name}.csv"
            new_csv_path = os.path.join(root, new_csv_filename)
            filtered_df.to_csv(new_csv_path, index=False)

            # Delete the old .h5 file
            #if os.path.exists(h5_file_path):
                #os.remove(h5_file_path)
                #print(f"Deleted old file: {h5_file_path}")

            # Delete the old .csv file with the same name, if it exists
            
            #old_csv_file = h5_file_path.replace('.h5', '.csv')
            #if os.path.exists(old_csv_file):
                #os.remove(old_csv_file)
                #print(f"Deleted old file: {old_csv_file}")
            
            # Get the directory name 
            dir_name = os.path.basename(root)
            
            # Create video path string
            video_path = f"  /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_videos/{dir_name}.mp4"
            crop_info = "    crop: 0, 1920, 0, 1080"
            video_paths_list.append(f"{video_path}:\n{crop_info}")

    # Save video paths to a .txt file in the base directory
    txt_file_path = os.path.join(base_dir, "video_paths.txt")
    with open(txt_file_path, 'w') as txt_file:
        txt_file.write("\n".join(video_paths_list))

    print("Processing complete. Files saved.")

In [10]:
parent_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/labeled-data'
model_name = 'model1'
base_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15'
kp_to_keep = ['Head', 'Beak', 'Body_top', 'RFlipper_mid', 'LFlipper_mid', 'Body_bottom', 'RFoot', 'LFoot']

In [13]:
process_labelled_images(parent_dir, model_name, base_dir, kp_to_keep)

Processing complete. Files saved.


## 3.3. check the labels and relabel where necessary

In [14]:
deeplabcut.check_labels(config_path, visualizeindividuals=True)

Creating images with labels by model1.


100%|██████████| 10/10 [00:07<00:00,  1.28it/s]
100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
100%|██████████| 10/10 [00:07<00:00,  1.25it/s]
100%|██████████| 10/10 [00:06<00:00,  1.55it/s]
100%|██████████| 10/10 [00:07<00:00,  1.32it/s]
100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
100%|██████████| 10/10 [00:06<00:00,  1.46it/s]
100%|██████████| 10/10 [00:08<00:00,  1.24it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
100%|██████████| 10/10 [00:08<00:00,  1.19it/s]
100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
100%|██████████| 10/10 [00:08<00:00,  1.21it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 10/10 [00:06<00:00,  1.45it/s]
100%|██████████| 10/10 [00:07<00:00,  1.39it/s]
100%|██████████| 10/10 [00:06<00:00,  1.45it/s]
100%|██████████| 10/10 [00:06<00:00,  1.49it/s]
100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
100%|██████████| 10/10 [00:08<00:00,  1.

If all the labels are ok, then use the function 'create_training_dataset' to create the training dataset!





In [26]:
deeplabcut.label_frames(config_path)

## 3.4. Load the Dataset

In [18]:
config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/config3.yaml'

In [19]:
# paf_graph='config' use the skeleton defined
print('hello')
deeplabcut.create_multianimaltraining_dataset(config_path, paf_graph='config')#, augmenter_type='imgaug', )

hello
Using `skeleton` from the config file as a paf_graph. Data-driven skeleton will not be computed.
Utilizing the following graph: [[0, 1], [0, 2], [2, 3], [2, 4], [2, 5], [5, 6], [5, 7]]
Creating training data for: Shuffle: 1 TrainFraction:  0.99


100%|██████████| 356/356 [00:00<00:00, 3694.56it/s]

The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!





In [5]:
#deeplabcut.create_multianimaltraining_dataset(config_path, paf_graph='config', engine=Engine.TENSORFLOW)

Using `skeleton` from the config file as a paf_graph. Data-driven skeleton will not be computed.
Utilizing the following graph: [[0, 1], [0, 2], [2, 3], [2, 4], [2, 5], [5, 6], [5, 7]]
Downloading a ImageNet-pretrained model from http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz....
test editting
Creating training data for: Shuffle: 5 TrainFraction:  0.99


100%|██████████| 356/356 [00:00<00:00, 1432.11it/s]


AttributeError: 'str' object has no attribute 'aliases'

### 3.4.1. Look at the breakdown of the created pickle file. 

In [11]:

def load_pickle(file_path):
    """
    Load a pickle file and return its content.
    
    Parameters:
    - file_path (str): Path to the pickle file.
    
    Returns:
    - data: The data loaded from the pickle file.
    """
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    
    return data

def save_pickle(data, file_path):
    """
    Save data back to a pickle file.
    
    Parameters:
    - data: The data to save.
    - file_path (str): Path to save the pickle file.
    """
    with open(file_path, 'wb') as file:
        pickle.dump(data, file)
    
    print(f"Data saved to {file_path}")

# # Load the pickle file
# file_path = 'path_to_your_pickle_file.pickle'
# data = load_pickle(file_path)

# # View the data
# print("Data loaded from pickle file:", data)

# # Modify the data (example: if it's a dictionary, you can update a key-value pair)
# if isinstance(data, dict):
#     data['new_key'] = 'new_value'
#     print("Updated data:", data)

# # Save the modified data back to the pickle file
# save_pickle(data, file_path)

In [15]:
#file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/training-datasets/iteration-0/UnaugmentedDataSet_DLC_simple_datasetSep2/DLC_simple_dataset_model195shuffle1.pickle'
file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-02/training-datasets/iteration-0/UnaugmentedDataSet_DLC_simple_datasetSep2/Documentation_data-DLC_simple_dataset_95shuffle1.pickle'
file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/evaluation-results/iteration-2/DLC_simple_datasetSep18-trainset99shuffle1/DLC_dlcrnetms5_DLC_simple_datasetSep18shuffle1_5000-snapshot-5000_meta.pickle'
file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/evaluation-results/iteration-0/DLC_simple_datasetSep18-trainset99shuffle1/DLC_dlcrnetms5_DLC_simple_datasetSep18shuffle1_50000-snapshot-50000_meta.pickle'
file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/evaluation-results/iteration-2/DLC_simple_datasetSep18-trainset99shuffle1/DLC_dlcrnetms5_DLC_simple_datasetSep18shuffle1_5000-snapshot-5000_full.pickle'
#file_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/evaluation-results/iteration-0/DLC_simple_datasetSep18-trainset99shuffle1/DLC_dlcrnetms5_DLC_simple_datasetSep18shuffle1_50000-snapshot-50000_full.pickle'
# Load the pickle file

data = load_pickle(file_path)

print(data)

# # Save it as a JSON file for easier editing
# json_file_path = file_path.replace('.pickle', '.json')
# with open(json_file_path, 'w') as json_file:
#     json.dump(data, json_file, indent=4)

# print(f"Pickle data saved as JSON to {json_file_path}")

{'metadata': {'nms radius': 5.0, 'minimal confidence': 0.01, 'sigma': 1, 'PAFgraph': [[0, 1], [0, 2], [2, 3], [2, 4], [2, 5], [5, 6], [5, 7]], 'PAFinds': array([0, 1, 2, 3, 4, 5, 6]), 'all_joints': [[0], [1], [2], [3], [4], [5], [6], [7]], 'all_joints_names': ['Head', 'Beak', 'Body_top', 'RFlipper_mid', 'LFlipper_mid', 'Body_bottom', 'RFoot', 'LFoot'], 'stride': 8.0}}


## 3.5. Train the default model

In [24]:
config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/config3.yaml'

In [None]:
# Pytorch
#deeplabcut.train_network(config_path, shuffle=1, batch_size=1)#, device='cpu')

In [34]:
# Tensorflow
deeplabcut.train_network(config_path, shuffle=1, maxiters=5000)#, device='cpu')

Config:
{'all_joints': [[0], [1], [2], [3], [4], [5], [6], [7]],
 'all_joints_names': ['Head',
                      'Beak',
                      'Body_top',
                      'RFlipper_mid',
                      'LFlipper_mid',
                      'Body_bottom',
                      'RFoot',
                      'LFoot'],
 'alpha_r': 0.02,
 'apply_prob': 0.5,
 'batch_size': 1,
 'contrast': {'clahe': True,
              'claheratio': 0.1,
              'histeq': True,
              'histeqratio': 0.1},
 'convolution': {'edge': False,
                 'emboss': {'alpha': [0.0, 1.0], 'strength': [0.5, 1.5]},
                 'embossratio': 0.1,
                 'sharpen': False,
                 'sharpenratio': 0.3},
 'crop_pad': 0,
 'crop_sampling': 'hybrid',
 'crop_size': [400, 400],
 'cropratio': 0.05,
 'dataset': 'training-datasets/iteration-3/UnaugmentedDataSet_DLC_simple_datasetSep18/DLC_simple_dataset_model199shuffle1.pickle',
 'dataset_type': 'multi-animal-imgaug',
 'de

Selecting multi-animal trainer
Activating limb prediction...
Batch Size is 1
Getting specs multi-animal-imgaug 7 8
Loading already trained DLC with backbone: resnet_50


2024-09-19 17:20:45.714464: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-19 17:20:45.720497: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-19 17:20:45.723505: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

ResourceExhaustedError: Graph execution error:

Detected at node 'beta1_power/Initializer/initial_value' defined at (most recent call last):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_334442/3291506601.py", line 2, in <cell line: 2>
      deeplabcut.train_network(config_path, shuffle=1, maxiters=5000)#, device='cpu')
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/training.py", line 262, in train_network
      train(
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/train_multianimal.py", line 166, in train
      learning_rate, train_op, tstep = get_optimizer(total_loss, cfg)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/train.py", line 118, in get_optimizer
      train_op = slim.learning.create_train_op(loss_op, optimizer)
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/learning.py", line 436, in create_train_op
      return training.create_train_op(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/training/training.py", line 463, in create_train_op
      grad_updates = optimizer.apply_gradients(grads, global_step=global_step)
Node: 'beta1_power/Initializer/initial_value'
OOM when allocating tensor of shape [] and type float
	 [[{{node beta1_power/Initializer/initial_value}}]]

Original stack trace for 'beta1_power/Initializer/initial_value':
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
    self._run_once()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
    handle._run()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
    await self.process_one()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
    await dispatch(*args)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
    await result
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
    reply_content = await reply_content
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
    res = shell.run_cell(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
    result = self._run_cell(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
    return runner(coro)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_334442/3291506601.py", line 2, in <cell line: 2>
    deeplabcut.train_network(config_path, shuffle=1, maxiters=5000)#, device='cpu')
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/training.py", line 262, in train_network
    train(
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/train_multianimal.py", line 166, in train
    learning_rate, train_op, tstep = get_optimizer(total_loss, cfg)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/train.py", line 118, in get_optimizer
    train_op = slim.learning.create_train_op(loss_op, optimizer)
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/learning.py", line 436, in create_train_op
    return training.create_train_op(
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/training/training.py", line 463, in create_train_op
    grad_updates = optimizer.apply_gradients(grads, global_step=global_step)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/training/optimizer.py", line 697, in apply_gradients
    self._create_slots(var_list)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/training/adam.py", line 192, in _create_slots
    self._create_non_slot_variable(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/training/optimizer.py", line 920, in _create_non_slot_variable
    v = variable_scope.variable(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/variables.py", line 285, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/variables.py", line 226, in _variable_v1_call
    return previous_getter(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/variables.py", line 219, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/variable_scope.py", line 2707, in default_variable_creator
    return resource_variable_ops.ResourceVariable(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/variables.py", line 289, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1768, in __init__
    self._init_from_args(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1957, in _init_from_args
    initial_value = ops.convert_to_tensor(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/profiler/trace.py", line 183, in wrapped
    return func(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 1642, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 48, in _default_conversion_function
    return constant_op.constant(value, dtype, name=name)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/constant_op.py", line 268, in constant
    return _constant_impl(value, dtype, shape, name, verify_shape=False,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/constant_op.py", line 290, in _constant_impl
    const_tensor = g._create_op_internal(  # pylint: disable=protected-access
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 3814, in _create_op_internal
    ret = Operation(


## 3.6. Evaluate model

In [23]:
deeplabcut.evaluate_network(config=config_path)#, plotting=True)#, rescale=True)

Config:
{'all_joints': [[0], [1], [2], [3], [4], [5], [6], [7]],
 'all_joints_names': ['Head',
                      'Beak',
                      'Body_top',
                      'RFlipper_mid',
                      'LFlipper_mid',
                      'Body_bottom',
                      'RFoot',
                      'LFoot'],
 'batch_size': 1,
 'crop_pad': 0,
 'dataset': 'training-datasets/iteration-0/UnaugmentedDataSet_DLC_simple_datasetSep18/DLC_simple_dataset_model199shuffle1.pickle',
 'dataset_type': 'multi-animal-imgaug',
 'deterministic': False,
 'fg_fraction': 0.25,
 'global_scale': 0.8,
 'init_weights': '/home/matthew/.local/lib/python3.9/site-packages/deeplabcut/pose_estimation_tensorflow/models/pretrained/resnet_v1_50.ckpt',
 'intermediate_supervision': False,
 'intermediate_supervision_layer': 12,
 'location_refinement': True,
 'locref_huber_loss': True,
 'locref_loss_weight': 1.0,
 'locref_smooth': False,
 'locref_stdev': 7.2801,
 'log_dir': 'log',
 'mean_pixel': [12

Running  DLC_dlcrnetms5_DLC_simple_datasetSep18shuffle1_50000  with # of trainingiterations: 50000
Activating extracting of PAFs


2024-09-19 16:25:38.138207: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-19 16:25:38.144573: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-19 16:25:38.148566: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Network Evaluation underway...


0it [00:00, ?it/s]2024-09-19 16:25:38.834407: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:417] Loaded runtime CuDNN library: 8.1.0 but source was compiled with: 8.6.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
2024-09-19 16:25:38.835242: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at conv_ops.cc:1068 : UNIMPLEMENTED: DNN library is not found.
2024-09-19 16:25:38.835285: I tensorflow/core/common_runtime/executor.cc:1197] [/job:localhost/replica:0/task:0/device:GPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): UNIMPLEMENTED: DNN library is not found.
	 [[{{node resnet_v1_50/conv1/Conv2D}}]]
2024-09-19 16:25:38.835304: I tensorflow/core/common_runtime/executo

UnimplementedError: Graph execution error:

Detected at node 'resnet_v1_50/conv1/Conv2D' defined at (most recent call last):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_334442/1255777010.py", line 1, in <cell line: 1>
      deeplabcut.evaluate_network(config=config_path)#, plotting=True)#, rescale=True)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate.py", line 648, in evaluate_network
      evaluate_multianimal_full(
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate_multianimal.py", line 321, in evaluate_multianimal_full
      ) = predict.setup_pose_prediction(test_pose_cfg)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/predict.py", line 26, in setup_pose_prediction
      net_heads = PoseNetFactory.create(cfg).test(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/base.py", line 83, in test
      heads = self.get_net(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 79, in get_net
      net, end_points = self.extract_features(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 41, in extract_features
      net, end_points = net_fun(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 270, in resnet_v1_50
      return resnet_v1(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 210, in resnet_v1
      net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_utils.py", line 143, in conv2d_same
      return layers_lib.conv2d(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
      return func(*args, **current_args)
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1171, in convolution2d
      return convolution(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
      return func(*args, **current_args)
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1089, in convolution
      outputs = layer.apply(inputs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 1697, in apply
      return self.__call__(inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/legacy_tf_layers/base.py", line 568, in __call__
      outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 784, in __call__
      outputs = call_fn(cast_inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/layers/convolutional.py", line 254, in call
      outputs = self._convolution_op(inputs, self.kernel)
Node: 'resnet_v1_50/conv1/Conv2D'
Detected at node 'resnet_v1_50/conv1/Conv2D' defined at (most recent call last):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_334442/1255777010.py", line 1, in <cell line: 1>
      deeplabcut.evaluate_network(config=config_path)#, plotting=True)#, rescale=True)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate.py", line 648, in evaluate_network
      evaluate_multianimal_full(
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate_multianimal.py", line 321, in evaluate_multianimal_full
      ) = predict.setup_pose_prediction(test_pose_cfg)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/predict.py", line 26, in setup_pose_prediction
      net_heads = PoseNetFactory.create(cfg).test(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/base.py", line 83, in test
      heads = self.get_net(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 79, in get_net
      net, end_points = self.extract_features(inputs)
    File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 41, in extract_features
      net, end_points = net_fun(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 270, in resnet_v1_50
      return resnet_v1(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 210, in resnet_v1
      net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_utils.py", line 143, in conv2d_same
      return layers_lib.conv2d(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
      return func(*args, **current_args)
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1171, in convolution2d
      return convolution(
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
      return func(*args, **current_args)
    File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1089, in convolution
      outputs = layer.apply(inputs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 1697, in apply
      return self.__call__(inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/legacy_tf_layers/base.py", line 568, in __call__
      outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 784, in __call__
      outputs = call_fn(cast_inputs, *args, **kwargs)
    File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/layers/convolutional.py", line 254, in call
      outputs = self._convolution_op(inputs, self.kernel)
Node: 'resnet_v1_50/conv1/Conv2D'
2 root error(s) found.
  (0) UNIMPLEMENTED: DNN library is not found.
	 [[{{node resnet_v1_50/conv1/Conv2D}}]]
	 [[pose/locref_pred/block4/BiasAdd/_551]]
  (1) UNIMPLEMENTED: DNN library is not found.
	 [[{{node resnet_v1_50/conv1/Conv2D}}]]
0 successful operations.
0 derived errors ignored.

Original stack trace for 'resnet_v1_50/conv1/Conv2D':
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
    self._run_once()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
    handle._run()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
    await self.process_one()
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 499, in process_one
    await dispatch(*args)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
    await result
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
    reply_content = await reply_content
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
    res = shell.run_cell(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
    result = self._run_cell(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
    return runner(coro)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_334442/1255777010.py", line 1, in <cell line: 1>
    deeplabcut.evaluate_network(config=config_path)#, plotting=True)#, rescale=True)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate.py", line 648, in evaluate_network
    evaluate_multianimal_full(
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/evaluate_multianimal.py", line 321, in evaluate_multianimal_full
    ) = predict.setup_pose_prediction(test_pose_cfg)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/core/predict.py", line 26, in setup_pose_prediction
    net_heads = PoseNetFactory.create(cfg).test(inputs)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/base.py", line 83, in test
    heads = self.get_net(inputs)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 79, in get_net
    net, end_points = self.extract_features(inputs)
  File "/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/DeepLabCut/deeplabcut/pose_estimation_tensorflow/nnets/resnet.py", line 41, in extract_features
    net, end_points = net_fun(
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 270, in resnet_v1_50
    return resnet_v1(
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_v1.py", line 210, in resnet_v1
    net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/nets/resnet_utils.py", line 143, in conv2d_same
    return layers_lib.conv2d(
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
    return func(*args, **current_args)
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1171, in convolution2d
    return convolution(
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/ops/arg_scope.py", line 184, in func_with_args
    return func(*args, **current_args)
  File "/home/matthew/.local/lib/python3.9/site-packages/tf_slim/layers/layers.py", line 1089, in convolution
    outputs = layer.apply(inputs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 1697, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/legacy_tf_layers/base.py", line 568, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 784, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 689, in wrapper
    return converted_call(f, args, kwargs, options=options)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 331, in converted_call
    return _call_unconverted(f, args, kwargs, options, False)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
    return f(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/keras/layers/convolutional.py", line 254, in call
    outputs = self._convolution_op(inputs, self.kernel)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py", line 1176, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/nn_ops.py", line 1181, in convolution_v2
    return convolution_internal(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/nn_ops.py", line 1313, in convolution_internal
    return op(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/nn_ops.py", line 2787, in _conv2d_expanded_batch
    return gen_nn_ops.conv2d(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 1144, in conv2d
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/op_def_library.py", line 795, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "/home/matthew/anaconda3/envs/DEEPLABCUT_TFv2/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 3814, in _create_op_internal
    ret = Operation(


In [None]:
deeplabcut.extract_save_all_maps(config_path)#, shuffle=shuffle, Indices=[0, 5])

In [24]:
# Path to the DeepLabCut config file
config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/config.yaml'

# Path to the folder with images for inference
image_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results'
vid_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/videos'

destfolder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results'

# Analyze the images in the folder using the pre-trained model
test = deeplabcut.analyze_videos(config_path, [vid_folder], save_as_csv=True, device='cpu', shuffle=6, destfolder=destfolder, videotype = '.mp4')#  videotype='.jpg')

TypeError: analyze_videos() got an unexpected keyword argument 'device'

In [13]:
print(test)

DLC_DlcrnetStride16Ms5_DLC_simple_datasetSep15shuffle3_snapshot_001


In [5]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

2024-09-18 16:04:10.023139: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-18 16:04:10.040098: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-18 16:04:10.050469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [28]:
# Path to the DeepLabCut config file
#config_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/config.yaml'

# Path to the folder with images for inference
image_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results'
vid_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/videos'

destfolder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results'

# Analyze the images in the folder using the pre-trained model
#test = deeplabcut.analyze_videos(config_path, [vid_folder], save_as_csv=True, device='cpu', shuffle=4, destfolder=destfolder, videotype = '.mp4')#  videotype='.jpg')

deeplabcut.analyze_time_lapse_frames(config_path, image_folder, save_as_csv=True, shuffle=1, frametype='.png',gputouse=None)

Config:
{'all_joints': [[0], [1], [2], [3], [4], [5], [6], [7]],
 'all_joints_names': ['Head',
                      'Beak',
                      'Body_top',
                      'RFlipper_mid',
                      'LFlipper_mid',
                      'Body_bottom',
                      'RFoot',
                      'LFoot'],
 'batch_size': 1,
 'crop_pad': 0,
 'dataset': 'training-datasets/iteration-3/UnaugmentedDataSet_DLC_simple_datasetSep18/DLC_simple_dataset_model199shuffle1.pickle',
 'dataset_type': 'multi-animal-imgaug',
 'deterministic': False,
 'fg_fraction': 0.25,
 'global_scale': 0.3,
 'init_weights': '/home/matthew/.local/lib/python3.9/site-packages/deeplabcut/pose_estimation_tensorflow/models/pretrained/resnet_v1_50.ckpt',
 'intermediate_supervision': False,
 'intermediate_supervision_layer': 12,
 'location_refinement': True,
 'locref_huber_loss': True,
 'locref_loss_weight': 1.0,
 'locref_smooth': False,
 'locref_stdev': 7.2801,
 'log_dir': 'log',
 'mean_pixel': [12

Using snapshot-5000 for model /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-18/dlc-models/iteration-3/DLC_simple_datasetSep18-trainset99shuffle1
Activating extracting of PAFs


2024-09-19 12:08:31.321308: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-19 12:08:31.325865: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-19 12:08:31.331143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-19 12:08:31.335964: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-09-19 12:08:31.339324: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

Analyzing all frames in the directory:  /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results
Starting to extract posture
Overall # of frames:  10  found with (before cropping) frame dimensions:  1920 1080


  0%|          | 0/10 [00:02<?, ?it/s]

Saving results in /home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/DLC_model/DLC_simple_dataset-model1-2024-09-15/test_results...
Saving csv poses!
The folder was analyzed. Now your research can truly start!
If the tracking is not satisfactory for some frame, consider expanding the training set.





# 1. Data Preperation

## 0.0. Important functions

In [16]:
def display_all_cols(df):
    with pd.option_context('display.max_columns', None):
        print(df.head())


In [None]:
# load ids into list 
def load_file_to_list(file_path):
    """
    loads a text file to a list with each entry on a new line becoming a new entry in the list.

    :param file_path: Path to the file where the list should be saved.
    :return list of data from file
    """
    # Open the file for writing
    lst = []
    with open(file_path, 'r') as file:
        # Write each item on a new line
        for line in file:
            lst.append(line.strip())
    return lst

In [None]:
def df_to_json(df, path):
    """
    Converts a DataFrame to a .json file.

    Parameters:
    df (pd.DataFrame): The DataFrame to be converted to JSON.
    path (str): The path (including file name) where the .json file will be saved.
    """
    df.to_json(path, orient='records', indent=4)

In [None]:

def json_to_df(path):
    """
    Converts a .json file to a DataFrame.

    Parameters:
    path (str): The path to the .json file that will be read.

    Returns:
    pd.DataFrame: The DataFrame created from the JSON file.
    """
    print(path)
    df = pd.read_json(path, orient='records')
    print(f"JSON file has been successfully converted to DataFrame.")
    return df

In [None]:
def set_dtypes_df_full_annotation_abs(df):
    df['vid_id'] = df['vid_id'].astype(str)
    df['img_id'] = df['img_id'].astype(str)
    df['bbox_id'] = df['bbox_id'].astype(str)
    df['bbox_c_x'] = df['bbox_c_x'].astype('float32')
    df['bbox_c_y'] = df['bbox_c_y'].astype('float32')
    df['bbox_w'] = df['bbox_w'].astype('float32')
    df['bbox_h'] = df['bbox_h'].astype('float32')
    df['Head_x'] = df['Head_x'].astype('float32')
    df['Head_y'] = df['Head_y'].astype('float32')
    df['Beak_x'] = df['Beak_x'].astype('float32')
    df['Beak_y'] = df['Beak_y'].astype('float32')
    df['Body_top_x'] = df['Body_top_x'].astype('float32')
    df['Body_top_y'] = df['Body_top_y'].astype('float32')
    df['RFlipper_mid_x'] = df['RFlipper_mid_x'].astype('float32')
    df['RFlipper_mid_y'] = df['RFlipper_mid_y'].astype('float32')
    df['LFlipper_mid_x'] = df['LFlipper_mid_x'].astype('float32')
    df['LFlipper_mid_y'] = df['LFlipper_mid_y'].astype('float32')
    df['Body_bottom_x'] = df['Body_bottom_x'].astype('float32')
    df['Body_bottom_y'] = df['Body_bottom_y'].astype('float32')
    df['RFoot_x'] = df['RFoot_x'].astype('float32')
    df['RFoot_y'] = df['RFoot_y'].astype('float32')
    df['LFoot_x'] = df['LFoot_x'].astype('float32')
    df['LFoot_y'] = df['LFoot_y'].astype('float32')
    df['kp_outside_best_bbox'] = df['kp_outside_best_bbox'].astype('float32')
    df['kp_missing'] = df['kp_missing'].astype('float32')
    df['kp_primary_missing'] = df['kp_primary_missing'].astype(bool)
    df['img_width'] = df['img_width'].astype('float32')
    df['img_height'] = df['img_height'].astype('float32')
    df['bbox_max_h_w'] = df['bbox_max_h_w'].astype('float32')
    return df

In [None]:
def unnorm_keypoints(img_size, keypoints, kp_to_null=None):
    """
    De-normalizes keypoints based on image size and returns the de-normalized keypoints along with 
    the positions of any missing or nullified keypoints.

    Parameters:
    - img_size: Tuple of the image dimensions (height, width).
    - keypoints: List of normalized keypoints (with values between -1 and 1).
    - kp_to_null: Optional. List of indices where the keypoints should be nulled (set to NaN).

    Returns:
    - new_keypoints: List of de-normalized keypoints where each coordinate is scaled back to the 
                     image's pixel dimensions.
    - missing_kp: List of indices where the keypoints were either originally set to -10 (indicating 
                  missing keypoints) or explicitly nullified by the kp_to_null list.
    """
    
    # Extract image width and height
    readjust_x = img_size[0]  # width of the image
    readjust_y = img_size[1]  # height of the image

    new_keypoints = []  # List to store the de-normalized keypoints
    missing_kp = []     # List to store the indices of missing or nullified keypoints

    # Iterate through each keypoint
    for i, keypoint in enumerate(keypoints):
        # Null keypoints if they are -10 or if they are specified in kp_to_null
        if keypoint == -10 or (kp_to_null and i in kp_to_null):
            keypoint = np.nan  # Set keypoint to NaN
            missing_kp.append(i)  # Record the index of the missing or nullified keypoint

        # De-normalize the x-coordinates
        if i % 2 == 0:  # Even indices are x-coordinates
            keypoint = keypoint * readjust_x + readjust_x / 2
        # De-normalize the y-coordinates
        else:  # Odd indices are y-coordinates
            keypoint = keypoint * readjust_y + readjust_y / 2

        new_keypoints.append(keypoint)  # Append the de-normalized keypoint to the list

    return new_keypoints, missing_kp

In [None]:
def norm_keypoints(img_size, keypoints):
    """
    Normalizes keypoints based on image size and replaces any NaN values with -10.

    Parameters:
    - img_size: Tuple of the image dimensions (width, height).
    - keypoints: List of de-normalized keypoints where each coordinate is in pixel dimensions.

    Returns:
    - norm_keypoints: List of normalized keypoints where each coordinate is scaled to the range 
                      [-1, 1] relative to the image size, with NaNs replaced by -10.
    """
    
    # Extract image width and height
    readjust_x = img_size[0]  # width of the image
    readjust_y = img_size[1]  # height of the image

    norm_keypoints = []  # List to store the normalized keypoints

    # Iterate through each keypoint
    for i, keypoint in enumerate(keypoints):
        # Replace NaN values with -10
        if np.isnan(keypoint):
            keypoint = -10.0
        else:
            # Normalize the x-coordinates
            if i % 2 == 0:  # Even indices are x-coordinates
                keypoint = (keypoint - readjust_x / 2) / readjust_x
            # Normalize the y-coordinates
            else:  # Odd indices are y-coordinates
                keypoint = (keypoint - readjust_y / 2) / readjust_y

        norm_keypoints.append(keypoint)  # Append the normalized keypoint to the list

    return norm_keypoints

In [None]:
def plot_img_and_keypoint(img, keypoints, nkeypoints=8, keypoint_labels=None):
  fig = plt.figure(figsize=(8, 8), dpi=100)
  plt.imshow(img)
  print(keypoints)
  x_keypoints = keypoints[::2]
  y_keypoints = keypoints[1::2]
  print(x_keypoints)
  print(y_keypoints)
  plt.scatter(x_keypoints, y_keypoints, marker='.', c=np.arange(nkeypoints), cmap='jet')

    # If labels are provided, add them to the plot
  if keypoint_labels is not None:
      for i, (x, y) in enumerate(zip(x_keypoints, y_keypoints)):
          plt.text(x, y, keypoint_labels[i], fontsize=12, color='white', 
                    bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

  plt.show()

## 1.1. Extract and save only the desired keypoints

In [None]:
# get a list of all the video names
def get_unique_video_names(directory):
    """
    Scans the given directory for video files and returns a list of unique file names without the extension.
    
    Parameters:
    directory (str): The path to the directory containing the video files.

    Returns:
    list: A list of unique video file names without extensions.
    """
    unique_names = set()
    
    # Supported video file extensions
    video_extensions = {'.mp4', '.mjpeg'}
    
    # Iterate over all files in the directory
    for filename in os.listdir(directory):
        # Split the filename and extension
        name, ext = os.path.splitext(filename)
        # Check if the file has a video extension
        if ext.lower() in video_extensions:
            unique_names.add(name)  # Add the name to the set (ensures uniqueness)
    
    # Convert the set to a list and return
    return list(unique_names)

In [None]:
#Simple
list_of_vids = get_unique_video_names('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_videos')
print(len(list_of_vids))
print(list_of_vids[0])

In [None]:
# copy and rename csv files that contain annotations
def copy_csv_files(ids, source_dir, destination_dir):
    """
    Copies CSV files from sub-directories that match the given IDs and renames them to the ID.
    
    Parameters:
    ids (list): A list of IDs (sub-directory names) to search for.
    source_dir (str): The path to the root directory containing sub-directories.
    destination_dir (str): The path to the directory where the CSV files should be copied and renamed.
    """
    # Ensure the destination directory exists
    if not os.path.exists(destination_dir):
        os.makedirs(destination_dir)

    for id_ in ids:
        subdir_path = os.path.join(source_dir, id_)
        
        if os.path.isdir(subdir_path):
            # Find the CSV file in the sub-directory
            for file_name in os.listdir(subdir_path):
                if file_name.endswith('.csv'):
                    csv_file_path = os.path.join(subdir_path, file_name)
                    
                    # Create the destination file path
                    destination_file_path = os.path.join(destination_dir, f"{id_}.csv")
                    
                    # Copy the CSV file to the destination directory with the new name
                    shutil.copy(csv_file_path, destination_file_path)
                    print(f"Copied {csv_file_path} to {destination_file_path}")
                    break  # Assuming there is only one CSV file per sub-directory
        else:
            print(f"Sub-directory '{id_}' not found in '{source_dir}'.")

In [None]:
source_directory = '/home/matthew/Desktop/Masters/Masters-data/Roanne Penguins 2022/Penguin Project Annotation and Videos/Penguin Annotations/P1_labeled-data'
destination_directory = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/DLC_Annotations'

copy_csv_files(list_of_vids, source_directory, destination_directory)

### load csv to a dataframe and remove the keypoints not required
need it to look like:
vid_id,img_id,Head,Head,Beak,Beak,Body_top,Body_top,RFlipper_mid,RFlipper_mid,LFlipper_mid,LFlipper_mid,Body_bottom,Body_bottom,RFoot,RFoot,LFoot,LFoot

In [None]:
# read csv into a df
df = pd.read_csv('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/DLC_Annotations/flap1.csv')

In [None]:
# test df function
# print(df.head())
# print(df.info())
# print(df.describe())

In [None]:
# step through list of videos/csv ids and create a list of dfs
list_of_kp_df_raw = []

# step through the list of ids and load each csv into a df and add to list
for _id in list_of_vids:
    
    # load csv to a temp df
    print(_id)
    df = pd.read_csv(f'/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/DLC_Annotations/{_id}.csv')
    list_of_kp_df_raw.append(df)

In [None]:
# print(list_of_kp_df_raw)

In [None]:
# append master df from csv to df with correct kp and column names
######  THIS WILL HAVE TO BE UPDATED WHEN HAVE LOTS OF PENGUINS. JUST ADD AN IF STATEMENT TO PUT ADDITIONAL COLUMNS
######  AS A ADDITIONAL ENTRY AND ADD A BOUNDING BBOX NUMBER COLUMN. THIS WILL LEAD TO SOME EMPTY ROWS IF A PENGUIN
######  ENTRERS THE FRAME. SO FINALLY REMOVE ANY EMPTY ROWS
def consolidate_dataframes(source_dfs):
    """
    Consolidates data from multiple source DataFrames into a single DataFrame with specific columns.
    
    Parameters:
    source_dfs (list of pd.DataFrame): List of source DataFrames to be consolidated.

    Returns:
    pd.DataFrame: A consolidated DataFrame with the selected columns.
    """
    consolidated_df = pd.DataFrame(columns=[
        'vid_id', 'image_id', 'Head_x', 'Head_y', 'Beak_x', 'Beak_y',
        'Body_top_x', 'Body_top_y', 'RFlipper_mid_x', 'RFlipper_mid_y',
        'LFlipper_mid_x', 'LFlipper_mid_y', 'Body_bottom_x', 'Body_bottom_y',
        'RFoot_x', 'RFoot_y', 'LFoot_x', 'LFoot_y'
    ])

    for df in source_dfs:
        # Skip the first 4 rows (headers)
        df = df.iloc[3:]
        
        # Create a temporary DataFrame to hold the required columns
        temp_df = pd.DataFrame({
            'vid_id': df.iloc[:, 1],  # Column 2 (index 1)
            'image_id': range(len(df)),  # Sequential image_id starting from 0
            'Head_x': df.iloc[:, 3],  # Column 4 (index 3)
            'Head_y': df.iloc[:, 4],  # Column 5 (index 4)
            'Beak_x': df.iloc[:, 5],  # Column 6 (index 5)
            'Beak_y': df.iloc[:, 6],  # Column 7 (index 6)
            'Body_top_x': df.iloc[:, 7],  # Column 8 (index 7)
            'Body_top_y': df.iloc[:, 8],  # Column 9 (index 8)
            'RFlipper_mid_x': df.iloc[:, 13],  # Column 14 (index 13)
            'RFlipper_mid_y': df.iloc[:, 14],  # Column 15 (index 14)
            'LFlipper_mid_x': df.iloc[:, 15],  # Column 16 (index 15)
            'LFlipper_mid_y': df.iloc[:, 16],  # Column 17 (index 16)
            'Body_bottom_x': df.iloc[:, 23],  # Column 24 (index 23)
            'Body_bottom_y': df.iloc[:, 24],  # Column 25 (index 24)
            'RFoot_x': df.iloc[:, 27],  # Column 28 (index 27)
            'RFoot_y': df.iloc[:, 28],  # Column 29 (index 28)
            'LFoot_x': df.iloc[:, 29],  # Column 30 (index 29)
            'LFoot_y': df.iloc[:, 30]  # Column 31 (index 30)
        })

        # Append the temp_df to the consolidated DataFrame
        consolidated_df = pd.concat([consolidated_df, temp_df], ignore_index=True)

    return consolidated_df


In [None]:
master_kp_df = consolidate_dataframes(list_of_kp_df_raw)

In [None]:
master_kp_df.head(30)

In [None]:
master_kp_df.info()

In [None]:
# save to json
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/reduced_kp_raw_Simple.json'
df_to_json(master_kp_df, path)

## 1.2. Save df as a json (and vice versa) and set the datatypes

In [None]:
def df_to_json(df, path):
    """
    Converts a DataFrame to a .json file.

    Parameters:
    df (pd.DataFrame): The DataFrame to be converted to JSON.
    path (str): The path (including file name) where the .json file will be saved.
    """
    df.to_json(path, orient='records', indent=4)

In [None]:

def json_to_df(path):
    """
    Converts a .json file to a DataFrame.

    Parameters:
    path (str): The path to the .json file that will be read.

    Returns:
    pd.DataFrame: The DataFrame created from the JSON file.
    """
    print(path)
    df = pd.read_json(path, orient='records')
    print(f"JSON file has been successfully converted to DataFrame.")
    return df

In [None]:
def set_dtypes_df_full_annotation_abs(df):
    df['vid_id'] = df['vid_id'].astype(str)
    df['img_id'] = df['img_id'].astype(str)
    df['bbox_id'] = df['bbox_id'].astype(str)
    df['bbox_c_x'] = df['bbox_c_x'].astype('float32')
    df['bbox_c_y'] = df['bbox_c_y'].astype('float32')
    df['bbox_w'] = df['bbox_w'].astype('float32')
    df['bbox_h'] = df['bbox_h'].astype('float32')
    df['Head_x'] = df['Head_x'].astype('float32')
    df['Head_y'] = df['Head_y'].astype('float32')
    df['Beak_x'] = df['Beak_x'].astype('float32')
    df['Beak_y'] = df['Beak_y'].astype('float32')
    df['Body_top_x'] = df['Body_top_x'].astype('float32')
    df['Body_top_y'] = df['Body_top_y'].astype('float32')
    df['RFlipper_mid_x'] = df['RFlipper_mid_x'].astype('float32')
    df['RFlipper_mid_y'] = df['RFlipper_mid_y'].astype('float32')
    df['LFlipper_mid_x'] = df['LFlipper_mid_x'].astype('float32')
    df['LFlipper_mid_y'] = df['LFlipper_mid_y'].astype('float32')
    df['Body_bottom_x'] = df['Body_bottom_x'].astype('float32')
    df['Body_bottom_y'] = df['Body_bottom_y'].astype('float32')
    df['RFoot_x'] = df['RFoot_x'].astype('float32')
    df['RFoot_y'] = df['RFoot_y'].astype('float32')
    df['LFoot_x'] = df['LFoot_x'].astype('float32')
    df['LFoot_y'] = df['LFoot_y'].astype('float32')
    df['kp_outside_best_bbox'] = df['kp_outside_best_bbox'].astype('float32')
    df['kp_missing'] = df['kp_missing'].astype('float32')
    df['kp_primary_missing'] = df['kp_primary_missing'].astype(bool)
    df['img_width'] = df['img_width'].astype('float32')
    df['img_height'] = df['img_height'].astype('float32')
    return df

## 1.3. Create a single annotation with bbox and keypoints in the correct form and linked

### Load keypoints df and make them the correct format

In [None]:
# load the reduced_kp_raw_Simple_df
df_reduced_kp_raw = json_to_df('/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/reduced_kp_raw_Simple.json')

In [None]:
df_reduced_kp_raw.head()


In [None]:
# correct the format of the keypoints
    # 1st string
df_reduced_kp_raw.iloc[:, 0] = df_reduced_kp_raw.iloc[:, 0].astype(str)

# Ensure the second column is an integer
df_reduced_kp_raw.iloc[:, 1] = df_reduced_kp_raw.iloc[:, 1].astype(str)

# Format the remaining columns as floats with minimal decimal points
for col in df_reduced_kp_raw.columns[2:]:
    df_reduced_kp_raw[col] = df_reduced_kp_raw[col].astype(np.float32)

In [None]:
df_reduced_kp_raw.info()

### Load bboxes into a df to be used and correct format

In [None]:
# step through bounding box text files and create a df with the following output
# vid_id,img_id,bbox_c_x, bbox_c_y, bbox_w, bbox_h
def bbox_txt_files_to_df(directory):
    """
    Processes all text files in the given directory and returns a DataFrame with the columns:
    vid_id, img_id, bbox_c_x, bbox_c_y, bbox_w, bbox_h.

    Parameters:
    directory (str): Path to the directory containing the text files.

    Returns:
    pd.DataFrame: The processed DataFrame.
    """
    data = []
    #count = 0

    for filename in os.listdir(directory):
        if filename.endswith('.txt'):
            # Extract vid_id and img_id from the filename
            parts = filename.split('_')
            vid_id = parts[1].split('.')[0]
            img_id = parts[-1].split('.')[0]
            #print(img_id)

            # Read the text file
            filepath = os.path.join(directory, filename)
            with open(filepath, 'r') as file:
                for line in file:
                    figures = line.strip().split()
                    if len(figures) == 5:
                        bbox_c_x, bbox_c_y, bbox_w, bbox_h = map(float, figures[1:])
                        data.append([vid_id, img_id, np.float32(bbox_c_x), np.float32(bbox_c_y), np.float32(bbox_w), np.float32(bbox_h)])
                        #count += 1
    
    # Create DataFrame
    df = pd.DataFrame(data, columns=['vid_id', 'img_id', 'bbox_c_x', 'bbox_c_y', 'bbox_w', 'bbox_h'])

    return df

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_annotations'

bbox_df_raw = bbox_txt_files_to_df(path)

In [None]:
bbox_df_raw.head()

In [None]:
bbox_df_raw.info()

In [None]:
# see where there are more than one bounding box per image
duplicates = bbox_df_raw[bbox_df_raw.duplicated(['vid_id', 'img_id'], keep=False)]

In [None]:
duplicates.head(30)

### Get image size stored

In [None]:
# step through image files and create a df with the following output
# vid_id,img_id,img_wid,img_height

def image_files_to_df(directory):
    """
    Processes all .jpg image files in the given directory and returns a DataFrame with the columns:
    vid_id, img_id, img_wid, img_height.

    Parameters:
    directory (str): Path to the directory containing the image files.

    Returns:
    pd.DataFrame: The processed DataFrame.
    """
    data = []

    for filename in os.listdir(directory):
        if filename.endswith('.jpg'):
            # Extract vid_id and img_id from the filename
            parts = filename.split('_')
            vid_id = parts[1].split('.')[0]
            img_id = parts[-1].split('.')[0]

            # Read the image file and get its dimensions
            filepath = os.path.join(directory, filename)
            with Image.open(filepath) as img:
                img_wid, img_height = img.size

            # Append the data to the list
            data.append([vid_id, img_id, np.float32(img_wid), np.float32(img_height)])
    
    # Create DataFrame
    df = pd.DataFrame(data, columns=['vid_id', 'img_id', 'img_wid', 'img_height'])

    return df

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_images'

imgsize_df_raw = image_files_to_df(path)

In [None]:
imgsize_df_raw.head()

In [None]:
imgsize_df_raw.info()

### Match keypoint to bbox, check for keypoints outside of bbox, match bbox ids

In [None]:
# loop through all keypoint entries in df
# for each keypoint filter the bbox df to have only those ids
# do the same for the image df
# find the bbox that will contain the most keypoints from the bbox df
#   this will require the rescaling of the bbox
#   find how many keypoints are outside the bbox
#   find how many keypoints are missing
#   find whether the primary kp are missing (True/False)
# check if this is img_id = 0 
#   yes: increment bbox_id starting at 0
#   no: find bbox from previous vid_id, img_id - 1 in final df that has the best fit and make bbox id = to that bbox_id, unless distance is over 20% of img size, then make bbox_id last bbox_id + 1
# remove the bounding box from the original bbox df - I REMOVED THIS STEP AS UNNECESSARY

In [None]:
# for each keypoint filter the bbox df to have only those ids

def filter_dataframe_based_on_another(vid_id, img_id, df_to_filter):
    """
    filters df_to_filter to show entries with the same vid_id and img_id.

    Parameters:
    vid_id: string with vid_id
    image_id: string with img_id
    df_to_filter (pd.DataFrame): The DataFrame to filter based on vid_id and img_id.

    Returns:
    list of pd.DataFrame: A list of filtered DataFrames, one for each row in df_main.
    """
        
    # Filter df_to_filter based on the current row's vid_id and img_id
    filtered_df = df_to_filter[(df_to_filter['vid_id'] == vid_id) & (df_to_filter['img_id'] == img_id)]

    return filtered_df

In [None]:
# rescale the bounding box
def denorm_bbox_df(df_bbox, img_width, img_height):
    """
    Denormalizes bounding boxes in a DataFrame from normalized values to absolute pixel values.

    Parameters:
    df_bboxes (pd.DataFrame): The DataFrame containing bounding box coordinates.
                              Expected columns: ['vid_id', 'image_id', 'bbox_c_x', 'bbox_c_y', 'bbox_w', 'bbox_h']
    img_width (int): The width of the image.
    img_height (int): The height of the image.

    Returns:
    pd.DataFrame: A DataFrame with denormalized bounding boxes.
                  Columns: ['vid_id', 'image_id', 'bbox_c_x', 'bbox_c_y', 'bbox_w', 'bbox_h']
    """
    # Create a copy of the DataFrame to avoid modifying the original DataFrame
    denorm_df = df_bbox.copy()
    #print(denorm_df)
    #print(img_width)

    # Denormalize the bounding box coordinates
    #print(denorm_df['bbox_c_x'])# * img_width)
    denorm_df['bbox_c_x'] = denorm_df['bbox_c_x'] * img_width
    denorm_df['bbox_c_y'] = denorm_df['bbox_c_y'] * img_height
    denorm_df['bbox_w'] = denorm_df['bbox_w'] * img_width
    denorm_df['bbox_h'] = denorm_df['bbox_h'] * img_height

    return denorm_df

In [None]:
# find the bbox that will contain the most keypoints from the bbox df
#   find how many keypoints are outside the bbox
#   find how many keypoints are missing
#   find whether the primary kp are missing (True/False)

def find_best_bbox(bbox_df, keypoints_df):
    """
    Finds the bounding box that contains the most keypoints and returns it along with the number
    of keypoints that fall outside that bounding box.

    Parameters:
    bbox_df (pd.DataFrame): DataFrame with bounding boxes in absolute coordinates. 
                            
    keypoints_df (pd.DataFrame): DataFrame with keypoints.
                                

    Returns:
    dict: Dictionary containing the best bounding box and the number of keypoints outside it.
    """
    best_bbox = None
    max_keypoints_inside = -1
    min_distance_to_origin = float('inf')
    keypoints_outside_best_bbox = 0
    nan_keypoint_pairs = 0
    missing_primary_keypoint = False

    # Extract keypoints and check for NaN pairs and missing primary keypoints
    keypoints = []
    for i in range(0, 16, 2):  # Since there are 8 keypoints (16 columns), we step by 2
        #print(i)
        x = keypoints_df.iloc[i+2]  
        y = keypoints_df.iloc[i+3]
        keypoints.append((x, y))

        # Check if either x or y is NaN
        if pd.isna(x) or pd.isna(y):
            nan_keypoint_pairs += 1
            if i == 4 or i == 10:
                missing_primary_keypoint = True

        #break
    count =0
    for _, bbox in bbox_df.iterrows():
        #vid_id, img_id = bbox['vid_id'], bbox['img_id']
        count=+1
        #print(count)
        bbox_c_x, bbox_c_y, bbox_w, bbox_h = bbox['bbox_c_x'], bbox['bbox_c_y'], bbox['bbox_w'], bbox['bbox_h']
        
        # Calculate the bounding box corners (x_min, y_min, x_max, y_max)
        x_min = bbox_c_x - bbox_w / 2
        y_min = bbox_c_y - bbox_h / 2
        x_max = bbox_c_x + bbox_w / 2
        y_max = bbox_c_y + bbox_h / 2

        # print('xy minmax')
        # print(x_min, y_min, x_max, y_max)
        # print('keypoints')
        # print(keypoints)
        
        # Count keypoints inside the current bbox
        keypoints_inside = sum(x_min <= x <= x_max and y_min <= y <= y_max for x, y in keypoints)
        #print('keypoint inside')
        #print(keypoints_inside)
        
        # Calculate the distance of the bbox to the origin (0,0)
        distance_to_origin = (x_min**2 + y_min**2)**0.5
        
        # Update the best bbox if this one has more keypoints inside, or the same number but is closer to the origin
        if (keypoints_inside > max_keypoints_inside) or \
           (keypoints_inside == max_keypoints_inside and distance_to_origin < min_distance_to_origin):
            best_bbox = bbox
            max_keypoints_inside = keypoints_inside
            min_distance_to_origin = distance_to_origin
            
            keypoints_outside_best_bbox = len(keypoints) - nan_keypoint_pairs - keypoints_inside
    
    # Convert the best bbox to a DataFrame or a list
    if best_bbox is not None:
        best_bbox_df = pd.DataFrame([{
            'vid_id': best_bbox['vid_id'],
            'img_id': best_bbox['img_id'],
            'bbox_c_x': best_bbox['bbox_c_x'],
            'bbox_c_y': best_bbox['bbox_c_y'],
            'bbox_w': best_bbox['bbox_w'],
            'bbox_h': best_bbox['bbox_h'],
        }])
        return best_bbox_df, keypoints_outside_best_bbox, nan_keypoint_pairs, missing_primary_keypoint
    else:
        return None, 8, 0, False


In [None]:
def find_closest_bbox_id(best_bbox_df, final_df_prev):
    """
    Finds the bbox_id in final_df_prev that is closest in distance to the bounding box in best_bbox_df 
    UNLESS it is more that 20% of image size off, then it returns max bbox_id +1.

    Parameters:
    best_bbox_df (pd.DataFrame): DataFrame with a single entry for the best bounding box.
    final_df_prev (pd.DataFrame): DataFrame with multiple entries, each having a bounding box.

    Returns:
    str: The bbox_id of the closest bounding box in final_df_prev UNLESS 
    """
    # Extract the values from the single entry in best_bbox_df
    best_bbox_c_x = best_bbox_df['bbox_c_x'].iloc[0]
    best_bbox_c_y = best_bbox_df['bbox_c_y'].iloc[0]
    best_bbox_w = best_bbox_df['bbox_w'].iloc[0]
    best_bbox_h = best_bbox_df['bbox_h'].iloc[0]

    # Initialize variables to track the closest bbox
    min_distance = float('inf')
    closest_bbox_id = None

    # Find the max distance can be (rsm of the image size x 0.25 - this is 25% of image size)
    max_distance = np.sqrt(
        final_df_prev['img_width'].iloc[0] ** 2 +
        final_df_prev['img_height'].iloc[0] ** 2
    ) * 0.25

    # Iterate through each entry in final_df_prev to calculate the distance
    for index, row in final_df_prev.iterrows():
        # Calculate the Euclidean distance (root squared mean)
        distance = np.sqrt(
            (row['bbox_c_x'] - best_bbox_c_x) ** 2 +
            (row['bbox_c_y'] - best_bbox_c_y) ** 2 +
            (row['bbox_w'] - best_bbox_w) ** 2 +
            (row['bbox_h'] - best_bbox_h) ** 2
        )
        
        # Update the closest_bbox_id if this distance is the smallest found
        if distance < min_distance:
            min_distance = distance
            closest_bbox_id = row['bbox_id']
    
    # return max bbox_id + 1 if the distance is greater than the max distance
    if min_distance > max_distance:
        closest_bbox_id = str(int(final_df_prev['bbox_id'].max())+1)

    return closest_bbox_id

In [None]:
# loop through all keypoint entries in df and use the above functions to create final df
def process_dataframe(df_kp, df_bbox, df_imgsize):
    """
    Iterates over each row in the DataFrame and processes the data.

    Parameters:
    df_kp, df_bbox, df_imgsize: keypoint df, bbox df, img size df 

    Returns:
    df_full_annotation: Full annotation compiled df
    """
    #test
    count = 0
    bbox_count = 0
    prev_img_id = -1
    df_final = pd.DataFrame(columns=[
        'vid_id', 'img_id', 'bbox_id', 'bbox_c_x', 'bbox_c_y', 'bbox_w', 'bbox_h',
        'Head_x', 'Head_y', 'Beak_x', 'Beak_y','Body_top_x', 'Body_top_y','RFlipper_mid_x',	
        'RFlipper_mid_y', 'LFlipper_mid_x', 'LFlipper_mid_y', 'Body_bottom_x', 'Body_bottom_y', 
        'RFoot_x', 'RFoot_y', 'LFoot_x', 'LFoot_y','kp_outside_best_bbox', 'kp_missing', 'kp_primary_missing',
        'img_width', 'img_height',
    ])

    for index, row in df_kp.iterrows():
        #test
        count += 1

        # Access data in each row using row['column_name'] - get the vid_id and img_id
        vid_id = row['vid_id']
        img_id = row['image_id']

        # print(row)

        #filter the bbox and img_size df to only have specific img and vid id
        df_bbox_filtered = filter_dataframe_based_on_another(vid_id, img_id, df_bbox)
        df_imgsize_filtered = filter_dataframe_based_on_another(vid_id, img_id, df_imgsize)

        # get image size 
        img_width = df_imgsize_filtered['img_wid']
        img_height = df_imgsize_filtered['img_height']
        # convert them to scalars that can be used in math operations
        img_width = img_width.iloc[0]  # Convert to scalar
        img_height = img_height.iloc[0]  # Convert to scalar

        # denormalise the bbox so that the they are absolute coords
        df_bbox_filtered_abs = denorm_bbox_df(df_bbox_filtered, img_width, img_height)

        # find the bbox that will contain the most keypoints from the bbox df
        # and find how many keypoints are outside the bbox
        df_best_bbox, kp_outside_best_bbox, kp_missing, kp_primary_missing = find_best_bbox(df_bbox_filtered_abs, row)

        # check if first image in video sequence (for matching bboxes and kp, if it is first then we don't need matching)
        if img_id == '0':
            #yes: just increment bbox_id starting at 0
            if prev_img_id != img_id: # if we are on the first bbox of an img
                bbox_count = 0
            else: # if we not on the first one
                bbox_count += 1
            # set the bbox id
            bbox_id = str(bbox_count)  
        
        else:
            # no: find bbox from previous vid_id, img_id - 1 in final df that has the best fit 
            # and make bbox id = to that bbox_id, unless distance is over 20% of img size, 
            # then make bbox_id last bbox_id + 1

            # 1. filter for all the entries in the df_final that are from the previous image
            df_final_filtered_prev = filter_dataframe_based_on_another(prev_vid_id, prev_img_id, df_final)

            # 2. find bbox df_final_filtered_prev that has the best fit to current best bbox 
            # and make bbox id = to that bbox_id, unless distance is over 25% of img size, 
            # then make bbox_id last bbox_id + 1
            bbox_id = find_closest_bbox_id(df_best_bbox, df_final_filtered_prev)


        # Store the result in a dictionary and then append to the DataFrame
        result = {
            'vid_id': vid_id,
            'img_id': img_id,
            'bbox_id': bbox_id,
            'bbox_c_x': df_best_bbox['bbox_c_x'].iloc[0],
            'bbox_c_y': df_best_bbox['bbox_c_y'].iloc[0],
            'bbox_w': df_best_bbox['bbox_w'].iloc[0],
            'bbox_h': df_best_bbox['bbox_h'].iloc[0],
            'Head_x': row.iloc[2],
            'Head_y': row.iloc[3],
            'Beak_x': row.iloc[4],
            'Beak_y': row.iloc[5],
            'Body_top_x': row.iloc[6],
            'Body_top_y': row.iloc[7],
            'RFlipper_mid_x': row.iloc[8],
            'RFlipper_mid_y': row.iloc[9],
            'LFlipper_mid_x': row.iloc[10],
            'LFlipper_mid_y': row.iloc[11],
            'Body_bottom_x': row.iloc[12],
            'Body_bottom_y': row.iloc[13],
            'RFoot_x': row.iloc[14],
            'RFoot_y': row.iloc[15],
            'LFoot_x': row.iloc[16],
            'LFoot_y': row.iloc[17],
            'kp_outside_best_bbox': float(kp_outside_best_bbox),
            'kp_missing': float(kp_missing),
            'kp_primary_missing': kp_primary_missing,
            'img_width': img_width,
            'img_height': img_height
        }
        
        df_final = df_final.append(result, ignore_index=True)

        # keep track of the last img_id so if we are on the second bbox for an image we know
        prev_img_id = img_id
        prev_vid_id = vid_id
        
    return df_final
        


        #test
        # if count == 50:
        #     return df_final


In [None]:
df_full_annotation_abs = process_dataframe(df_reduced_kp_raw, bbox_df_raw, imgsize_df_raw)

In [None]:
df_full_annotation_abs.head()

In [None]:
df_full_annotation_abs.info()

### check that all kp are contained in the bbox
A. in the simple we will adjust bbox 
B. in others we will adjust bboxs to make them slightly bigger

In [None]:
# get the bboxs that have kps outside of the box
df_kp_outside_bbox = df_full_annotation_abs[(df_full_annotation_abs['kp_outside_best_bbox'] > 0)]

In [None]:
df_kp_outside_bbox.info()

#### Simple
we will just show all the bbox that are an issue and adjust the bbox and save those

In [None]:
# display all the issue bbox rows
df_kp_outside_bbox.head(44)

##### IMPORTANT: display all cols

In [None]:
with pd.option_context('display.max_columns', None):
    print(df_kp_outside_bbox.head())


In [None]:
# i need to adjust the Head_y value of row 390 as this is a negative (which it cant be)
# first lets see that we have the right row number (390) -> seen in the above output
print(df_full_annotation_abs.loc[390, 'Head_y'])

In [None]:
# okay now just set it to 1.5
df_full_annotation_abs.loc[390, 'Head_y'] = float(1.5)

In [None]:
# change the kp_outside flag
df_full_annotation_abs.loc[390, 'kp_outside_best_bbox'] = float(0.0)

In [None]:
# check
with pd.option_context('display.max_columns', None):
    print(df_full_annotation_abs.loc[390])

In [None]:
# lets just check that it is the correct dtype
df_full_annotation_abs.info()

### save df_final as a json file

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/df_full_annotation_abs_Simple.json'
df_to_json(df_full_annotation_abs, path)

## 1.4. Get json annotation abs to df

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/df_full_annotation_abs_Simple.json'
df_full_annotation_abs = json_to_df(path)

In [None]:
df_full_annotation_abs = set_dtypes_df_full_annotation_abs(df_full_annotation_abs)

In [None]:
df_full_annotation_abs.head()

In [None]:
df_full_annotation_abs.info()

## 1.5. Crop images by bbox
save them with the name vid_id_img_id_bbox_id_raw

In [None]:
# loop through all kp entries in the df_full_annotation_abs
# for each entry crop and save image with the naming criteria using a function that takes bbox coords as input and ids 

In [None]:
# function to crop image by bbox and save using id naming convention
def crop_and_save_image(image_path, save_directory, vid_id, img_id, bbox_id, bbox_c_x, bbox_c_y, bbox_w, bbox_h):
    """
    Crops an image based on the provided bounding box coordinates and saves it with a specific naming convention.
    
    Parameters:
    - image_path (str): Path to the input image.
    - save_directory (str): Directory where the cropped image will be saved.
    - vid_id (str): Video ID used for naming the cropped image.
    - img_id (str): Image ID used for naming the cropped image.
    - bbox_id (str): Bounding box ID used for naming the cropped image.
    - bbox_c_x (float): X-coordinate of the bounding box center.
    - bbox_c_y (float): Y-coordinate of the bounding box center.
    - bbox_w (float): Width of the bounding box.
    - bbox_h (float): Height of the bounding box.
    
    The cropped image will be saved as `vid_id_img_id_bbox_id_crop_raw.jpg` in the save directory.
    """
    # Open the image
    image = Image.open(image_path)
    
    # Calculate the bounding box corners
    left = bbox_c_x - (bbox_w / 2)
    top = bbox_c_y - (bbox_h / 2)
    right = bbox_c_x + (bbox_w / 2)
    bottom = bbox_c_y + (bbox_h / 2)
    
    # Crop the image
    cropped_image = image.crop((left, top, right, bottom))
    
    # Ensure the save directory exists
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)
    
    # Create the file name
    save_filename = f"{vid_id}_{img_id}_{bbox_id}_crop_raw.jpg"
    save_path = os.path.join(save_directory, save_filename)
    
    # Save the cropped image
    cropped_image.save(save_path, format='JPEG')
    
    print(f"Cropped image saved as {save_path}")

In [None]:
# function that gets the path to the image 
def find_image_path(image_directory, vid_id, img_id):
    """
    Searches through all .jpg files in the specified directory and returns the path to the image
    that matches the provided vid_id and img_id.

    Parameters:
    - image_directory (str): Path to the directory containing the images.
    - vid_id (str): The video ID to match in the image file name.
    - img_id (str): The image ID to match in the image file name.

    Returns:
    - str: The path to the matching image file, or None if no match is found.
    """
    for filename in os.listdir(image_directory):
        if filename.endswith('.jpg'):
            # Split the filename and check if it matches the vid_id and img_id
            parts = filename.split('_')
            if len(parts) >= 3:  # Ensure there are enough parts to avoid index errors
                file_vid_id = parts[1].split('.')[0]
                file_img_id = parts[-1].split('.')[0]
                if file_vid_id == vid_id and file_img_id == img_id:
                    return os.path.join(image_directory, filename)
    
    # If no matching image is found, return None
    return None


In [None]:
# function that steps through df and calls above function
def crop_img_from_df(df, img_dir, save_dir):
    for index, row in df.iterrows():
        
        # Access data in each row using row['column_name'] - get the vid_id and img_id
        vid_id = row['vid_id']
        img_id = row['img_id']
        bbox_id = row['bbox_id']
        bbox_c_x = row['bbox_c_x']
        bbox_c_y = row['bbox_c_y']
        bbox_w = row['bbox_w']
        bbox_h = row['bbox_h']

        #print(type(vid_id))
        #print(type(img_id))

        # get the relevant image path
        img_path = find_image_path(img_dir, vid_id, img_id)
        #print(img_path)

        #crop and save the relevant bbox in the save directory
        crop_and_save_image(img_path, save_dir, vid_id, img_id, bbox_id, bbox_c_x, bbox_c_y, bbox_w, bbox_h)

In [None]:
img_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/Simple_ObjectDetect1/raw_images'
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_raw'
#df_full_annotation_abs.info()

crop_img_from_df(df_full_annotation_abs, img_dir, save_dir)

## 1.6. Resize cropped images and add padding
resize bbox_img to fit into 220x220 but do not allow distortion of the img. Use padding rather

In [None]:
def resize_and_pad_images(source_dir, save_dir):
    """
    Resizes and pads images from the source directory to 220x220 pixels and saves them to the save directory.
    
    Parameters:
    - source_dir (str): Path to the directory containing the source images.
    - save_dir (str): Path to the directory where the resized images will be saved.
    """
    # Ensure the save directory exists
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    # Process each image in the source directory
    for filename in os.listdir(source_dir):
        if filename.endswith(('.jpg', '.png', '.jpeg')):
            img_path = os.path.join(source_dir, filename)
            img = Image.open(img_path)
            
            # Resize while maintaining aspect ratio
            img.thumbnail((220, 220), Image.Resampling.LANCZOS)
            
            # Calculate padding to make the image 220x220
            delta_w = 220 - img.size[0]
            delta_h = 220 - img.size[1]
            padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
            
            # Add padding to the image
            padded_img = ImageOps.expand(img, padding, fill='black')
            
            # Rename the image
            parts = filename.split('_')
            base_name = '_'.join(parts[:-1])
            extension = filename.split('.')[-1]
            new_filename = f"{base_name}_220x220.{extension}"
            
            # Save the new image
            save_path = os.path.join(save_dir, new_filename)
            padded_img.save(save_path, format='JPEG')
            print(f"Saved resized image as {save_path}")

In [None]:
source_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_raw'
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_crop_220'
resize_and_pad_images(source_dir, save_dir)

## 1.7. Normalise the df

### Normalise the keypoints (normalise by the abs value of the bbox)

In [None]:
# find the max(w/h) of the bbox
# shift x and y coords for each annotation by the centre of the bbox
# devide by the max

In [None]:
def norm_kp_by_bbox_df(df_orginal):

    # copy so don't effect the original df
    df = df_orginal.copy()
    
    # first create a new col that is the max value of the height and width
    df['bbox_max_h_w'] = df[['bbox_w', 'bbox_h']].max(axis=1) 

    ## shift coords 
    # list of kp y cols
    y_columns = [
        'Head_y', 'Beak_y', 'Body_top_y', 'RFlipper_mid_y', 
        'LFlipper_mid_y', 'Body_bottom_y', 'RFoot_y', 'LFoot_y'
    ]
    # list of kp x cols
    x_columns = [
        'Head_x', 'Beak_x', 'Body_top_x', 'RFlipper_mid_x', 
        'LFlipper_mid_x', 'Body_bottom_x', 'RFoot_x', 'LFoot_x'
    ]
    # Subtract bbox_c_y from the selected '_y' columns
    df[y_columns] = df[y_columns].subtract(df['bbox_c_y'], axis=0)
    # Subtract bbox_c_x from the selected '_x' columns
    df[x_columns] = df[x_columns].subtract(df['bbox_c_x'], axis=0)

    # scale (devide) by the max of bbox width and hight (bbox_max_h_w)
    df[y_columns] = df[y_columns].div(df['bbox_max_h_w'], axis=0)
    df[x_columns] = df[x_columns].div(df['bbox_max_h_w'], axis=0)

    return df    

In [None]:
df_full_annotation_kpnorm = norm_kp_by_bbox_df(df_full_annotation_abs)


In [None]:
# check 
# with pd.option_context('display.max_columns', None):
#     print(df_full_annotation_abs.head(2))

# with pd.option_context('display.max_columns', None):
#     print(df_test.head(2))

### Normalise the bbox (normalise by the size of the img)
normalising how it is done in the obj dect (between 0 and 1)

In [None]:
def norm_bbox_by_img_df(df_orginal):

    # copy so don't effect the original df
    df = df_orginal.copy()

    ## shift coords 
    # list of kp y cols
    y_columns = [
        'bbox_c_y', 'bbox_h'
    ]
    # list of kp x cols 
    # I AM NORMILISING THE bbox_max_h_w BY THE IMAGE WIDTH
    x_columns = [
        'bbox_c_x', 'bbox_w', 'bbox_max_h_w'
    ]

    # scale (devide) by the width and hight of the image
    df[y_columns] = df[y_columns].div(df['img_height'], axis=0)
    df[x_columns] = df[x_columns].div(df['img_width'], axis=0)

    return df   

In [None]:
df_full_annotation_norm = norm_bbox_by_img_df(df_full_annotation_kpnorm)

In [None]:
# check 
with pd.option_context('display.max_columns', None):
    print(df_full_annotation_abs.head(2))

with pd.option_context('display.max_columns', None):
    print(df_full_annotation_norm.head(2))

### Save the norm full annotation df

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/df_full_annotation_norm_Simple.json'
df_to_json(df_full_annotation_norm, path)

# 2. Build Basic Regression PE Model (DeepPose based)  

## 2.1. load the normalised annotations

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/df_full_annotation_norm_Simple.json'
df_full_annotation_norm = json_to_df(path)

In [None]:
df_full_annotation_norm = set_dtypes_df_full_annotation_abs(df_full_annotation_norm)

In [None]:
df_full_annotation_norm.head()

In [None]:
df_full_annotation_norm.info()

In [None]:
# to make the db easier to work with I am going to create a list with the kp col names, bbox col names, id col names
id_cols = df_full_annotation_norm.iloc[:, :3].columns.to_list()
bbox_cols = df_full_annotation_norm.iloc[:, 3:7].columns.to_list()
kp_cols = df_full_annotation_norm.iloc[:, 7:23].columns.to_list()

In [None]:
print(id_cols)
print(bbox_cols)
print(kp_cols)

## 2.2. Remove rows where too many (or primary) keypoints are missing

In [None]:
print(df_full_annotation_norm.iloc[359])

In [None]:
# removing any examples with more NaN values than the chosen threshold
# The nan values are there when a keypoint is occluded

def remove_rows_with_too_many_nans(df, columns_to_check, nan_threshold):
    """
    Remove rows from the DataFrame where the number of NaN values in specified columns exceeds the threshold.

    Parameters:
    - df: The DataFrame to process.
    - columns_to_check: A list of column names to check for NaN values.
    - nan_threshold: The maximum allowed number of NaN values in the specified columns. Rows with more NaNs will be removed.

    Returns:
    - A new DataFrame with rows exceeding the NaN threshold removed.
    """
    # Count NaNs only in the specified columns
    nan_counts = df[columns_to_check].isna().sum(axis=1)
    print(type(nan_counts))

    # Identify rows where NaN count is below or equal to the threshold
    rows_to_keep = nan_counts <= nan_threshold
    print(rows_to_keep[rows_to_keep==False].index)

    # Filter the DataFrame to keep only the desired rows
    filtered_df = df[rows_to_keep]

    return filtered_df


In [None]:
# calling the function to remove the rows with more than 14 keypoint coords missing
# the number of keypoints is 14 and each has 2 coords so there are 28 coords
df_full_annotation_norm = remove_rows_with_too_many_nans(df_full_annotation_norm, kp_cols, 8)

In [None]:
df_full_annotation_norm.info()

In [None]:
print(df_full_annotation_norm)

In [None]:
# removing any examples where the primary kp are missing. When these are missing we will not be able to use our PCK metric
# The nan values are there when a keypoint is occluded

def remove_rows_with_missing_primary_kp(df):
    """
    Remove rows from the DataFrame where kp_primary_missing is set to true

    Parameters:
    - df: The DataFrame to process.

    Returns:
    - A new DataFrame with rows not missing the primary kp.
    """
    with pd.option_context('display.max_columns', None):
        print(df[df['kp_primary_missing'] == True])

    # Identify rows where NaN count is below or equal to the threshold
    rows_to_keep = df['kp_primary_missing'] == False

    # Filter the DataFrame to keep only the desired rows
    filtered_df = df[rows_to_keep]

    return filtered_df

In [None]:
df_full_annotation_norm = remove_rows_with_missing_primary_kp(df_full_annotation_norm)

## 2.3. Replace nan with out of range (-10)

In [None]:
# In order to train the data keypoints cannot have the value nan
# this function removes the value nan from the keypoint df
def convert_nans_to_neg_ten(df, columns):

    df_adjusted = df.copy()

    # Iterate over the specified columns
    for col in columns:
        # Replace NaN values with -10
        df_adjusted[col].fillna(-10, inplace=True)

    return df_adjusted

In [None]:
df_full_annotation_norm = convert_nans_to_neg_ten(df_full_annotation_norm, kp_cols)

In [None]:
df_full_annotation_norm.info()

## 2.4. Split data into train, val and test sets

### 2.4.1. get list of ids that are in each set from obj detect folder and save to txt

In [None]:
# get list of ids from image_obj detect
def extract_image_ids(folder_path):
    """
    Extracts image IDs from filenames in the given folder. 
    The filenames are assumed to be in the format something_vidid.something_imgid.something.

    Parameters:
    folder_path (str): Path to the folder containing images.

    Returns:
    list: A list of strings in the format 'vidid_imgid'.
    """
    image_ids = []

    for filename in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, filename)):  # Ensure it's a file
            parts = filename.split('_')  # Split by the underscore
            vidid = parts[1].split('.')[0]  # Extract vidid (part after first underscore and before first dot)
            imgid = parts[2].split('.')[0]  # Extract imgid (part after second underscore and before second dot)
            image_ids.append(f'{vidid}_{imgid}')

    return image_ids

In [None]:
# save unique ids list as a text file
def save_list_to_file(list_data, file_path):
    """
    Saves a list to a text file with each entry on a new line.

    :param list_data: List of strings to be saved to a file.
    :param file_path: Path to the file where the list should be saved.
    """
    # Open the file for writing
    with open(file_path, 'w') as file:
        # Write each item on a new line
        for item in list_data:
            file.write(f"{item}\n")  # Add a newline after each item

In [None]:
# test imgs
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/YoloV8_dataset_Simple_parent/YoloV8_dataset_Simple/images/test'
ids_test = extract_image_ids(path)
print(ids_test)


In [None]:
# val imgs
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/YoloV8_dataset_Simple_parent/YoloV8_dataset_Simple/images/val'
ids_val = extract_image_ids(path)


In [None]:
# train imgs
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/YoloV8_dataset_Simple_parent/YoloV8_dataset_Simple/images/train'
ids_train = extract_image_ids(path)


In [None]:
# save the lists to txt files
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_test.txt'
save_list_to_file(ids_test, save_dir)


In [None]:
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_val.txt'

save_list_to_file(ids_val, save_dir)


In [None]:
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_train.txt'

save_list_to_file(ids_train, save_dir)

### 2.4.2. split df based on train, val, test split

In [None]:
def filter_df_by_id_parts(df, id_list, col1, col2):
    """
    Filters the DataFrame to include only rows where:
    - col1 matches idpart1
    - col2 matches idpart2
    The ID parts are derived from the id_list, where each ID is in the format 'idpart1_idpart2'.

    Parameters:
    df (pd.DataFrame): The DataFrame to be filtered.
    id_list (list): The list of IDs in the format 'idpart1_idpart2'.
    col1 (str): The name of the first column containing idpart1.
    col2 (str): The name of the second column containing idpart2.

    Returns:
    pd.DataFrame: A filtered DataFrame containing only the rows matching the ID parts.
    """
    # Split the IDs into idpart1 and idpart2
    id_parts = [id.split('_') for id in id_list]
    
    # Convert the list of tuples into a DataFrame
    id_df = pd.DataFrame(id_parts, columns=[col1, col2])
    
    # Perform an inner merge to filter the DataFrame
    filtered_df = pd.merge(df, id_df, how='inner', on=[col1, col2])
    
    return filtered_df

In [None]:
# test set
df_full_annotation_norm_test = filter_df_by_id_parts(df_full_annotation_norm, ids_test, 'vid_id', 'img_id')

print(df_full_annotation_norm_test.info())

display_all_cols(df_full_annotation_norm_test)


In [None]:
# val set
df_full_annotation_norm_val = filter_df_by_id_parts(df_full_annotation_norm, ids_val, 'vid_id', 'img_id')

print(df_full_annotation_norm_val.info())
display_all_cols(df_full_annotation_norm_val)

In [None]:
# train set
df_full_annotation_norm_train = filter_df_by_id_parts(df_full_annotation_norm, ids_train, 'vid_id', 'img_id')

print(df_full_annotation_norm_train.info())

In [None]:

print(df_full_annotation_norm_test.info())

display_all_cols(df_full_annotation_norm_test)

### 2.4.3. Save the df annotations to the processed folder

In [None]:
# save test
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/test_annotation_simple.json'
df_to_json(df_full_annotation_norm_test, path)

In [None]:
# save val
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/val_annotation_simple.json'
df_to_json(df_full_annotation_norm_val, path)

In [None]:
# save train
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/train_annotation_simple.json'
df_to_json(df_full_annotation_norm_train, path)

### 2.4.4. save imgs to the processed folder based on split

In [None]:
def move_images_by_ids(src_folder, dst_folder, id_list):
    """
    Moves images from the source folder to the destination folder based on the specified IDs.
    The image filenames are expected to be in the format 'something_vidid.something_imgid.something'.

    Parameters:
    src_folder (str): Path to the source folder containing the images.
    dst_folder (str): Path to the destination folder where images will be moved.
    id_list (list): List of IDs in the format 'idpart1_idpart2'.

    Returns:
    None
    """
    # Ensure the destination directory exists
    if not os.path.exists(dst_folder):
        os.makedirs(dst_folder)

    for filename in os.listdir(src_folder):
        if os.path.isfile(os.path.join(src_folder, filename)):  # Check if it's a file
            parts = filename.split('_')
            vidid = parts[0]
            imgid = parts[1]
            
            # Check if the extracted id combination is in the list
            if f'{vidid}_{imgid}' in id_list:
                src_path = os.path.join(src_folder, filename)
                dst_path = os.path.join(dst_folder, filename)
                shutil.move(src_path, dst_path)
                print(f'Moved: {filename} to {dst_folder}')

In [None]:
# test
src_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_crop_220'
dst_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/test'

move_images_by_ids(src_folder, dst_folder, ids_test)

In [None]:
# val
src_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_crop_220'
dst_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/val'

move_images_by_ids(src_folder, dst_folder, ids_val)

In [None]:
# train
src_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/raw/PE_Simple/Cropped_bbox_img_crop_220'
dst_folder = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/train'

move_images_by_ids(src_folder, dst_folder, ids_train)

### 2.4.5. save final list of all vid_id, img_id, bbox_id 

In [None]:
# create list of full ids
def full_ids_to_list(df, cols_to_combine):
    """
    Combines the values of specified columns in each row of the DataFrame, 
    separated by an underscore, and returns a list of these combined values.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data.
    cols_to_combine (list): List of column names to combine.

    Returns:
    list: A list where each item is a combined string of the specified columns' values.
    """
    # Use DataFrame's apply method to combine the columns row-wise
    combined_list = df[cols_to_combine].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    
    # Convert the combined Series to a list
    return combined_list.tolist()

In [None]:
# test
ids_test_bbox = full_ids_to_list(df_full_annotation_norm_test, id_cols)
print(ids_test_bbox)

In [None]:
print(len(ids_test))

In [None]:
# test
ids_val_bbox = full_ids_to_list(df_full_annotation_norm_val, id_cols)

In [None]:
# test
ids_train_bbox = full_ids_to_list(df_full_annotation_norm_train, id_cols)

In [None]:
# save the lists to txt files
save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_test_bbox.txt'
save_list_to_file(ids_test_bbox, save_dir)

save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_val_bbox.txt'

save_list_to_file(ids_val_bbox, save_dir)

save_dir = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_train_bbox.txt'

save_list_to_file(ids_train_bbox, save_dir)

## 2.5. Load image data into arr for train, val, test

### 2.5.1. load ids to list

In [None]:
# load ids into list 
def load_file_to_list(file_path):
    """
    loads a text file to a list with each entry on a new line becoming a new entry in the list.

    :param file_path: Path to the file where the list should be saved.
    :return list of data from file
    """
    # Open the file for writing
    lst = []
    with open(file_path, 'r') as file:
        # Write each item on a new line
        for line in file:
            lst.append(line.strip())
    return lst

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_test_bbox.txt'
ids_test_bbox = load_file_to_list(path)

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_val_bbox.txt'
ids_val_bbox = load_file_to_list(path)

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_train_bbox.txt'
ids_train_bbox = load_file_to_list(path)

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_test.txt'
ids_test = load_file_to_list(path)

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_val.txt'
ids_val = load_file_to_list(path)

In [None]:
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/ids_train.txt'
ids_train = load_file_to_list(path)

### 2.5.2. load image data to arr

In [None]:
# function to load the image data into an arr
# in the same order as the annotations and ids are stored (use id list for this)

# The load image data function may take a while to run

def load_image_data(ids_to_load, image_folder, crop_ext):

  # list for loading image data
  selected_imgs = []

  # for loop for loading image data that is present in the list of ids
  for i, img_id in enumerate(ids_to_load):

    # load the image
    img_path = os.path.join(image_folder, img_id+crop_ext)
    print(img_path)
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    #print(img)

    # change the img to RGB from BGR as plt uses RGB colour scale
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # scaling the pixel values to [0, 1] (you don't need to scal them back)
    img = img/255

    selected_imgs.append(img)

  # Convert the list of images to a NumPy array
  selected_imgs_array = np.array(selected_imgs)
  
  return selected_imgs_array

In [None]:
# test
image_folder_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/test'
crop_extension = '_crop_220x220.jpg'

test_imgs_array = load_image_data(ids_test_bbox, image_folder_path, crop_extension)


In [None]:
test_imgs_array.shape

In [None]:
# val
image_folder_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/val'
crop_extension = '_crop_220x220.jpg'

val_imgs_array = load_image_data(ids_val_bbox, image_folder_path, crop_extension)

In [None]:
# train
image_folder_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/images/train'
crop_extension = '_crop_220x220.jpg'

train_imgs_array = load_image_data(ids_train_bbox, image_folder_path, crop_extension)

### 2.5.3. load annotations to df and then keypoints to arr

#### 2.5.3.1. loading dfs

In [None]:
# load the json to a df test
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/test_annotation_simple.json'
df_full_annotation_norm_test = json_to_df(path)

In [None]:
df_full_annotation_norm_test = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_test)

In [None]:
# load the json to a df val
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/val_annotation_simple.json'
df_full_annotation_norm_val = json_to_df(path)

In [None]:
df_full_annotation_norm_val = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_val)

In [None]:
# load the json to a df train
path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/data/processed/PE_Simple/annotation/train_annotation_simple.json'
df_full_annotation_norm_train = json_to_df(path)

In [None]:
df_full_annotation_norm_train = set_dtypes_df_full_annotation_abs(df_full_annotation_norm_train)

In [None]:
# to make the db easier to work with I am going to create a list with the kp col names, bbox col names, id col names
id_cols = df_full_annotation_norm_test.iloc[:, :3].columns.to_list()
bbox_cols = df_full_annotation_norm_test.iloc[:, 3:7].columns.to_list()
kp_cols = df_full_annotation_norm_test.iloc[:, 7:23].columns.to_list()

In [None]:
print(kp_cols)

#### 2.5.3.2. loading the keypoint annotation into an arr 

In [None]:
def create_data_lists(df_to_list, list_of_cols):

  # create temp lists
  keypoints_temp = []

  # step through the rows and
  for _, row in df_to_list.iterrows():

    # extract the data arrays
    keypoints_data = row[list_of_cols].values

    # adding data to the list
    keypoints_temp.append(keypoints_data)

  # Convert the list to a NumPy array and make sure that they are float32
  keypoints_array = np.array(keypoints_temp, dtype=np.float32)
  
  return keypoints_array

In [None]:
# test
test_kp_array = create_data_lists(df_full_annotation_norm_test, kp_cols)

In [None]:
# val
val_kp_array = create_data_lists(df_full_annotation_norm_val, kp_cols)

In [None]:
# train
train_kp_array = create_data_lists(df_full_annotation_norm_train, kp_cols)

## 2.51. Augment train images

In [None]:
num_imgs = train_imgs_array.shape[0]
print(num_imgs)
num_kp = 8

In [None]:
# Denormalize keypoints for an array of images
def unnorm_keypoints_arr(kp_arr, img_arr):
    """
    Denormalizes keypoints for each image in the array based on the corresponding image size.
    It converts normalized keypoints (range [-1, 1]) back to pixel coordinates.

    Parameters:
    - kp_arr: Array of normalized keypoints, where each entry is a list of keypoints for an image.
              The keypoints are expected to be in the format [x1, y1, x2, y2, ...].
    - img_arr: Array of images. The size of each image is used to scale the keypoints back 
               to their pixel coordinates.

    Returns:
    - kp_abs_arr: Array of denormalized keypoints where each entry corresponds to the denormalized 
                  keypoints for the corresponding image in `img_arr`.
    """

    kp_abs_list = []  # List to store the denormalized keypoints for each image

    # Iterate through each set of keypoints and corresponding image
    for i, kp in enumerate(kp_arr):
        img_size = img_arr[i].shape  # Get the size of the current image (height, width, channels)

        # Denormalize the keypoints based on the image size
        kp_abs, missing_kp = unnorm_keypoints(img_size, kp_arr[i])

        # Save the denormalized keypoints to the list
        kp_abs_list.append(kp_abs)
    
    # Convert the list of denormalized keypoints to a NumPy array
    kp_abs_arr = np.array(kp_abs_list)

    return kp_abs_arr


In [None]:
# Normalize keypoints for an array of images
def norm_keypoints_arr(kp_arr, img_arr):
    """
    Normalizes keypoints for each image in the array based on the corresponding image size.
    It converts keypoints from pixel coordinates back to normalized coordinates (range [-1, 1]).

    Parameters:
    - kp_arr: Array of keypoints, where each entry is a list of keypoints for an image.
              The keypoints are expected to be in the format [x1, y1, x2, y2, ...] 
              with pixel coordinates.
    - img_arr: Array of images. The size of each image is used to scale the keypoints 
               to normalized coordinates.

    Returns:
    - kp_norm_arr: Array of normalized keypoints where each entry corresponds to the normalized 
                   keypoints for the corresponding image in `img_arr`.
    """
        
    kp_norm_list = []  # List to store the normalized keypoints for each image

    # Iterate through each set of keypoints and corresponding image
    for i, kp in enumerate(kp_arr):
        img_size = img_arr[i].shape  # Get the size of the current image (height, width, channels)

        # Normalize the keypoints based on the image size
        kp_norm = norm_keypoints(img_size, kp_arr[i])

        # Save the normalized keypoints to the list
        kp_norm_list.append(kp_norm)
    
    # Convert the list of normalized keypoints to a NumPy array
    kp_norm_arr = np.array(kp_norm_list)  

    return kp_norm_arr

In [None]:
# Apply augmentation to images and keypoints
def apply_aug(img_arr_orig, kp_arr_orig, aug, num_of_kp=8):
    """
    Applies augmentation to a batch of images and their corresponding keypoints.

    Parameters:
    - img_arr_orig: Original array of images. Shape should be (num_imgs, height, width, channels).
    - kp_arr_orig: Original array of keypoints. Shape should be (num_imgs, num_of_kp*2), where each 
                   keypoint is represented by its x and y coordinates in pixel values.
    - aug: An imgaug augmentation sequence or augmenter to apply to the images and keypoints.
    - num_of_kp: Optional. Number of keypoints per image (default is 8).

    Returns:
    - img_arr_aug: Augmented array of images. Same shape as `img_arr_orig`.
    - kp_arr_aug: Augmented array of keypoints. Same shape as `kp_arr_orig`.
    """
    # print(img_arr_orig.shape)
    #print(kp_arr_orig.shape)
    
    # Initialize lists to store augmented images and keypoints
    aug_img = []  # List for augmented images
    aug_kp = []   # List for augmented keypoints

    # Get the number of images in the batch
    num_imgs = img_arr_orig.shape[0]
    #print(num_imgs)

    # Loop over each image and its corresponding keypoints
    for i in range(num_imgs):
        image = img_arr_orig[i]  # Extract the i-th image
        #print(image.shape)
        
        # Convert keypoints to KeypointsOnImage format for imgaug
        keypoints = kp_arr_orig[i]
        #print(keypoints)
        kps = [Keypoint(x=keypoints[j*2], y=keypoints[j*2+1]) for j in range(num_of_kp)]
        kps_on_image = KeypointsOnImage(kps, shape=image.shape)
        
        # Apply the augmentation to the image and keypoints
        image_aug, kps_aug = aug(image=image, keypoints=kps_on_image)
        
        # Convert augmented keypoints back to the original flattened format [x1, y1, x2, y2, ...]
        keypoints_aug = []
        for kp in kps_aug.keypoints:
            keypoints_aug.extend([kp.x, kp.y])
        
        # Append the augmented image and keypoints to their respective lists
        aug_img.append(image_aug)
        aug_kp.append(keypoints_aug)

    # Convert the lists of augmented images and keypoints back to NumPy arrays
    img_arr_aug = np.array(aug_img)
    kp_arr_aug = np.array(aug_kp)

    return img_arr_aug, kp_arr_aug


### 2.51.1. Apply a lrflip to the images

In [None]:
# specify augmentation
seq_lrflip = iaa.Sequential([
    iaa.Fliplr(1.0)
])

In [None]:
# apply augmentation

# unnorm kp
train_kp_array_abs = unnorm_keypoints_arr(train_kp_array, train_imgs_array)

# apply augmentation
train_imgs_array_aug_lrflip, train_kp_array_aug_lrflip_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_lrflip)

# norm the aug kp
train_kp_array_aug_lrflip_norm = norm_keypoints_arr(train_kp_array_aug_lrflip_abs, train_imgs_array_aug_lrflip)

In [None]:
# check lrflip
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array_aug_lrflip[150], train_kp_array_aug_lrflip_abs[150], nkeypoints=8, keypoint_labels=labels)

In [None]:
#check original
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array[150], train_kp_array_abs[150], nkeypoints=8, keypoint_labels=labels)

### 2.51.2. Apply a random rotation (5:20 deg) clockwise and anticlockwise(-20:-5 deg) 

In [None]:
# specify augmentation
seq_rotate_clock = iaa.Sequential([
    iaa.Affine(rotate=(5, 20)),
])

In [None]:
# apply augmentation

# unnorm kp
train_kp_array_abs = unnorm_keypoints_arr(train_kp_array, train_imgs_array)

# apply augmentation
train_imgs_array_aug_rclock, train_kp_array_aug_rclock_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_rotate_clock)

# norm the aug kp
train_kp_array_aug_rclock_norm = norm_keypoints_arr(train_kp_array_aug_rclock_abs, train_imgs_array_aug_rclock)

In [None]:
# check rclock
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array_aug_rclock[300], train_kp_array_aug_rclock_abs[300], nkeypoints=8, keypoint_labels=labels)

In [None]:
#check original
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array[300], train_kp_array_abs[300], nkeypoints=8, keypoint_labels=labels)

In [None]:
# specify augmentation
seq_rotate_anticlock = iaa.Sequential([
    iaa.Affine(rotate=(-20, -5)),
])

In [None]:
# apply augmentation

# unnorm kp
train_kp_array_abs = unnorm_keypoints_arr(train_kp_array, train_imgs_array)

# apply augmentation
train_imgs_array_aug_ranticlock, train_kp_array_aug_ranticlock_abs = apply_aug(train_imgs_array, train_kp_array_abs, seq_rotate_anticlock)

# norm the aug kp
train_kp_array_aug_ranticlock_norm = norm_keypoints_arr(train_kp_array_aug_ranticlock_abs, train_imgs_array_aug_ranticlock)

In [None]:
# check rclock
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array_aug_ranticlock[300], train_kp_array_aug_ranticlock_abs[300], nkeypoints=8, keypoint_labels=labels)

In [None]:
#check original
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array[300], train_kp_array_abs[300], nkeypoints=8, keypoint_labels=labels)

### 2.51.3. Apply a translation either up and down or left and right by the amount of padding in img

In [None]:
# OLD FUNCTION
# def detect_padding(image):
#     """
#     Detects if padding is on the x-axis (left and right) or y-axis (top and bottom) 
#     of the image and calculates the padding size on one side.

#     Parameters:
#     - image: A NumPy array representing the image. The shape should be (height, width, channels).

#     Returns:
#     - is_padding_x: True if padding is on the x-axis, False if padding is on the y-axis.
#     - padding_size: The size of the padding on one side in pixels.
#     """

#     height, width, _ = image.shape
    
#     # Check for padding along the x-axis (left and right)
#     left_column = image[:, 0, :]  # The first column (left side)
#     right_column = image[:, -1, :]  # The last column (right side)
#     # Check for padding along the x-axis (left and right)
#     top_row = image[0, :, :]  # The first column (left side)
#     bottom_row = image[-1, :, :]  # The last column (right side)
#     print(left_column)
#     #print(right_column)
    
#     # Check if the columns are fully black (indicating padding)
#     if np.all(left_column < 1) and np.all(right_column < 1):
#         # Padding is along the x-axis
#         is_padding_x = True
#         # Calculate padding size
#         print(image[:, 30, 0]*255)
#         plot_img(image)
#         padding_size = np.sum(image[:, 0, 0] < 1) // 2  # Count black pixels on one side
#     else:
#         #plot_img(image)
#         # Padding is along the y-axis (top and bottom)
#         is_padding_x = False
#         # Calculate padding size
#         padding_size = np.sum(image[0, :, 0] < 1) // 2  # Count black pixels on one side
#         #print(padding_size)

#     return is_padding_x, padding_size

In [None]:
def detect_padding(image):
    """
    Detects if padding is on the x-axis (left and right) or y-axis (top and bottom)
    of the image and calculates the padding size on one side.

    Parameters:
    - image: A NumPy array representing the image. The shape should be (width, height, channels).

    Returns:
    - is_padding_x: True if padding is on the x-axis, False if padding is on the y-axis.
    - padding_size: The size of the padding on one side in pixels.
    """

    width, height, _ = image.shape
    
    # Check for padding along the x-axis (left and right)
    left_column = image[:, 0, :]#image[0, :, :]  # The first column (left side)
    right_column = image[:, -1, :] #image[-1, :, :]  # The last column (right side)

    # Check for padding along the y-axis (top and bottom)
    top_row = image[:, 0, :]  # The first row (top side)
    bottom_row = image[:, -1, :]  # The last row (bottom side)
    #print(image[:, 5, :] *255)
    #print(left_column*255)
    
    # Check if the columns are fully black (indicating padding)
    if np.all(left_column*255 < 30) and np.all(right_column*255 < 30):
        # Padding is along the x-axis
        is_padding_x = True
        #plot_img(image)
        # Calculate padding size
        #padding_size = np.sum(image[0, :, 0]*255 < 30) // 2  # Count black pixels on one side
        # if padding_size > 60:
        sum1 = np.sum(image[5, :, 0]*255 < 20) // 2
        sum2 = np.sum(image[10, :, 0]*255 < 20) // 2
        sum3 = np.sum(image[60, :, 0]*255 < 20) // 2
        sum4 = np.sum(image[110, :, 0]*255 < 20) // 2
        sum5 = np.sum(image[-60, :, 0]*255 < 20) // 2
        sum6 = np.sum(image[-10, :, 0]*255 < 20) // 2
        sum7 = np.sum(image[-5, :, 0]*255 < 20) // 2
        padding_size = min(sum1, sum2, sum3, sum4, sum5, sum6, sum7)
        #     padding_size = max(average - 5, 1)
        # if padding_size > 60:
        #     sum1 = np.sum(image[5, :, 0]*255 < 10) // 2
        #     sum2 = np.sum(image[10, :, 0]*255 < 10) // 2
        #     sum3 = np.sum(image[60, :, 0]*255 < 10) // 2
        #     sum4 = np.sum(image[110, :, 0]*255 < 10) // 2
        #     sum5 = np.sum(image[-60, :, 0]*255 < 10) // 2
        #     sum6 = np.sum(image[-10, :, 0]*255 < 10) // 2
        #     sum7 = np.sum(image[-5, :, 0]*255 < 10) // 2
        #     average = (sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7) // 7  # Floor division for rounding down
        #     padding_size = max(average - 5, 1)
        if padding_size > 20: 
            padding_size = 20

    else:
        # Padding is along the y-axis (top and bottom)
        is_padding_x = False
        # Calculate padding size
        padding_size = np.sum(image[:, 0, 0]*255 < 30) // 2  # Count black pixels on one side
        # if padding_size > 60:
        sum1 = np.sum(image[:, 5, 0]*255 < 20) // 2
        sum2 = np.sum(image[:, 10, 0]*255 < 20) // 2
        sum3 = np.sum(image[:, 60, 0]*255 < 20) // 2
        sum4 = np.sum(image[:, 110, 0]*255 < 20) // 2
        sum5 = np.sum(image[:, -60, 0]*255 < 20) // 2
        sum6 = np.sum(image[:, -10, 0]*255 < 20) // 2
        sum7 = np.sum(image[:, -5, 0]*255 < 20) // 2
        padding_size = min(sum1, sum2, sum3, sum4, sum5, sum6, sum7)
        #     padding_size = max(average - 5, 1)
        # if padding_size > 60:
        #     sum1 = np.sum(image[:, 5, 0]*255 < 10) // 2
        #     sum2 = np.sum(image[:, 10, 0]*255 < 10) // 2
        #     sum3 = np.sum(image[:, 60, 0]*255 < 10) // 2
        #     sum4 = np.sum(image[:, 110, 0]*255 < 10) // 2
        #     sum5 = np.sum(image[:, -60, 0]*255 < 10) // 2
        #     sum6 = np.sum(image[:, -10, 0]*255 < 10) // 2
        #     sum7 = np.sum(image[:, -5, 0]*255 < 10) // 2
        #     average = (sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7) // 7  # Floor division for rounding down
        #     padding_size = max(average - 5, 1)
        if padding_size > 20: 
            padding_size = 20

    return is_padding_x, padding_size

In [None]:
# test_img_arr = train_imgs_array[0:2]
# test_img_arr.shape

In [None]:
# test_kp_arr = train_kp_array[0:2]
# test_kp_arr.shape

In [None]:
# unnorm kp
train_kp_array_abs = unnorm_keypoints_arr(train_kp_array, train_imgs_array)
# print(train_kp_array_abs)
# apply augmentation

# Get the number of images in the batch
num_imgs = train_imgs_array.shape[0]
# print(num_imgs)

# creat empty arrays
train_imgs_array_aug_trans = np.empty((0, train_imgs_array.shape[1], train_imgs_array.shape[2], train_imgs_array.shape[3]), dtype=train_imgs_array.dtype)
train_kp_array_aug_trans = np.empty((0, train_kp_array_abs.shape[1]), dtype=train_kp_array_abs.dtype)

# print(train_imgs_array_aug_trans.shape)
# print(train_kp_array_aug_trans.shape)

# Loop over each image and its corresponding keypoints
for i in range(num_imgs):
    image = train_imgs_array[i]  # Extract the i-th image
    kp = train_kp_array_abs[i]
    # print(i)
    # print(image.shape)
    # print(kp.shape)

    is_padding_x, padding_size = detect_padding(image)
    print(f'this: {i}')
    print(is_padding_x)
    print(padding_size)

    if is_padding_x:
        seq_trans_x_left = iaa.Sequential([
            iaa.TranslateX(px=(-padding_size, -padding_size)),
        ])
        seq_trans_x_right = iaa.Sequential([
            iaa.TranslateX(px=(padding_size, padding_size)),
        ])

        # Convert to shape (1, 220, 220, 3) and (1, 16)
        image = np.expand_dims(image, axis=0)
        #print(is_padding_x)
        #print(image.shape)
        #print(i)
        kp = np.expand_dims(kp, axis=0)

        # apply augmentations
        single_trans_x_left_img_arr, single_trans_x_left_kp_arr = apply_aug(image, kp, seq_trans_x_left)
        single_trans_x_right_img_arr, single_trans_x_right_kp_arr = apply_aug(image, kp, seq_trans_x_right)

        #save to image array
        train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_x_left_img_arr), axis=0)
        train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_x_right_img_arr), axis=0)
        #save to kp array
        train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_x_left_kp_arr), axis=0)
        train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_x_right_kp_arr), axis=0)

    else :
        seq_trans_y_up = iaa.Sequential([
            iaa.TranslateY(px=(-padding_size, -padding_size)),
        ])
        seq_trans_y_down = iaa.Sequential([
            iaa.TranslateY(px=(padding_size, padding_size)),
        ])

        # Convert to shape (1, 220, 220, 3) and (1, 16)
        image = np.expand_dims(image, axis=0)
        #print(is_padding_x)
        #print(image.shape)
        #print(i)
        kp = np.expand_dims(kp, axis=0)

        # apply augmentations
        single_trans_y_up_img_arr, single_trans_y_up_kp_arr = apply_aug(image, kp, seq_trans_y_up)
        single_trans_y_down_img_arr, single_trans_y_down_kp_arr = apply_aug(image, kp, seq_trans_y_down)

        #save to image array
        train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_y_up_img_arr), axis=0)
        train_imgs_array_aug_trans = np.concatenate((train_imgs_array_aug_trans, single_trans_y_down_img_arr), axis=0)
        #save to kp array
        train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_y_up_kp_arr), axis=0)
        train_kp_array_aug_trans = np.concatenate((train_kp_array_aug_trans, single_trans_y_down_kp_arr), axis=0)

In [None]:
# image_check = 231

# print(train_imgs_array[image_check,:, -1, :]*255)
# print(train_imgs_array[image_check,0, :, :]*255)
# print(np.sum(train_imgs_array[image_check, :, 0, 0]*255 < 30))
# print(np.sum(train_imgs_array[image_check, :, 0, 0]*255 < 30)//2)
# plot_img(train_imgs_array[image_check])

In [None]:
print(train_imgs_array_aug_trans.shape)
print(train_kp_array_aug_trans.shape)

In [None]:
# norm the aug kp
train_kp_array_aug_trans_norm = norm_keypoints_arr(train_kp_array_aug_trans, train_imgs_array_aug_trans)

In [None]:
# check trans
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array_aug_trans[561], train_kp_array_aug_trans[561], nkeypoints=8, keypoint_labels=labels)

In [None]:
# check trans
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array_aug_trans[560], train_kp_array_aug_trans[560], nkeypoints=8, keypoint_labels=labels)

In [None]:
#check original
labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']
plot_img_and_keypoint(train_imgs_array[280], train_kp_array_abs[280], nkeypoints=8, keypoint_labels=labels)

### 2.51.4 Combine the simple augmentation datasets with the original dataset to create simple_aug_train_dataset

In [None]:
# creat empty arrays
#train_imgs_array_aug_simple = np.empty((0, train_imgs_array.shape[1], train_imgs_array.shape[2], train_imgs_array.shape[3]), dtype=train_imgs_array.dtype)
#train_kp_array_aug_simple = np.empty((0, train_kp_array_abs.shape[1]), dtype=train_kp_array_abs.dtype)

In [None]:
# combine arrays
#save to image array
train_imgs_array_aug_simple = np.concatenate((train_imgs_array, train_imgs_array_aug_lrflip), axis=0)
train_imgs_array_aug_simple = np.concatenate((train_imgs_array_aug_simple, train_imgs_array_aug_rclock), axis=0)
train_imgs_array_aug_simple = np.concatenate((train_imgs_array_aug_simple, train_imgs_array_aug_ranticlock), axis=0)
train_imgs_array_aug_simple = np.concatenate((train_imgs_array_aug_simple, train_imgs_array_aug_trans), axis=0)
#save to kp array
train_kp_array_aug_simple = np.concatenate((train_kp_array, train_kp_array_aug_lrflip_norm), axis=0)
train_kp_array_aug_simple = np.concatenate((train_kp_array_aug_simple, train_kp_array_aug_rclock_norm), axis=0)
train_kp_array_aug_simple = np.concatenate((train_kp_array_aug_simple, train_kp_array_aug_ranticlock_norm), axis=0)
train_kp_array_aug_simple = np.concatenate((train_kp_array_aug_simple, train_kp_array_aug_trans_norm), axis=0)

In [None]:
print(train_imgs_array_aug_simple.shape)
print(train_kp_array_aug_simple.shape)

### 2.51.5. Ensure that all kp are within the image frame and shift them in

In [None]:

def find_out_of_img_kp_rows(arr):
    """
    Finds the number and positions of rows that contain numbers lower than -0.5 but not -10 and greater than 0.5
    THese are keypoints that are outside the frame but not the missing ones.

    Parameters:
    - arr: A NumPy array of shape (n, 16).

    Returns:
    - count_neg_rows: The number of rows that contain negative numbers.
    - neg_row_indices: A list of indices of rows that contain negative numbers.
    """
    # Check which rows contain negative numbers
    neg_row_mask = np.any(((arr < -0.5) & (arr > -9.0)) | (arr > 0.5), axis=1)
    
    # Get the indices of rows that contain negative numbers
    neg_row_indices = np.where(neg_row_mask)[0]
    
    # Count the number of rows with negative numbers
    count_neg_rows = len(neg_row_indices)
    
    return count_neg_rows, neg_row_indices

In [None]:
unique_types = set(type(element) for element in train_kp_array_aug_simple.flatten())
print(unique_types)

In [None]:
# check datatypes
train_kp_array_aug_simple.dtype

In [None]:
# ensure all the kp are within the image
print(find_out_of_img_kp_rows(train_kp_array_aug_simple))

In [None]:
def replace_out_of_img_kp(arr):
    """
    Finds and replaces the elements in the array that are outside the frame but not the missing ones.
    Specifically, elements greater than 0.5 are replaced with 0.5, and elements less than -0.5 but 
    greater than -9.0 are replaced with -0.5.

    Parameters:
    - arr: A NumPy array of shape (n, 16).

    Returns:
    - modified_arr: The modified NumPy array with replaced values.
    - count_replacements: The number of elements that were replaced.
    """
    # Make a copy of the array to avoid modifying the original array
    modified_arr = arr.copy()

    # Replace elements greater than 0.5 with 0.5
    count_pos_replacements = np.sum(modified_arr > 0.5)
    modified_arr[modified_arr > 0.5] = 0.49

    # Replace elements less than -0.5 but greater than -9.0 with -0.5
    count_neg_replacements = np.sum((modified_arr < -0.5) & (modified_arr > -9.0))
    modified_arr[(modified_arr < -0.5) & (modified_arr > -9.0)] = -0.49

    # Total count of replacements
    count_replacements = count_pos_replacements + count_neg_replacements

    return modified_arr, count_replacements

In [None]:
train_kp_array_aug_simple, num_replacements = replace_out_of_img_kp(train_kp_array_aug_simple)
print(num_replacements)

In [None]:
print(find_out_of_img_kp_rows(train_kp_array_aug_simple))

## 2.6. Visualise the data

In [None]:
def plot_img(img):
  fig = plt.figure(figsize=(8, 25), dpi=100)
  plt.subplot(1,2,1)
  plt.imshow(img)
  plt.show()

In [None]:
def plot_img_and_keypoint(img, keypoints, nkeypoints, keypoint_labels):
  fig = plt.figure(figsize=(8, 8), dpi=100)
  plt.imshow(img)
  x_keypoints = keypoints[::2]
  y_keypoints = keypoints[1::2]
  plt.scatter(x_keypoints, y_keypoints, marker='.', c=np.arange(nkeypoints), cmap='jet')

    # If labels are provided, add them to the plot
  if keypoint_labels is not None:
      for i, (x, y) in enumerate(zip(x_keypoints, y_keypoints)):
          plt.text(x, y, keypoint_labels[i], fontsize=12, color='white', 
                    bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

  plt.show()

In [None]:
# def unnorm_keypoints(img_size, keypoints, kp_to_null=None):

#   readjust_x = img_size[0]
#   readjust_y = img_size[1]
#   #print(readjust_x)
#   #print(readjust_y)
#   new_keypoints = []
#   missing_kp = []

#   for i, keypoint in enumerate(keypoints):
#     # Null keypoints at specified indices
#     #print(kp_to_null)
#     if keypoint == -10 or (kp_to_null and i in kp_to_null):
#       keypoint = np.nan
#       #print(missing_kp)
#       missing_kp.append(i)

#     if i % 2 == 0:
#       keypoint = keypoint * readjust_x + readjust_x/2
#       #print(i, keypoint, 'x')
#     else:
#       keypoint = keypoint * readjust_y + readjust_y/2
#       #print(i, keypoint, 'y')
#     #print(keypoint)
#     new_keypoints.append(keypoint)
#   #print(new_keypoints)
#   return new_keypoints, missing_kp

In [None]:
display_img = 259
chosen_img = train_imgs_array[display_img]
chosen_img_size = chosen_img.shape
print(chosen_img_size)
#print(original_img_shape)
chosen_img_keypoints = train_kp_array[display_img]
nkeypoints = 8
keypoint_labels = kp_cols[::2]

display_keypoints, missing_kp = unnorm_keypoints(chosen_img_size, chosen_img_keypoints)

plot_img_and_keypoint(chosen_img, display_keypoints, 8, keypoint_labels)

In [None]:
display_img = 1013
chosen_img = train_imgs_array_aug_simple[display_img]
chosen_img_size = chosen_img.shape
print(chosen_img_size)
print(chosen_img_size)
#print(original_img_shape)
chosen_img_keypoints = train_kp_array_aug_simple_adjust[display_img]
nkeypoints = 8
keypoint_labels = kp_cols[::2]

display_keypoints, missing_kp = unnorm_keypoints(chosen_img_size, chosen_img_keypoints)

plot_img_and_keypoint(chosen_img, display_keypoints, 8, keypoint_labels)

## 2.7. Building the model

### 2.7.1. Define the Loss function


In [None]:
def masked_mse(y_true, y_pred):
    """
    Computes the mean squared error, ignoring the invisible keypoints.
    Assuming that -10.0 indicates an invisible keypoint.
    """
    # Create a mask where keypoints are visible
    mask = (y_true != -10.0).float().to(y_true.device)

    # Apply the mask to filter out invisible keypoints from both
    # the predictions and the true values
    y_true_masked = y_true * mask
    y_pred_masked = y_pred * mask

    # Compute the Mean Squared Error only on the visible keypoints
    mse = F.mse_loss(y_pred_masked, y_true_masked, reduction='sum') / mask.sum()
    return mse

In [None]:
# MANUAL IMPLEMENTATION OF THE ABOVE
# def masked_rmse_loss(y_true, y_pred):
#     """
#     Computes the Root Mean Square Error (RMSE) loss, ignoring the invisible keypoints (denoted by -10).
    
#     Parameters:
#     y_true (torch.Tensor): The ground truth keypoints (batch_size, num_keypoints*2).
#     y_pred (torch.Tensor): The predicted keypoints (batch_size, num_keypoints*2).

#     Returns:
#     torch.Tensor: The computed RMSE loss.
#     """
#     # Create a mask where keypoints are visible (not equal to -10)
#     mask = (y_true != -10.0).float()

#     # Apply the mask to filter out invisible keypoints
#     y_true_masked = y_true * mask
#     y_pred_masked = y_pred * mask

#     # Compute the squared differences
#     squared_diff = (y_pred_masked - y_true_masked) ** 2

#     # Compute the mean of squared differences for visible keypoints
#     loss = torch.sum(squared_diff) / torch.sum(mask)

#     # Return the square root of the loss to get RMSE
#     return torch.sqrt(loss)

# # Example usage:
# # Assume y_true and y_pred are your ground truth and predicted keypoints, respectively.
# # y_true = torch.tensor([...])
# # y_pred = torch.tensor([...])
# # loss = masked_rmse_loss(y_true, y_pred)

### 2.7.2. Define the evaluation metrics

In [None]:
# PCK
# put in a function that will use the max bbox if primary kp is missing
def pck_metric(y_true, y_pred, threshold=0.2):
    """
    Computes the Percentage of Correct Keypoints (PCK) metric.
    
    Parameters:
    y_true (torch.Tensor): The ground truth keypoints (batch_size, num_keypoints*2).
    y_pred (torch.Tensor): The predicted keypoints (batch_size, num_keypoints*2).
    threshold (float): The distance threshold for a keypoint to be considered correct.
                       Typically set relative to the size of the bounding box (e.g., 0.2).
    
    Returns:
    float: The percentage of correct keypoints.
    """
    # Create a mask where keypoints are visible (not equal to -10)
    mask = (y_true != -10.0).float().to(y_true.device)
    #print(mask)

    # Apply the mask to filter out invisible keypoints
    y_true_masked = y_true * mask
    y_pred_masked = y_pred * mask

    # print(y_true_masked)
    # print(y_pred_masked)

    # Compute the Euclidean distance between the predicted and true keypoints
    distances = torch.sqrt((y_pred_masked[:, ::2] - y_true_masked[:, ::2]) ** 2 +
                           (y_pred_masked[:, 1::2] - y_true_masked[:, 1::2]) ** 2)
    
    #print(distances)
    
    # Normalize the distances (relative to the max and min y coord)
    Norm_max_min_kp = torch.max(y_true_masked[:, 1::2], dim=1)[0] - torch.min(y_true_masked[:, 1::2], dim=1)[0]
    # Normalise based on the distance between the head and the bottom of the body (position 0, 1 and )
    #print(y_true[:, 0],y_true[:,10],y_true[:, 1],y_true[:, 11])
    #print((y_true[:, 0] - y_true[:,10]) ** 2)
    #print((y_true[:, 1] - y_true[:, 11]) ** 2)
    Norm_head_lowerbody = torch.sqrt((y_true[:, 0] - y_true[:,10]) ** 2 +
                        (y_true[:, 1] - y_true[:, 11]) ** 2)
    #print(Norm_head_lowerbody)
    normalized_distances = distances / Norm_head_lowerbody[:, None]
    #print(distances)
    #print(normalized_distances)

    # Count the correct keypoints (distance <= threshold)
    correct_keypoints = (normalized_distances <= threshold).float() * mask[:, ::2]
    #print(correct_keypoints)

    # Calculate the PCK as the percentage of correct keypoints
    pck = correct_keypoints.sum() / mask[:, ::2].sum()
    return pck#.item()


In [None]:
# # create two tensors to check pck

# # Create two PyTorch tensors with the sizes (1, 16)
# # Initialize them with random values between -1 and 1
# tensor1_true = torch.rand(1, 16) * 2 - 1
# tensor2_pred = tensor1_true.clone()

# # creating a tensor with 2 predictions for an image (test that it will work for multiple inputs)
# # tensor2_pred = tensor1_true.clone()

# # Introduce some differences in tensor2
# tensor2_pred[0, :8] += torch.randn(8) * 0.1  # Slightly off for the first element of the first row

# tensor2_pred[0, :8] += torch.randn(8) * 0.1  # Slightly off for the first 8 elements of the first row
# #tensor2_pred[1, 8:] += torch.randn(8) * 0.1  # Slightly off for the last 8 elements of the second row

# # Ensure the values are still within the range [-1, 1]
# tensor2_pred = torch.clamp(tensor2_pred, min=-1, max=1)

# print(tensor1_true, tensor2_pred)

In [None]:
# print(tensor1_true, tensor2_pred)

In [None]:
# tensor1_true[0, 5] = -10
# tensor1_true[0, 4] = -10

In [None]:
# print(pck_metric(tensor1_true, tensor2_pred, 0.2))

### 2.7.3. Define the model

In [None]:
# First model (not correct sizes)
# class DeepPoseModel(nn.Module):
#     def __init__(self, nkeypoints=8):
#         # Initializes the DeepPoseModel with the dataset and training configuration.
#         super(DeepPoseModel, self).__init__()
        
#         # The feature extractor part of the model, composed of several convolutional layers.
#         self.features = nn.Sequential(
#             # Conv2d: Input channels = 3 (RGB image), Output channels = 96, kernel size = 11x11,
#             # stride = 4, padding = 4. 
#             # Input: (batch_size, 3, 220, 220)
#             # Output: (batch_size, 96, 55, 55)
#             nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=4),
            
#             # Local Response Normalization (LRN) over 5 neighboring channels
#             nn.LocalResponseNorm(5),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),

#             # Max pooling with 3x3 kernel and stride 2
#             # output size: (batch_size, 96, 27, 27)
#             nn.MaxPool2d(kernel_size=3, stride=2),
            
#             # Conv2d: Input channels = 96, Output channels = 256, kernel size = 5x5,
#             # stride = 2, padding = 2.
#             # Input: (batch_size, 96, 27, 27)
#             # Output: (batch_size, 256, 27, 27)
#             nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            
#             # Local Response Normalization (LRN) over 5 neighboring channels
#             nn.LocalResponseNorm(5),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),

#             # Max pooling with 3x3 kernel and stride 2
#             # output size: (batch_size, 96, 13, 13)
#             nn.MaxPool2d(kernel_size=3, stride=2),
            
#             # Conv2d: Input channels = 256, Output channels = 384, kernel size = 3x3,
#             # stride = 1, padding = 1.
#             # Input: (batch_size, 256, 13, 13)
#             # Output: (batch_size, 384, 13, 13)
#             nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),
            
#             # Conv2d: Input channels = 384, Output channels = 384, kernel size = 3x3,
#             # stride = 1, padding = 1.
#             # Input: (batch_size, 384, 13, 13)
#             # Output: (batch_size, 384, 13, 13)
#             nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),
            
#             # Conv2d: Input channels = 384, Output channels = 256, kernel size = 3x3,
#             # stride = 1, padding = 1.
#             # Input: (batch_size, 384, 13, 13)
#             # Output: (batch_size, 256, 13, 13)
#             nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),
            
#             # MaxPool2d: Kernel size = 3x3, stride = 2.
#             # Input: (batch_size, 256, 13, 13)
#             # Output: (batch_size, 256, 6, 6)
#             # Max pooling with 3x3 kernel and stride 2
#             nn.MaxPool2d(kernel_size=3, stride=2)
#         )
        
#         # The classifier part of the model, composed of fully connected layers.
#         self.classifier = nn.Sequential(
#             # Flatten the input tensor
#             # Input: (batch_size, 256, 6, 6)
#             # Output: (batch_size, 256 * 6 * 6) = (batch_size, 9216)
#             nn.Flatten(),
            
#             # Linear layer with input size 6400 and output size 4096
#             # Input: (batch_size, 6400)
#             # Output: (batch_size, 4096)
#             nn.Linear(256 * 6 * 6, 4096),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),
            
#             # Dropout layer with 60% dropout rate
#             nn.Dropout(0.6),
            
#             # Linear layer with input size 4096 and output size 4096
#             # Input: (batch_size, 4096)
#             # Output: (batch_size, 4096)
#             nn.Linear(4096, 4096),
            
#             # ReLU activation function applied in place (no extra memory allocation)
#             nn.ReLU(inplace=True),
            
#             # Dropout layer with 60% dropout rate
#             nn.Dropout(0.6),
            
#             # Final linear layer with input size 4096 and output size nkeypoints * 2
#             # Output is (nkeypoints * 2) coordinates (x, y) for each keypoint
#             # Input: (batch_size, 4096)
#             # Output: (batch_size, nkeypoints * 2)
#             nn.Linear(4096, nkeypoints * 2)
#         )
        
#     def forward(self, x):
#         # Define the forward pass through the network.
#         # Pass input `x` through the feature extractor
#         x = self.features(x)
#         # Pass the result through the classifier to get the final output
#         x = self.classifier(x)
#         return x

In [None]:
class DeepPoseModel(nn.Module):
    def __init__(self, nkeypoints=8):
        # Initializes the DeepPoseModel with the dataset and training configuration.
        super(DeepPoseModel, self).__init__()
        
        # The feature extractor part of the model, composed of several convolutional layers.
        self.features = nn.Sequential(
            # Conv2d: Input channels = 3 (RGB image), Output channels = 48, kernel size = 11x11,
            # stride = 4, padding = 4. 
            # Input: (batch_size, 3, 220, 220)
            # Output: (batch_size, 48, 55, 55)
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=4),
            
            # Local Response Normalization (LRN) over 5 neighboring channels
            nn.LocalResponseNorm(5),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),

            # Max pooling with 3x3 kernel and stride 2
            # output size: (batch_size, 96, 27, 27)
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            # Conv2d: Input channels = 96, Output channels = 256, kernel size = 5x5,
            # stride = 2, padding = 2.
            # Input: (batch_size, 48, 27, 27)
            # Output: (batch_size, 128, 27, 27)
            nn.Conv2d(48, 128, kernel_size=5, stride=1, padding=2),
            
            # Local Response Normalization (LRN) over 5 neighboring channels
            nn.LocalResponseNorm(5),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),

            # Max pooling with 3x3 kernel and stride 2
            # output size: (batch_size, 96, 13, 13)
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            # Conv2d: Input channels = 256, Output channels = 384, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 256, 13, 13)
            # Output: (batch_size, 384, 13, 13)
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Conv2d: Input channels = 384, Output channels = 384, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 384, 13, 13)
            # Output: (batch_size, 384, 13, 13)
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Conv2d: Input channels = 384, Output channels = 256, kernel size = 3x3,
            # stride = 1, padding = 1.
            # Input: (batch_size, 384, 13, 13)
            # Output: (batch_size, 256, 13, 13)
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # MaxPool2d: Kernel size = 3x3, stride = 2.
            # Input: (batch_size, 256, 13, 13)
            # Output: (batch_size, 256, 6, 6)
            # Max pooling with 3x3 kernel and stride 2
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        
        # The classifier part of the model, composed of fully connected layers.
        self.classifier = nn.Sequential(
            # Flatten the input tensor
            # Input: (batch_size, 256, 6, 6)
            # Output: (batch_size, 256 * 6 * 6) = (batch_size, 9216)
            nn.Flatten(),
            
            # Linear layer with input size 6400 and output size 4096
            # Input: (batch_size, 6400)
            # Output: (batch_size, 4096)
            nn.Linear(128 * 6 * 6, 4096),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Dropout layer with 60% dropout rate
            nn.Dropout(0.6),
            
            # Linear layer with input size 4096 and output size 4096
            # Input: (batch_size, 4096)
            # Output: (batch_size, 4096)
            nn.Linear(4096, 4096),
            
            # ReLU activation function applied in place (no extra memory allocation)
            nn.ReLU(inplace=True),
            
            # Dropout layer with 60% dropout rate
            nn.Dropout(0.6),
            
            # Final linear layer with input size 4096 and output size nkeypoints * 2
            # Output is (nkeypoints * 2) coordinates (x, y) for each keypoint
            # Input: (batch_size, 4096)
            # Output: (batch_size, nkeypoints * 2)
            nn.Linear(4096, nkeypoints * 2)
        )
        
    def forward(self, x):
        # Define the forward pass through the network.
        # Pass input `x` through the feature extractor
        x = self.features(x)
        # Pass the result through the classifier to get the final output
        x = self.classifier(x)
        return x

In [None]:
# DeepPose Model Summary
# model = DeepPoseModel()
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = model.to(device)  # Move model to GPU
# summary(model, input_size=(3, 220, 220), device=str(device))

In [None]:
# Original ALexNet model
# class AlexNet(nn.Module):
#     def __init__(self, num_classes=1000):
#         super(AlexNet, self).__init__()
        
#         # Define the feature extractor part of the network
#         self.features = nn.Sequential(
#             # 1st Convolutional Layer: 3 input channels (RGB), 64 output channels, 11x11 kernel size, stride 4, padding 2
#             nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
#             nn.ReLU(inplace=True),
#             # Max pooling with 3x3 kernel and stride 2
#             nn.MaxPool2d(kernel_size=3, stride=2),
            
#             # 2nd Convolutional Layer: 64 input channels, 192 output channels, 5x5 kernel size, stride 1, padding 2
#             nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
#             nn.ReLU(inplace=True),
#             # Max pooling with 3x3 kernel and stride 2
#             nn.MaxPool2d(kernel_size=3, stride=2),
            
#             # 3rd Convolutional Layer: 192 input channels, 384 output channels, 3x3 kernel size, stride 1, padding 1
#             nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
            
#             # 4th Convolutional Layer: 384 input channels, 256 output channels, 3x3 kernel size, stride 1, padding 1
#             nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
            
#             # 5th Convolutional Layer: 256 input channels, 256 output channels, 3x3 kernel size, stride 1, padding 1
#             nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
#             # Max pooling with 3x3 kernel and stride 2
#             nn.MaxPool2d(kernel_size=3, stride=2)
#         )
        
#         # Define the classifier part of the network
#         self.classifier = nn.Sequential(
#             # Flatten the input
#             nn.Flatten(),
#             # 1st Fully Connected Layer: input size 256 * 6 * 6, output size 4096
#             nn.Linear(256 * 6 * 6, 4096),
#             nn.ReLU(inplace=True),
#             nn.Dropout(0.5),
            
#             # 2nd Fully Connected Layer: input size 4096, output size 4096
#             nn.Linear(4096, 4096),
#             nn.ReLU(inplace=True),
#             nn.Dropout(0.5),
            
#             # 3rd Fully Connected Layer (output layer): input size 4096, output size num_classes
#             nn.Linear(4096, num_classes)
#         )
        
#     def forward(self, x):
#         # Pass the input through the feature extractor
#         x = self.features(x)
#         # Pass the result through the classifier to get the final output
#         x = self.classifier(x)
#         return x


In [None]:
# AlexNet Summary
# model = AlexNet(num_classes=1000)  # Example model

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = model.to(device)  # Move model to GPU
# summary(model, input_size=(3, 224, 224), device=str(device))

## 2.8. Training model

### 2.8.0 Functions

In [None]:
def load_data_PyTorch(img_arr, kp_arr, batch_size, train_flag=True):
    '''
    Load data into PT dataset and dataLoader in specified batch size
    
    Params
    img_arr: images loaded into an array (i,255,255,3) and are converted to (i,3,255,255)
    kp_arr: array of keypoints (i, num_kp*2)
    batch_size: batch size 

    Return:
    PT_Dataset: containing input (x) and groundtruth (y)
    PT_DataLoader: Dataloader containing dataset and batch size

    '''

    # create tensors from arrays and load them to the GPU
    img_tensor = torch.tensor(img_arr, dtype=torch.float32).permute(0, 3, 1, 2).to('cuda')
    kp_tensor = torch.tensor(kp_arr, dtype=torch.float32).to('cuda')

    # Create a TensorDataset and DataLoader for training data
    dataset = TensorDataset(img_tensor, kp_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=train_flag)

    return dataset, dataloader

In [None]:
def create_timestamped_dir(descriptor, base_dir='/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/'):
    """
    Creates a directory with a timestamp appended to the base directory name.
    Returns the path to the created directory.
    
    Parameters:
    descriptor: string describing the run generally model_dataDescriptor
    base_dir (str): The base directory name. Default is './training_results'.
    
    Returns:
    str: The path to the created directory.
    """
    # Get the current datetime and format it as YYYY-MM-DD_HH-MM-SS
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    base_dir_descriptor = f"{base_dir}{descriptor}"
    
    # Create the final directory name with the timestamp
    final_dir = f"{base_dir_descriptor}_{timestamp}"
    
    # Create the directory
    os.makedirs(final_dir, exist_ok=True)
    
    return final_dir


In [None]:
def plot_training_curves(train_data, val_data, save_dir, data_descriptor='Loss', show_plot=False):
    # Plot the loss curves
    plt.figure(figsize=(10, 5))
    plt.plot(train_data, label=f'Training {data_descriptor}')
    plt.plot(val_data, label=f'Validation {data_descriptor}')
    plt.xlabel('Epoch')
    plt.ylabel(f'{data_descriptor}')
    plt.title(f'Training and Validation {data_descriptor} Over Epochs')
    plt.legend()

    # Save the plot
    plot_path = os.path.join(save_dir, f'{data_descriptor}_plot.png')
    plt.savefig(plot_path)
    print(f'{data_descriptor} plot saved to {plot_path}')

    # Optionally, display the plot
    if show_plot == True:
        plt.show()

In [None]:
def save_stats_and_models(model, epoch, val_loss, val_pck, save_dir, 
                     best_val_loss=None, best_val_pck=None, 
                     final_model=False, train_loss_list=None, val_loss_list=None, train_pck_list=None, val_pck_list=None):
    """
    Saves the best models based on validation loss, PCK value, and final model.
    Saves the train and val curves and results for training
    
    Parameters:
    - model (torch.nn.Module): The PyTorch model to be saved.
    - epoch (int): The current epoch number.
    - val_loss (float): The current validation loss.
    - val_pck (float): The current validation PCK value.
    - save_dir (str): The directory where the models will be saved.
    - best_val_loss (float): The best validation loss seen so far.
    - best_val_pck (float): The best validation PCK value seen so far.
    - final_model (bool): If True, saves the final model after all epochs.
    - train_loss_list (list): List of all the loss values from each epoch
    
    Returns:
    - best_val_loss (float): Updated best validation loss.
    - best_val_pck (float): Updated best validation PCK value.
    """
    
    # Check if the current model has the lowest validation loss
    if best_val_loss is None or val_loss < best_val_loss:
        best_val_loss = val_loss
        model_name = f'best_val_loss_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth'
        model_save_path_best_val_loss = os.path.join(save_dir, model_name)
        torch.save(model.state_dict(), model_save_path_best_val_loss)
        print(f'New best model saved with lowest validation loss to {model_save_path_best_val_loss}')
    
    # Check if the current model has the highest validation PCK
    if best_val_pck is None or val_pck > best_val_pck:
        best_val_pck = val_pck
        model_save_path_best_val_pck = os.path.join(save_dir, f'best_val_pck_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth')
        torch.save(model.state_dict(), model_save_path_best_val_pck)
        print(f'New best model saved with highest validation PCK to {model_save_path_best_val_pck}')
    
    # Save the final model and perform final stats evaluation and save
    if final_model:
        final_model_path = os.path.join(save_dir, f'final_model_epoch_{epoch}_PCK_{val_pck:.4f}_loss_{val_loss:.4f}.pth')
        torch.save(model.state_dict(), final_model_path)
        print(f'Final model saved to {final_model_path}')
        plot_training_curves(train_loss_list, val_loss_list, save_dir, 'Loss', show_plot=True)
        plot_training_curves(train_pck_list, val_pck_list, save_dir, data_descriptor='PCK@0.1', show_plot=True)
    
    return best_val_loss, best_val_pck, model_save_path_best_val_loss, model_save_path_best_val_pck, final_model_path

In [None]:
    
# this is where the training loop will go 
def train_loop(model, optimizer, train_dataloader, val_dataloader, num_epochs, descriptor):
    # Create the directory to save the results to
    save_dir = create_timestamped_dir(descriptor)

    # Assuming the model, loss function, and optimizer are already defined
    #model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU

    # Define your optimizer
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    # Load data into PT dataset and dataloader
    #train_dataset, train_dataloader = load_data_PyTorch(train_imgs_array, train_kp_array, 8)
    #val_dataset, val_dataloader = load_data_PyTorch(val_imgs_array, val_kp_array, 8, train_flag=False)

    # Training loop (variables)
    #num_epochs = 30  # Adjust the number of epochs as needed

    # Lists to store the training and validation loss for each epoch
    train_losses = []
    val_losses = []

    train_pck_list = []
    val_pck_list = []

    best_val_loss = None
    best_val_pck = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_train_loss = 0.0
        running_pck_005 = 0.0
        running_pck_01 = 0.0
        running_pck_02 = 0.0
        running_pck_val_005 = 0.0
        running_pck_val_01 = 0.0
        running_pck_val_02 = 0.0
        
        for batch_images, batch_keypoints in train_dataloader:
            # Move the data to the GPU
            batch_images = batch_images.to('cuda')
            batch_keypoints = batch_keypoints.to('cuda')

            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(batch_images)
            # Compute the loss
            loss = masked_mse(batch_keypoints, outputs)
            #print(loss)
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            # Accumulate the loss
            running_train_loss += loss.item()
            #print(running_train_loss)

            # compute metrics
            #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
            pck_01 = pck_metric(batch_keypoints, outputs, 0.1)
            #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)

            #running_pck_005 += pck_005.item()
            running_pck_01 += pck_01.item()
            #running_pck_02 += pck_02.item()

                
        avg_train_loss = running_train_loss / len(train_dataloader)
        #avg_pck_005 = running_pck_005 / len(train_dataloader)
        avg_pck_01 = running_pck_01 / len(train_dataloader)
        #avg_pck_02 = running_pck_02 / len(train_dataloader)

        # populate train losses list for evaluation
        train_losses.append(avg_train_loss)

        train_pck_list.append(avg_pck_01)
        
        # Validation phase
        model.eval()
        running_val_loss = 0.0
        
        with torch.no_grad():
            for batch_images, batch_keypoints in val_dataloader:
                # Move the data to the GPU
                batch_images = batch_images.to('cuda')
                batch_keypoints = batch_keypoints.to('cuda')

                outputs = model(batch_images)
                loss = masked_mse(batch_keypoints, outputs)
                running_val_loss += loss.item()

                # compute metrics
                #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
                pck_01_val = pck_metric(batch_keypoints, outputs, 0.1)
                #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)
                #running_pck_val_005 = pck_metric(batch_keypoints, outputs, 0.05)
                running_pck_val_01 += pck_01_val.item()
                #running_pck_val_02 = pck_metric(batch_keypoints, outputs, 0.2)

        
        avg_val_loss = running_val_loss / len(val_dataloader)
        #avg_pck_005 = running_pck_005 / len(train_dataloader)
        avg_val_pck_01 = running_pck_val_01 / len(val_dataloader)
        #avg_pck_02 = running_pck_02 / len(train_dataloader)

        # populate val losses list for evaluation
        val_losses.append(avg_val_loss)

        val_pck_list.append(avg_val_pck_01)

        # save best performing models based on the PCK and loss as well as the stats
        best_val_loss, best_val_pck = save_stats_and_models(
        model, epoch + 1, avg_val_loss, avg_val_pck_01, save_dir, 
        best_val_loss, best_val_pck)
        
        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Train PCK0.1: {avg_pck_01:.4f}, Val PCK0.1: {avg_val_pck_01:.4f}')
        
    save_stats_and_models(model, num_epochs, avg_val_loss, avg_val_pck_01, save_dir, 
                    best_val_loss, best_val_pck, final_model=True, train_loss_list=train_losses, 
                    val_loss_list=val_losses, train_pck_list=train_pck_list, val_pck_list=val_pck_list)

In [None]:
    
# this is where the training loop will go 
def train_loop_mixed_precision(model, optimizer, train_dataloader, val_dataloader, num_epochs, descriptor):
    # Create the directory to save the results to
    save_dir = create_timestamped_dir(descriptor)

    # Assuming the model, loss function, and optimizer are already defined
    #model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU

    # Define your optimizer
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    # Load data into PT dataset and dataloader
    #train_dataset, train_dataloader = load_data_PyTorch(train_imgs_array, train_kp_array, 8)
    #val_dataset, val_dataloader = load_data_PyTorch(val_imgs_array, val_kp_array, 8, train_flag=False)

    # Training loop (variables)
    #num_epochs = 30  # Adjust the number of epochs as needed

    # Lists to store the training and validation loss for each epoch
    train_losses = []
    val_losses = []

    train_pck_list = []
    val_pck_list = []

    best_val_loss = None
    best_val_pck = None

    scaler = GradScaler() 

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_train_loss = 0.0
        running_pck_005 = 0.0
        running_pck_01 = 0.0
        running_pck_02 = 0.0
        running_pck_val_005 = 0.0
        running_pck_val_01 = 0.0
        running_pck_val_02 = 0.0
        
        for batch_images, batch_keypoints in train_dataloader:
            # Move the data to the GPU
            batch_images = batch_images.to('cuda')
            batch_keypoints = batch_keypoints.to('cuda')

            # Zero the parameter gradients
            optimizer.zero_grad()
            with autocast():
                # Forward pass
                outputs = model(batch_images)
                # Compute the loss
                loss = masked_mse(batch_keypoints, outputs)
                #print(loss)


            # Backward pass and optimize
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update
            # Accumulate the loss
            running_train_loss += loss.item()
            #print(running_train_loss)

            # compute metrics
            #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
            pck_01 = pck_metric(batch_keypoints, outputs, 0.1)
            #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)

            #running_pck_005 += pck_005.item()
            running_pck_01 += pck_01.item()
            #running_pck_02 += pck_02.item()

                
        avg_train_loss = running_train_loss / len(train_dataloader)
        #avg_pck_005 = running_pck_005 / len(train_dataloader)
        avg_pck_01 = running_pck_01 / len(train_dataloader)
        #avg_pck_02 = running_pck_02 / len(train_dataloader)

        # populate train losses list for evaluation
        train_losses.append(avg_train_loss)

        train_pck_list.append(avg_pck_01)
        
        # Validation phase
        model.eval()
        running_val_loss = 0.0
        
        with torch.no_grad():
            for batch_images, batch_keypoints in val_dataloader:
                # Move the data to the GPU
                batch_images = batch_images.to('cuda')
                batch_keypoints = batch_keypoints.to('cuda')

                outputs = model(batch_images)
                loss = masked_mse(batch_keypoints, outputs)
                running_val_loss += loss.item()

                # compute metrics
                #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
                pck_01_val = pck_metric(batch_keypoints, outputs, 0.1)
                #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)
                #running_pck_val_005 = pck_metric(batch_keypoints, outputs, 0.05)
                running_pck_val_01 += pck_01_val.item()
                #running_pck_val_02 = pck_metric(batch_keypoints, outputs, 0.2)

        
        avg_val_loss = running_val_loss / len(val_dataloader)
        #avg_pck_005 = running_pck_005 / len(train_dataloader)
        avg_val_pck_01 = running_pck_val_01 / len(val_dataloader)
        #avg_pck_02 = running_pck_02 / len(train_dataloader)

        # populate val losses list for evaluation
        val_losses.append(avg_val_loss)

        val_pck_list.append(avg_val_pck_01)

        # save best performing models based on the PCK and loss as well as the stats
        best_val_loss, best_val_pck = save_stats_and_models(
        model, epoch + 1, avg_val_loss, avg_val_pck_01, save_dir, 
        best_val_loss, best_val_pck)
        
        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Train PCK0.1: {avg_pck_01:.4f}, Val PCK0.1: {avg_val_pck_01:.4f}')
        
    save_stats_and_models(model, num_epochs, avg_val_loss, avg_val_pck_01, save_dir, 
                    best_val_loss, best_val_pck, final_model=True, train_loss_list=train_losses, 
                    val_loss_list=val_losses, train_pck_list=train_pck_list, val_pck_list=val_pck_list)

### 2.8.1. Train loop

In [None]:
# Assuming the model, loss function, and optimizer are already defined
model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU

# Define your optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

# Load data into PT dataset and dataloader
bacth_size = 2 #batch_size
train_dataset, train_dataloader = load_data_PyTorch(train_imgs_array_aug_simple, train_kp_array_aug_simple, bacth_size)
val_dataset, val_dataloader = load_data_PyTorch(val_imgs_array, val_kp_array, bacth_size, train_flag=False)

# Training loop (variables)
num_epochs = 30  # Adjust the number of epochs as needed

descriptor = 'DeepPose_Simple_SimpleAug'

#train_loop(model, optimizer, train_dataloader, val_dataloader, num_epochs, descriptor)

train_loop_mixed_precision(model, optimizer, train_dataloader, val_dataloader, num_epochs, descriptor)

In [None]:
# train loop test
# Assuming the model, loss function, and optimizer are already defined
print('start loop')
model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU

# Define your optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

# Load data into PT dataset and dataloader
batch_size = 32 #batch_size
# create tensors from arrays 
img_tensor = torch.tensor(train_imgs_array_aug_simple, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
kp_tensor = torch.tensor(train_kp_array_aug_simple, dtype=torch.float32)#.to('cuda')

# Create a TensorDataset and DataLoader for training data
dataset = TensorDataset(img_tensor, kp_tensor)
train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

img_tensor = torch.tensor(val_imgs_array, dtype=torch.float32).permute(0, 3, 1, 2)#.to('cuda')
kp_tensor = torch.tensor(val_kp_array, dtype=torch.float32)#.to('cuda')

# Create a TensorDataset and DataLoader for training data
dataset = TensorDataset(img_tensor, kp_tensor)
val_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
#train_dataset, train_dataloader = load_data_PyTorch(train_imgs_array_aug_simple, train_kp_array_aug_simple, bacth_size)
#val_dataset, val_dataloader = load_data_PyTorch(val_imgs_array, val_kp_array, bacth_size, train_flag=False)

# Training loop (variables)
num_epochs = 30  # Adjust the number of epochs as needed

descriptor = 'DeepPose_Simple_SimpleAug'

#train_loop(model, optimizer, train_dataloader, val_dataloader, num_epochs, descriptor)

save_dir = create_timestamped_dir(descriptor)

# Assuming the model, loss function, and optimizer are already defined
#model = DeepPoseModel(nkeypoints=8).to('cuda')  # Move the model to GPU

# Define your optimizer
#optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

# Load data into PT dataset and dataloader
#train_dataset, train_dataloader = load_data_PyTorch(train_imgs_array, train_kp_array, 8)
#val_dataset, val_dataloader = load_data_PyTorch(val_imgs_array, val_kp_array, 8, train_flag=False)

# Training loop (variables)
#num_epochs = 30  # Adjust the number of epochs as needed

# Lists to store the training and validation loss for each epoch
train_losses = []
val_losses = []

train_pck_list = []
val_pck_list = []

best_val_loss = None
best_val_pck = None

for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_train_loss = 0.0
    running_pck_005 = 0.0
    running_pck_01 = 0.0
    running_pck_02 = 0.0
    running_pck_val_005 = 0.0
    running_pck_val_01 = 0.0
    running_pck_val_02 = 0.0
    
    for batch_images, batch_keypoints in train_dataloader:
        #print(batch_images.shape)
        # Move the data to the GPU
        batch_images = batch_images.to('cuda')
        batch_keypoints = batch_keypoints.to('cuda')

        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(batch_images)
        # Compute the loss
        loss = masked_mse(batch_keypoints, outputs)
        #print(loss)
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        # Accumulate the loss
        running_train_loss += loss.item()
        #print(running_train_loss)

        # compute metrics
        #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
        pck_01 = pck_metric(batch_keypoints, outputs, 0.1)
        #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)

        #running_pck_005 += pck_005.item()
        running_pck_01 += pck_01.item()
        #running_pck_02 += pck_02.item()

            
    avg_train_loss = running_train_loss / len(train_dataloader)
    #avg_pck_005 = running_pck_005 / len(train_dataloader)
    avg_pck_01 = running_pck_01 / len(train_dataloader)
    #avg_pck_02 = running_pck_02 / len(train_dataloader)

    # populate train losses list for evaluation
    train_losses.append(avg_train_loss)
    train_pck_list.append(avg_pck_01)
    
    # Validation phase
    model.eval()
    running_val_loss = 0.0
    
    with torch.no_grad():
        for batch_images, batch_keypoints in val_dataloader:
            # Move the data to the GPU
            batch_images = batch_images.to('cuda')
            batch_keypoints = batch_keypoints.to('cuda')

            outputs = model(batch_images)
            loss = masked_mse(batch_keypoints, outputs)
            running_val_loss += loss.item()

            # compute metrics
            #pck_005 = pck_metric(batch_keypoints, outputs, 0.05)
            pck_01_val = pck_metric(batch_keypoints, outputs, 0.1)
            #pck_02 = pck_metric(batch_keypoints, outputs, 0.2)
            #running_pck_val_005 = pck_metric(batch_keypoints, outputs, 0.05)
            running_pck_val_01 += pck_01_val.item()
            #running_pck_val_02 = pck_metric(batch_keypoints, outputs, 0.2)

    
    avg_val_loss = running_val_loss / len(val_dataloader)
    #avg_pck_005 = running_pck_005 / len(train_dataloader)
    avg_val_pck_01 = running_pck_val_01 / len(val_dataloader)
    #avg_pck_02 = running_pck_02 / len(train_dataloader)

    # populate val losses list for evaluation
    val_losses.append(avg_val_loss)

    val_pck_list.append(avg_val_pck_01)

    # save best performing models based on the PCK and loss as well as the stats
    best_val_loss, best_val_pck = save_stats_and_models(
    model, epoch + 1, avg_val_loss, avg_val_pck_01, save_dir, 
    best_val_loss, best_val_pck)
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Train PCK0.1: {avg_pck_01:.4f}, Val PCK0.1: {avg_val_pck_01:.4f}')
    
save_stats_and_models(model, num_epochs, avg_val_loss, avg_val_pck_01, save_dir, 
                best_val_loss, best_val_pck, final_model=True, train_loss_list=train_losses, 
                val_loss_list=val_losses, train_pck_list=train_pck_list, val_pck_list=val_pck_list)

## 2.9. Evaluating the model 

In [None]:
def load_model(model_path, model_class, device='cuda'):
    """
    Loads a PyTorch model from a .pth file.

    Parameters:
    - model_path (str): The path to the .pth model file.
    - model_class (torch.nn.Module): The class of the model to instantiate.
    - device (str): The device to load the model onto ('cuda' or 'cpu').

    Returns:
    - model (torch.nn.Module): The loaded PyTorch model.
    """
    # Instantiate the model class
    model = model_class().to(device)
    
    # Load the state dictionary into the model
    model.load_state_dict(torch.load(model_path, map_location=device))
    
    # Set the model to evaluation mode
    model.eval()
    
    return model

In [None]:

def predict(model, images, img_is_tensor=False, device='cuda'):
    """
    Generates predictions from a PyTorch model given an array of images.

    Parameters:
    - model (torch.nn.Module): The PyTorch model to use for predictions.
    - images (np.array): Array of images (e.g., shape: (num_images, 220, 220, 3)).
    - device (str): The device to run the model on ('cuda' or 'cpu').

    Returns:
    - predictions (np.array): Array of predictions (e.g., keypoints for each image).
    """
    # Convert images to PyTorch tensor and move to the specified device
    if not img_is_tensor:
        images_tensor = torch.tensor(images, dtype=torch.float32).permute(0, 3, 1, 2).to(device)
    
    # Forward pass through the model to get predictions
    with torch.no_grad():
        predictions = model(images_tensor)
    
    # Convert predictions back to a NumPy array and move to CPU if necessary
    predictions = predictions.cpu().numpy() if device == 'cuda' else predictions.numpy()
    
    return predictions

In [None]:
def plot_comparison(img, pred_keypoints, true_keypoints, save_dir, img_num, nkeypoints=8, keypoint_labels=None, connections = [(0, 1), (0, 2), (2, 3), (2, 4), (2, 5), (5, 6), (5, 7)]):
    """
    Plots predicted keypoints vs. ground truth keypoints on the same image.

    Parameters:
    - img: The image on which to plot the keypoints.
    - pred_keypoints: The predicted keypoints (flattened x, y coordinates).
    - true_keypoints: The ground truth keypoints (flattened x, y coordinates).
    - save_dir: Directory to save the result to
    - img_num: image number that is getting compared
    - nkeypoints:  Optional The number of keypoints (default=8).
    - keypoint_labels: Optional list of keypoint labels to display next to the keypoints.
    - connections: OPtional list of tupels defining the connections between kps
    """

    fig = plt.figure(figsize=(8, 8), dpi=100)
    plt.imshow(img)
    
    # Extract x and y coordinates for predicted keypoints
    pred_x_keypoints = pred_keypoints[::2]
    pred_y_keypoints = pred_keypoints[1::2]
    
    # Extract x and y coordinates for ground truth keypoints
    true_x_keypoints = true_keypoints[::2]
    true_y_keypoints = true_keypoints[1::2]

    # Plot skeleton for true keypoints
    for (i, j) in connections:
        plt.plot([true_x_keypoints[i], true_x_keypoints[j]], 
                 [true_y_keypoints[i], true_y_keypoints[j]], 
                 'r-', linewidth=1)

    # Plot skeleton for predicted keypoints
    for (i, j) in connections:
        plt.plot([pred_x_keypoints[i], pred_x_keypoints[j]], 
                 [pred_y_keypoints[i], pred_y_keypoints[j]], 
                 'g-', linewidth=1)
    
    # Plot predicted keypoints
    plt.scatter(pred_x_keypoints, pred_y_keypoints, marker='o', c='g', s=100, label='Predicted', edgecolor='black')
    
    # Plot ground truth keypoints
    plt.scatter(true_x_keypoints, true_y_keypoints, marker='x', c='r', s=100, label='Ground Truth')
    
    # If labels are provided, add them to the plot
    if keypoint_labels is not None:
        for i, (x, y) in enumerate(zip(true_x_keypoints, true_y_keypoints)):
            plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
                     bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

    # If labels are provided, add them to the plot
    if keypoint_labels is not None:
        for i, (x, y) in enumerate(zip(pred_x_keypoints, pred_y_keypoints)):
            plt.text(x, y, keypoint_labels[i], fontsize=8, color='white',
                     bbox=dict(facecolor='black', alpha=0.5, boxstyle='round,pad=0.3'))

    # Add a legend to differentiate between predicted and ground truth keypoints
    plt.legend()

    # Save the plot
    plot_path = os.path.join(save_dir, f'Comparison of predicted and ground truth for img {img_num}.png')
    plt.savefig(plot_path)
    #print(f'{data_descriptor} plot saved to {plot_path}')

    plt.show()


In [None]:
def predict_and_plot(model_path, start_img, end_img, model_class=DeepPoseModel, device='cuda'):
    """
    Loads a model, predicts keypoints for a range of images, and plots the predicted keypoints 
    versus ground truth keypoints on the same image. The images with plotted keypoints are then 
    saved to a specified directory.

    Parameters:
    - model_path: The file path to the saved model's .pth file.
    - start_img: The starting index of the images in the validation set to process.
    - end_img: The ending index of the images in the validation set to process (exclusive).
    - model_class: Optional. The class of the model architecture to instantiate and load 
                   with the saved weights (default=DeepPoseModel).
    - device: Optional. The device to run the model on ('cuda' for GPU, 'cpu' for CPU; default='cuda').
    
    Returns:
    - None. The function saves the images with plotted keypoints to the directory derived from the 
            model path.
    """

    # get img lists
    img_arr = val_imgs_array[start_img:end_img,:,:,:]
    true_kp_arr = val_kp_array[start_img:end_img,:]

    # Load the model
    model = load_model(model_path, model_class, device=device)

    # Get predictions
    predictions = predict(model, img_arr, device=device)
    #print(predictions)

    # DeNorm predictions 
    predictions_abs = []
    true_kp_arr_abs = []
    for i, kp in enumerate(predictions):

        img_size = img_arr[i].shape
        #print(img_size)

        #unNorm each prediction
        true_kp_abs, missing_kp = unnorm_keypoints(img_size, true_kp_arr[i])
        #print(missing_kp)
        kp_abs, missing_kp = unnorm_keypoints(img_size, kp, kp_to_null=missing_kp)
        #print(missing_kp)
        

        # save result to new list
        predictions_abs.append(kp_abs)
        true_kp_arr_abs.append(true_kp_abs)

    #print(predictions_abs)

    # get the save directory parent (where the images will be saved)
    save_dir = model_path.rsplit('/',1)[0]

    # labels
    labels = ['Head', 'Beak', 'Body_top', 'RFlipper', 'LFlipper', 'Body_bottom', 'LFoot', 'RFoot']

    for i, kp in enumerate(predictions_abs):

        plot_comparison(img_arr[i], predictions_abs[i], true_kp_arr_abs[i], save_dir, img_num=i+start_img)#, keypoint_labels=labels)

In [None]:
# get predictions and draw them 
model_path = '/home/matthew/Desktop/Master_Dev/masters_penguin_pose_estimation/runs/PE/DeepPose_Simple_SimpleAug_2024-08-22_15-17-20/final_model_epoch_30_PCK_0.5700_loss_0.0083.pth'
start_img = 55
end_img = 58

predict_and_plot(model_path, start_img, end_img)

In [None]:
# compare some poses
val_imgs_array.shape

In [None]:
val_kp_array.shape