In [1]:
import os, json, cv2, numpy as np, matplotlib.pyplot as plt, yaml
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
import pandas as pd
from PIL import Image
import torch.optim as optim
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.nn.functional as Func
from datetime import datetime
from torchvision.io import read_image
import scipy.linalg
import time

In [2]:
def load_class_info(yaml_file):
    with open(yaml_file, 'r') as file:
            class_info = yaml.safe_load(file)
    return class_info['classes']

def label_transform(classes, labels, num_classes): #One-hot encoding
    class_to_index = {class_name: index for index, class_name in enumerate(classes)}
    target = np.zeros(num_classes, dtype=int)
    for label in labels:
        if label in class_to_index:
            index = class_to_index[label]
            target[index] = 1
    return target

def generate_ecc_codes(num_classes, code_length=None):
    hadamard_size = 1
    while hadamard_size < num_classes:
        hadamard_size *= 2

    # Generate the Hadamard matrix
    M = scipy.linalg.hadamard(hadamard_size).astype(np.float32)
    print("Scipy hadamard ", M.shape)
    
    # Replace the first column for every second row
    M[np.arange(0, hadamard_size, 2), 0] = -1
    print("Replaced ", M.shape)
    
    # Shuffle the rows and columns
    np.random.seed(12754)
    np.random.shuffle(M)
    idx = np.random.permutation(hadamard_size)
    
    # Select the required number of rows and columns
    M = M[0:num_classes, idx[0:code_length]]
    
    return M
    
def label_transform_ecc(classes, labels, codewords):
    class_to_index = {class_name: index for index, class_name in enumerate(classes)}
    
    if not isinstance(codewords, (list, np.ndarray)):
        raise ValueError("codewords should be a list or numpy array")
    
    if len(codewords) == 0 or not isinstance(codewords[0], (list, np.ndarray)):
        raise ValueError("codewords should be a non-empty list of lists or 2D numpy array")
    
    codeword_length = len(codewords[0])
    target = np.zeros(codeword_length, dtype=int)
    
    for label in labels:
        if label in class_to_index:
            index = class_to_index[label]
            if index < len(codewords):
                target = codewords[index]
            else:
                print(f"Warning: Index {index} is out of range for codewords")
    
    return target

def ecc_encode(label, ecc_codes):
    class_index = label.index(1)  # Assuming label is initially one-hot
    return ecc_codes[class_index]
    
def transform(image, keypoints, one_hot_label, num_classes, image_size=(224, 224), fusion='concat'):
    transform_ops = transforms.Compose([
        transforms.Resize(image_size),
    ])
    image = transform_ops(image)
    keypoints = torch.tensor(keypoints).float()
    one_hot_label = torch.tensor(one_hot_label).float()
    one_hot_channel = one_hot_label.unsqueeze(1).unsqueeze(2).expand(len(one_hot_label), image_size[0], image_size[1])
    one_hot_channel = one_hot_channel.sum(dim=0, keepdim=True)
    
    input_combined = torch.cat((image, one_hot_channel), dim=0)

    if fusion == 'concat':
        input_combined = torch.cat((image, one_hot_channel), dim=0)

    elif fusion == 'add':
        if image.shape[0] == 3:
            padding = torch.zeros_like(image[0]).unsqueeze(0)
            padded_one_hot_channel = torch.cat([one_hot_channel, padding, padding], dim=0)
        input_combined = image + padded_one_hot_channel

    elif fusion == 'multiply':
        if image.shape[0] == 3:
            one_hot_channel = one_hot_channel.repeat(3, 1, 1)
        input_combined = image * one_hot_channel
        
    if fusion == 'concat':
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406, 0.0], std=[0.229, 0.224, 0.225, 1.0])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
    input_combined = normalize(input_combined)
    
    return input_combined, keypoints

def denormalize_image(image):
    denormalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225]
    )
    image = denormalize(image)
    image = torch.clamp(image, 0, 1)
    return image

def visualize_keypoints(image, keypoints):
    image = denormalize_image(image)
    image = transforms.ToPILImage()(image)
    plt.imshow(image)
    plt.scatter(keypoints[:, 0], keypoints[:, 1], s=10, marker='.', c='r')
    plt.show()

In [3]:
class CustomDataLoaderOneHot(Dataset):
    def __init__(self, dataset_folder, class_info_file, transform=None, label_transform=None, fusion_type='concat'):
        self.dataset_folder = dataset_folder
        self.transform = transform
        self.label_transform = label_transform
        self.imgs_files = self.load_data(dataset_folder)
        self.class_names = load_class_info(class_info_file)
        self.num_classes = len(self.class_names)
        self.fusion_type = fusion_type

    def load_data(self, dataset_folder):
        images_path = os.path.join(self.dataset_folder,"images/")
        annotations_path = os.path.join(self.dataset_folder,"annotations/")
        j_data = []
        for file in os.listdir(images_path):
            if file.endswith(".jpg"):
                json_path = os.path.join(annotations_path, file.split('.')[0] + '.json')
                with open(json_path) as f:
                    json_load = json.load(f)
                    for item in json_load['shapes']:
                        points = [value for row in item['points'] for value in row]
                        j_data.append({'image':  os.path.join(images_path,file),
                                     'label': item['label'],
                                     'points':points })
        json_data = pd.DataFrame(j_data)
        return json_data

    def get_keypoint(self, bboxes):
        centers = []
        for bbox in bboxes:
            center_x = (bbox[0] + bbox[2]) / 2
            center_y = (bbox[1] + bbox[3]) / 2
            centers.append((center_x, center_y))
        return centers
    
    def __len__(self):
        return len(self.imgs_files)

    def __getitem__(self, idx):
        target = {}
        img_path, label, bboxes_original = self.imgs_files.iloc[idx]
        label = [label]
        bboxes_original = [bboxes_original]
        keypoint_original = self.get_keypoint(bboxes_original)
        img_original = read_image(img_path).float() / 255.0

        if self.transform:
            label_transform = self.label_transform(self.class_names,label, self.num_classes)
            img, target['keypoints'] = self.transform(img_original, keypoint_original, label_transform, self.num_classes, fusion=self.fusion_type)
            target['labels'] = torch.as_tensor(label_transform, dtype=torch.int64)
        else:
            # img, bboxes = img_original, bboxes_original
            img, target['keypoints'] = img_original, keypoint_original
            target['labels'] = label

        return img, target, img_original

In [4]:
class CustomDataLoaderECC(Dataset):
    def __init__(self, dataset_folder, class_info_file, fusion_type, transform=None, label_transform=None, codeWords=None, ):
        self.dataset_folder = dataset_folder
        self.transform = transform
        self.label_transform = label_transform
        self.imgs_files = self.load_data(dataset_folder)
        self.class_names = load_class_info(class_info_file)
        self.num_classes = len(self.class_names)
        self.codeWords = codeWords
        self.fusion_type = fusion_type
        # print(self.fusion_type)

    def load_data(self, dataset_folder):
        images_path = os.path.join(self.dataset_folder,"images/")
        annotations_path = os.path.join(self.dataset_folder,"annotations/")
        j_data = []
        for file in os.listdir(images_path):
            if file.endswith(".jpg"):
                json_path = os.path.join(annotations_path, file.split('.')[0] + '.json')
                with open(json_path) as f:
                    json_load = json.load(f)
                    for item in json_load['shapes']:
                        points = [value for row in item['points'] for value in row]
                        j_data.append({'image':  os.path.join(images_path,file),
                                     'label': item['label'],
                                     'points':points })
        json_data = pd.DataFrame(j_data)
        return json_data

    def get_keypoint(self, bboxes):
        centers = []
        for bbox in bboxes:
            center_x = (bbox[0] + bbox[2]) / 2
            center_y = (bbox[1] + bbox[3]) / 2
            centers.append((center_x, center_y))
        return centers
    
    def __len__(self):
        return len(self.imgs_files)

    def __getitem__(self, idx):
        target = {}
        img_path, label, bboxes_original = self.imgs_files.iloc[idx]
        label = [label]
        bboxes_original = [bboxes_original]
        keypoint_original = self.get_keypoint(bboxes_original)
        img_original = read_image(img_path).float() / 255.0

        if self.transform:
            label_transform = self.label_transform(self.class_names,label, self.codeWords)
            img, target['keypoints'] = self.transform(img_original, keypoint_original, label_transform, self.num_classes, fusion=self.fusion_type)
            target['labels'] = torch.as_tensor(label_transform, dtype=torch.int64)
        else:
            # img, bboxes = img_original, bboxes_original
            img, target['keypoints'] = img_original, keypoint_original
            target['labels'] = label

        return img, target, img_original
    

In [5]:
class CustomDataLoaderNormal(Dataset):
    def __init__(self, dataset_folder, class_info_file, transform=None, demo=False):
        self.dataset_folder = dataset_folder
        self.transform = transform
        # self.label_transform = label_transform
        self.demo = demo
        self.imgs_files = self.load_data(dataset_folder)
        self.class_names = load_class_info(class_info_file)
        self.num_classes = len(self.class_names)
        # self.normalize_keypoints = normalize_keypoints
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.class_names)}

    def load_data(self, dataset_folder):
        images_path = os.path.join(self.dataset_folder,"images/")
        annotations_path = os.path.join(self.dataset_folder,"annotations/")
        j_data = []
        for file in os.listdir(images_path):
            if file.endswith(".jpg"):
                json_path = os.path.join(annotations_path, file.split('.')[0] + '.json')
                with open(json_path) as f:
                    json_load = json.load(f)
                    for item in json_load['shapes']:
                        points = [value for row in item['points'] for value in row]
                        j_data.append({'image':  os.path.join(images_path,file),
                                     'label': item['label'],
                                     'points':points })
        json_data = pd.DataFrame(j_data)
        return json_data

    def get_keypoint(self, bboxes):
        centers = []
        for bbox in bboxes:
            center_x = (bbox[0] + bbox[2]) / 2
            center_y = (bbox[1] + bbox[3]) / 2
            centers.append((center_x, center_y))
        return centers
    
    def __len__(self):
        return len(self.imgs_files)

    def __getitem__(self, idx):
        target = {}
        img_path, label, bboxes_original = self.imgs_files.iloc[idx]
        bboxes_original = [bboxes_original]
        label = torch.tensor(self.class_to_idx[label], dtype=torch.int32)
        keypoint_original = torch.tensor(self.get_keypoint(bboxes_original), dtype=torch.float32)
        img_original = Image.open(img_path).convert('RGB')

        if self.transform:
            img_original = self.transform(img_original)

        return img_original, keypoint_original, label

In [6]:
class CustomResNet18(nn.Module):
    def __init__(self, num_classes=20, num_keypoints=1, num_channels=4): 
        super(CustomResNet18, self).__init__()
        
        self.num_keypoints = num_keypoints
        self.resnet18 = models.resnet18(pretrained=True)
        self.resnet18.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.fc_cls = nn.Linear(self.resnet18.fc.in_features, num_classes)
        self.fc_kpts = nn.Linear(self.resnet18.fc.in_features, self.num_keypoints * 2)
        self.resnet18.fc = nn.Identity()
        
    def forward(self, x):
        features = self.resnet18(x)
        class_outputs = self.fc_cls(features)
        keypoint_outputs = self.fc_kpts(features)
        return class_outputs, keypoint_outputs.reshape(-1, self.num_keypoints, 2)


class CustomResNet50(nn.Module):
    def __init__(self, num_classes=20, num_keypoints=1, num_channels=4): 
        super(CustomResNet50, self).__init__()
        
        self.num_keypoints = num_keypoints
        self.resnet50 = models.resnet50(pretrained=True)
        self.resnet50.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.fc_cls = nn.Linear(self.resnet50.fc.in_features, num_classes)
        self.fc_kpts = nn.Linear(self.resnet50.fc.in_features, self.num_keypoints * 2)
        self.resnet50.fc = nn.Identity()
        
    def forward(self, x):
        features = self.resnet50(x)
        class_outputs = self.fc_cls(features)
        keypoint_outputs = self.fc_kpts(features)
        return class_outputs, keypoint_outputs.reshape(-1, self.num_keypoints, 2)

In [7]:
class_config_path = './../config/formated_class.yaml'
DATASET_FOLDER_EVAL = './../../../RnD_datasets/robocup_dataset/evaluation'

num_classes = len(load_class_info(class_config_path))
codewords = generate_ecc_codes(num_classes, 16)

fusion_types = ['concat','add','multiply']
batch_size = 1

eval_dataset_ecc= CustomDataLoaderECC(DATASET_FOLDER_EVAL, class_config_path, 'concat', transform=transform, label_transform=label_transform_ecc, codeWords=codewords)
dataloader_ecc = DataLoader(eval_dataset_ecc, batch_size=batch_size, shuffle=False)
# print(len(dataloader_ecc))
eval_dataset_onehot = CustomDataLoaderOneHot(DATASET_FOLDER_EVAL, class_config_path, transform=transform, label_transform=label_transform, fusion_type='concat')
dataloader_onehot = DataLoader(eval_dataset_onehot, batch_size=batch_size, shuffle=False)

eval_dataset_normal = CustomDataLoaderNormal(DATASET_FOLDER_EVAL, class_config_path, transform=transform, demo=True)
dataloader_normal = DataLoader(eval_dataset_normal, batch_size=batch_size, shuffle=False)

Scipy hadamard  (32, 32)
Replaced  (32, 32)


In [8]:
model_18_ecc = CustomResNet18(18, 1, 4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.to(device)
model_18_ecc.load_state_dict(torch.load('./resnet18/models/updated_models/error_correcting/resnet18_conditional_concat_100_epochs_ecc.pth'))
model_18_ecc.eval()

print('model loaded successfully')



model loaded successfully


In [9]:
results = []
inference_times = []

with torch.no_grad():
    for images, targets, label in dataloader_ecc:
        # print(images.shape)
        start_time = time.time()
        class_outputs, keypoint_outputs = model_18_ecc(images)
        end_time = time.time()
        # print(targets['labels'])
        # print(predictions)
        inference_time = (end_time - start_time) * 1000
        inference_times.append(inference_time)
        # print(f'Inference time: {inference_time:.2f} ms')
        results.append((keypoint_outputs, targets['keypoints']))

# print(np.average(inference_times))
average_inference_time = sum(inference_times) / len(inference_times)
print(f"Average inference time: {average_inference_time:.4f} milli seconds")
type(results[0])

Average inference time: 12.0755 milli seconds


tuple

In [10]:
def calculate_oks(pred_keypoints, gt_keypoints, scale=224, sigma=0.026):
    
    pred_keypoints = np.array(pred_keypoints).squeeze()
    gt_keypoints = np.array(gt_keypoints).squeeze()
    
    if pred_keypoints.ndim == 1:
        pred_keypoints = pred_keypoints.reshape(1, -1)
    if gt_keypoints.ndim == 1:
        gt_keypoints = gt_keypoints.reshape(1, -1)
    
    dx = pred_keypoints[:, 0] - gt_keypoints[:, 0]
    dy = pred_keypoints[:, 1] - gt_keypoints[:, 1]
    
    distances = np.sqrt(dx**2 + dy**2)
    # scale = np.sqrt(2) * image_size
    # print(scale)
    
    normalized_distances = distances / scale
    oks = np.mean(np.exp(-normalized_distances**2 / (2 * sigma**2)))
    return oks

def calculate_precision_recall(oks_scores, gt_keypoints, thresholds):
    precisions = []
    recalls = []
    
    for threshold in thresholds:
        tp = np.sum(oks_scores >= threshold)
        fp = np.sum(oks_scores < threshold)
        fn = len(gt_keypoints) - tp
        
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        
        precisions.append(precision)
        recalls.append(recall)
    
    return precisions, recalls

def calculate_ap(precisions, recalls):

    precisions = np.array(precisions)
    recalls = np.array(recalls)
    
    # Sort by recall
    sorted_indices = np.argsort(recalls)
    precisions = precisions[sorted_indices]
    recalls = recalls[sorted_indices]
    
    # Compute AP
    ap = np.trapz(precisions, recalls)
    return ap

def calculate_map(predictions, ground_truths, scale=224, sigma=0.026, thresholds=np.linspace(0.5, 0.95, 10)):
    
    all_aps = []
    
    for pred_keypoints, gt_keypoints in zip(predictions, ground_truths):
        oks_scores = []
        
        for pred_kpts, gt_kpts in zip(pred_keypoints, gt_keypoints):
            oks = calculate_oks(pred_kpts, gt_kpts, scale=scale, sigma=sigma)
            oks_scores.append(oks)
        
        precisions, recalls = calculate_precision_recall(oks_scores, gt_keypoints, thresholds)
        ap = calculate_ap(precisions, recalls)
        all_aps.append(ap)
    
    mAP = np.mean(all_aps)
    return mAP

In [11]:
num_instances = 0
total_oks = 0
total_mAp = 0
image_size = 224

for predictions, targets in results:
    # Ensure pred_kpts and target_kpts are numpy arrays
    pred_kpts = predictions.numpy() if isinstance(predictions, torch.Tensor) else predictions
    target_kpts = targets.numpy() if isinstance(targets, torch.Tensor) else targets
    
    # Calculate scale factor for each instance
    scale_factor = np.sqrt(2) * image_size
    
    oks = calculate_oks(pred_kpts, target_kpts, scale_factor)
    mAp = calculate_map(pred_kpts, target_kpts, scale_factor)

    total_mAp += mAp
    total_oks += oks
    num_instances += 1

average_oks = total_oks / num_instances
average_mAp = total_mAp / num_instances
print(f"Object Keypoint Similarity (OKS): {average_oks:.4f}")
print(f"Mean Average Precision (mAP): {average_mAp:.4f}")

Object Keypoint Similarity (OKS): 0.0145
Mean Average Precision (mAP): 0.0067


In [25]:
def avg_speed_inference(dataloader, model_path, custom_model, encoding_type, model_name, fusion, epoch):
    results = []
    inference_times = []
    
    if fusion != 'concat':
        model = custom_model(18, 1, 3)
    else:
        model = custom_model(18, 1, 4)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    
    print('model loaded successfully')

    with torch.no_grad():
        for images, targets, label in dataloader:
            # images.to(device)
            # print(images.shape)
            start_time = time.time()
            class_outputs, keypoint_outputs = model(images)
            end_time = time.time()
            inference_time = (end_time - start_time) * 1000
            inference_times.append(inference_time)
            results.append((keypoint_outputs, targets['keypoints']))
    
    average_inference_time = sum(inference_times) / len(inference_times)
    print(f"Average inference time for {model_name} - {encoding_type} - {fusion} - {epoch} : {average_inference_time:.4f} milli seconds")
    # print(results)
    return results

In [26]:
dataloaders_list = [dataloader_ecc, dataloader_onehot, dataloader_normal]
epochs_list = [100, 250]
fusion_types = ['concat','add','multiply']
custom_models = [CustomResNet18, CustomResNet50]
model_names = ['resnet18', 'resnet50'] #add SSD
encoding_types = ['error_correcting','one_hot']
enc_shorts = ['ecc', 'onehot']
# encoding_types = ['ecc', 'onehot', 'normal']
batch_size = 1

# './'+self.model_name+'/models/updated_models/error_correcting/resnet18_conditional_'+fusion+'_'+str(epochs)+'_epochs_ecc.pth'
path = None
num_instances = 0
total_oks = 0
total_mAp = 0
image_size = 224

for model_name in model_names:
    print('*****'+model_name+'******')
    for custom_model in custom_models:
        # print('*****'+custom_model+'******')
        for encoding_type in encoding_types:
            print('*****'+encoding_type+'******')
            for fusion in fusion_types:
                print('\n*****'+fusion+'******\n')
                for epoch in epochs_list:
                    results = []
                    eval_dataset = None
                    if(encoding_type=='error_correcting'):
                        eval_dataset = CustomDataLoaderECC(DATASET_FOLDER_EVAL, class_config_path, fusion, transform=transform, 
                                                           label_transform=label_transform_ecc, codeWords=codewords)
                        path = './'+model_name+'/models/updated_models/'+encoding_type+'/'+model_name+'_conditional_'+fusion+'_'+str(epoch)+'_epochs_ecc.pth'
                    
                    elif(encoding_type=='one_hot'):
                        eval_dataset = CustomDataLoaderECC(DATASET_FOLDER_EVAL, class_config_path, fusion, transform=transform, 
                                                           label_transform=label_transform_ecc, codeWords=codewords)
                        path = './'+model_name+'/models/updated_models/'+encoding_type+'/'+model_name+'_conditional_'+fusion+'_'+str(epoch)+'_epochs_onehot.pth'

                    print(path)
                    dataloader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)
                    results = avg_speed_inference(dataloader, path, custom_model, encoding_type, model_name, fusion, epoch)
                    for predictions, targets in results:
                        # Ensure pred_kpts and target_kpts are numpy arrays
                        pred_kpts = predictions.numpy() if isinstance(predictions, torch.Tensor) else predictions
                        target_kpts = targets.numpy() if isinstance(targets, torch.Tensor) else targets
                        
                        # Calculate scale factor for each instance
                        scale_factor = np.sqrt(2) * image_size
                        
                        oks = calculate_oks(pred_kpts, target_kpts, scale_factor)
                        mAp = calculate_map(pred_kpts, target_kpts, scale_factor)
                    
                        total_mAp += mAp
                        total_oks += oks
                        num_instances += 1

                    average_oks = total_oks / num_instances
                    average_mAp = total_mAp / num_instances
                    print(f"Object Keypoint Similarity (OKS): {average_oks:.4f}")
                    print(f"Mean Average Precision (mAP): {average_mAp:.4f} \n")
        

*****resnet18******
*****error_correcting******

*****concat******

./resnet18/models/updated_models/error_correcting/resnet18_conditional_concat_100_epochs_ecc.pth
model loaded successfully
Average inference time for resnet18 - error_correcting - concat - 100 : 13.0408 milli seconds
Object Keypoint Similarity (OKS): 0.0145
Mean Average Precision (mAP): 0.0067 

./resnet18/models/updated_models/error_correcting/resnet18_conditional_concat_250_epochs_ecc.pth
model loaded successfully
Average inference time for resnet18 - error_correcting - concat - 250 : 10.9500 milli seconds
Object Keypoint Similarity (OKS): 0.0107
Mean Average Precision (mAP): 0.0044 


*****add******

./resnet18/models/updated_models/error_correcting/resnet18_conditional_add_100_epochs_ecc.pth
model loaded successfully
Average inference time for resnet18 - error_correcting - add - 100 : 10.5539 milli seconds
Object Keypoint Similarity (OKS): 0.0078
Mean Average Precision (mAP): 0.0030 

./resnet18/models/updated_mode

FileNotFoundError: [Errno 2] No such file or directory: './resnet18/models/updated_models/one_hot/resnet18_conditional_concat_250_epochs_onehot.pth'