In [None]:
import os
import time
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
from os import listdir
import pandas as pd
import numpy as np
import glob
import cv2
import json
from os.path import expanduser
import splitfolders
import shutil
from define_path import Def_Path

from tqdm import tqdm

import torch 
import torchvision
from torchvision import models
from torchvision.models.detection.rpn import AnchorGenerator
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn 
import torchvision.transforms as T
from torchvision.transforms import functional as F
from torchsummary import summary

from sklearn.model_selection import train_test_split

import albumentations as A # Library for augmentations

import matplotlib.pyplot as plt 
from PIL import Image

import transforms, utils, engine, train
from utils import collate_fn
from engine import train_one_epoch, evaluate

# Report GPU memory figures, but only when a CUDA device is actually present:
# querying device 0 on a CPU-only machine raises, which would abort this cell
# even though the rest of the notebook falls back to the CPU.
if torch.cuda.is_available():
    t = torch.cuda.get_device_properties(0).total_memory  # total device memory (bytes)
    print(t)
    torch.cuda.empty_cache()

    r = torch.cuda.memory_reserved(0)  # bytes currently reserved by the caching allocator
    print(r)
    a = torch.cuda.memory_allocated(0)  # bytes currently occupied by tensors
    print(a)
    # f = r-a  # free inside reserved

# Checkpoint of the previously trained keypoint detector that this notebook builds on.
weights_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_sim_b1_e25_v0.pth'

In [None]:
# Dataset locations are derived from the user-specific settings exposed by
# Def_Path, so nobody has to hard-code their own home directory here.
path = Def_Path()

# Everything lives under <home>/Pictures/Data/.
parent_path = "".join((path.home, "/Pictures/", "Data/"))

# Dated capture directory: <parent_path><year>-<month>-<day>/
root_dir = "".join((parent_path, path.year, "-", path.month, "-", path.day, "/"))

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# torch.cuda.set_per_process_memory_fraction(0.9, 0)
print(device)

In [None]:
def train_transform():
    """Build the albumentations pipeline used to augment training samples.

    The pipeline always applies (p=1) a random 90-degree rotation followed by a
    random brightness/contrast change, and is configured to transform
    keypoints (``xy`` format) and bounding boxes (``pascal_voc`` format, with
    labels supplied through the ``bboxes_labels`` field) together with the image.

    Returns:
        The composed albumentations transform.
    """
    # Random rotation of an image by 90 degrees zero or more times
    rotate = A.RandomRotate90(p=1)
    # Random change of brightness & contrast
    brightness_contrast = A.RandomBrightnessContrast(
        brightness_limit=0.2,
        contrast_limit=0.3,
        brightness_by_max=True,
        always_apply=False,
        p=1,
    )
    augmentations = A.Sequential([rotate, brightness_contrast], p=1)
    # A.Resize(640, 480) was previously considered here to force 640x480 images.

    # Keypoint formats: https://albumentations.ai/docs/getting_started/keypoints_augmentation/
    keypoint_settings = A.KeypointParams(format='xy')
    # Bboxes must carry labels: https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
    bbox_settings = A.BboxParams(format='pascal_voc', label_fields=['bboxes_labels'])

    return A.Compose(
        [augmentations],
        keypoint_params=keypoint_settings,
        bbox_params=bbox_settings,
    )

In [None]:
def train_test_split(src_dir):
    """Split the images/annotations found in ``src_dir`` into train/val/test folders.

    All ``*.jpg`` files are staged into ``<src_dir>/images`` and all ``*.json``
    files into ``<src_dir>/annotations``; ``splitfolders.ratio`` then copies them
    into a dated ``split_folder_output-<year>-<month>-<day>`` directory under the
    notebook-level ``parent_path`` using a 70/20/10 ratio and a fixed seed. The
    two staging folders are removed afterwards, even if the split fails.

    Note: this definition shadows the ``train_test_split`` imported from
    ``sklearn.model_selection`` at the top of the file.

    Args:
        src_dir: Directory holding the raw ``.jpg`` / ``.json`` pairs.

    Returns:
        Path of the split output directory (the caller appends ``/train``,
        ``/val`` or ``/test``).
    """
    # os.path.join keeps the staging folders inside src_dir whether or not
    # src_dir carries a trailing slash.
    dst_dir_img = os.path.join(src_dir, "images")
    dst_dir_anno = os.path.join(src_dir, "annotations")

    if os.path.exists(dst_dir_img) and os.path.exists(dst_dir_anno):
        print("folders exist")
    # exist_ok avoids a FileExistsError when only one of the two folders is
    # left over from an interrupted earlier run.
    os.makedirs(dst_dir_img, exist_ok=True)
    os.makedirs(dst_dir_anno, exist_ok=True)

    output = parent_path + "split_folder_output" + "-" + path.year + "-" + path.month + "-" + path.day

    try:
        for jpgfile in glob.iglob(os.path.join(src_dir, "*.jpg")):
            shutil.copy(jpgfile, dst_dir_img)

        for jsonfile in glob.iglob(os.path.join(src_dir, "*.json")):
            shutil.copy(jsonfile, dst_dir_anno)

        splitfolders.ratio(src_dir,  # The location of dataset
                           output=output,  # The output location
                           seed=42,  # The number of seed
                           ratio=(.7, .2, .1),  # The ratio of split dataset
                           group_prefix=None,  # If your dataset contains more than one file like ".jpg", ".pdf", etc
                           move=False  # If you choose to move, turn this into True
                           )
    finally:
        # Always drop the temporary staging copies so a failed split does not
        # leave duplicated data behind in src_dir.
        shutil.rmtree(dst_dir_img, ignore_errors=True)
        shutil.rmtree(dst_dir_anno, ignore_errors=True)

    return output
    

In [None]:
class KPDataset(Dataset):
    """Dataset of robot images with per-joint bounding boxes and keypoints.

    ``root`` must contain an ``images`` and an ``annotations`` folder. Files are
    paired by their position in the sorted directory listings, and each
    annotation is a JSON file with ``bboxes`` and ``keypoints`` entries.

    Args:
        root: Dataset directory containing ``images/`` and ``annotations/``.
        transform: Optional albumentations-style callable taking ``image``,
            ``bboxes``, ``bboxes_labels`` and ``keypoints`` keyword arguments.
        demo: When True, ``__getitem__`` also returns the untransformed image
            and target (for example for visualization).
    """

    # Class name of each annotated object, passed to the transform as bbox labels.
    JOINT_NAMES = ['base_joint', 'joint2', 'joint3', 'joint4', 'joint5', 'joint6']
    # Integer class ids stored in target["labels"], one per joint.
    JOINT_LABELS = [1, 2, 3, 4, 5, 6]

    def __init__(self, root, transform=None, demo=False):
        self.root = root
        self.transform = transform
        self.demo = demo  # Use demo=True if you need transformed and original images (for example, for visualization purposes)
        self.imgs_files = sorted(os.listdir(os.path.join(root, "images")))
        self.annotations_files = sorted(os.listdir(os.path.join(root, "annotations")))

    @staticmethod
    def _make_target(bboxes, keypoints, idx):
        """Pack boxes/keypoints of sample ``idx`` into a detection target dict of tensors."""
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
        target = {}
        target["boxes"] = bboxes
        target["labels"] = torch.as_tensor(KPDataset.JOINT_LABELS, dtype=torch.int64)  # all objects are joint positions
        target["image_id"] = torch.tensor([idx])
        # Box area = height * width for [x_min, y_min, x_max, y_max] boxes.
        target["area"] = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
        target["iscrowd"] = torch.zeros(len(bboxes), dtype=torch.int64)
        target["keypoints"] = torch.as_tensor(keypoints, dtype=torch.float32)
        return target

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img, target, img_file)``, or
            ``(img, target, img_original, target_original, img_file)`` when
            ``demo`` is True.

        Raises:
            OSError: If the image file cannot be read.
        """
        img_file = self.imgs_files[idx]
        img_path = os.path.join(self.root, "images", img_file)
        annotations_path = os.path.join(self.root, "annotations", self.annotations_files[idx])

        img_original = cv2.imread(img_path)
        if img_original is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque cvtColor assertion.
            raise OSError(f"Could not read image file: {img_path}")
        img_original = cv2.cvtColor(img_original, cv2.COLOR_BGR2RGB)

        with open(annotations_path) as f:
            data = json.load(f)
        bboxes_original = data['bboxes']
        keypoints_original = data['keypoints']

        # All objects are keypoints on the robot
        bboxes_labels_original = list(self.JOINT_NAMES)

        if self.transform:
            # Converting keypoints from [x,y,visibility]-format to [x, y]-format + Flattening nested list of keypoints
            # For example, if we have the following list of keypoints for three objects (each object has two keypoints):
            # [[obj1_kp1, obj1_kp2], [obj2_kp1, obj2_kp2], [obj3_kp1, obj3_kp2]], where each keypoint is in [x, y]-format
            # Then we need to convert it to the following list:
            # [obj1_kp1, obj1_kp2, obj2_kp1, obj2_kp2, obj3_kp1, obj3_kp2]
            keypoints_original_flattened = [el[0:2] for kp in keypoints_original for el in kp]

            # Apply augmentations
            transformed = self.transform(image=img_original, bboxes=bboxes_original, bboxes_labels=bboxes_labels_original, keypoints=keypoints_original_flattened)
            img = transformed['image']
            bboxes = transformed['bboxes']
            # Unflattening list transformed['keypoints'] back to one keypoint per object:
            # [kp1, kp2, ...] -> [[kp1], [kp2], ...]
            keypoints_transformed_unflattened = np.reshape(np.array(transformed['keypoints']), (-1, 1, 2)).tolist()

            # Converting transformed keypoints from [x, y]-format to [x,y,visibility]-format by appending original visibilities to transformed coordinates of keypoints
            keypoints = []
            for o_idx, obj in enumerate(keypoints_transformed_unflattened):  # Iterating over objects
                obj_keypoints = []
                for k_idx, kp in enumerate(obj):  # Iterating over keypoints in each object
                    obj_keypoints.append(kp + [keypoints_original[o_idx][k_idx][2]])
                keypoints.append(obj_keypoints)

        else:
            img, bboxes, keypoints = img_original, bboxes_original, keypoints_original

        # Convert everything into a torch tensor
        target = self._make_target(bboxes, keypoints, idx)
        img = F.to_tensor(img)
        target_original = self._make_target(bboxes_original, keypoints_original, idx)
        img_original = F.to_tensor(img_original)

        if self.demo:
            return img, target, img_original, target_original, img_file
        else:
            return img, target, img_file

    def __len__(self):
        """Number of samples, i.e. number of files in the ``images`` folder."""
        return len(self.imgs_files)

In [None]:
def get_model(num_keypoints, weights_path=None):
    """Create a Keypoint R-CNN (ResNet-50 FPN) detector for the robot joints.

    Args:
        num_keypoints: Number of keypoints predicted per detected object.
        weights_path: Optional path to a saved ``state_dict`` to load into the
            freshly built model.

    Returns:
        The constructed model (with the checkpoint weights loaded when
        ``weights_path`` is given).
    """
    anchor_generator = AnchorGenerator(sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0))
    model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=False,
                                                                   pretrained_backbone=True,
                                                                   num_keypoints=num_keypoints,
                                                                   num_classes=7,  # background + the six joint classes (labels 1..6)
                                                                   rpn_anchor_generator=anchor_generator)

    if weights_path:
        # Deserialize onto the CPU so a checkpoint saved from a GPU run also
        # loads on a CPU-only host; load_state_dict copies the values into the
        # model's own parameters, wherever they live.
        state_dict = torch.load(weights_path, map_location="cpu")
        model.load_state_dict(state_dict)

    return model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as func
import networkx as nx


class GNNEncoder(nn.Module):
    """Graph encoder producing one feature vector per edge of the keypoint chain.

    The vertices are connected as a chain (0-1, 1-2, ...). Vertex features are
    embedded, combined into edge features (together with the edge length),
    aggregated back onto the vertices, and combined into edge features a second
    time; the result is squashed with a sigmoid.

    Args:
        vertices_dim: Number of features per input vertex.
        hidden_dim: Width of the hidden vertex/edge embeddings.
        num_vertices: Number of vertices in the graph (stored only; the forward
            pass uses however many rows ``vertices`` has).
    """

    def __init__(self, vertices_dim=5, hidden_dim=128, num_vertices=6):
        super(GNNEncoder, self).__init__()
        self.f_enc = nn.Linear(vertices_dim, hidden_dim)
        # +1 input feature: the scalar edge weight is concatenated to the two
        # endpoint embeddings in forward().
        self.f_e1 = nn.Linear(hidden_dim * 2 + 1, hidden_dim)
        self.f_v = nn.Linear(hidden_dim, hidden_dim)
        self.f_e2 = nn.Linear(hidden_dim * 2, hidden_dim)
        self.num_vertices = num_vertices

    def generate_edges(self, vertices):
        """Connect consecutive vertices into a chain.

        Args:
            vertices: Tensor with one row of features per vertex.

        Returns:
            ``(edges, weights)``: a long tensor of ``(i, i + 1)`` index pairs,
            one row per edge, and a float32 tensor holding the Euclidean
            distance between the two endpoint feature rows of each edge. Both
            live on ``vertices.device``; the weights are detached constants.
        """
        num_edges = max(vertices.shape[0] - 1, 0)
        source = torch.arange(num_edges, dtype=torch.long, device=vertices.device)
        edges_tensor = torch.stack((source, source + 1), dim=1)
        # Distance between neighbouring rows; detached so it acts as a plain
        # input feature rather than a differentiable path.
        edges_weights = (vertices[1:] - vertices[:-1]).norm(dim=1).detach().to(torch.float32)
        return edges_tensor, edges_weights

    def forward(self, vertices):
        """Encode the vertex features into per-edge features.

        Args:
            vertices: Tensor with one row of ``vertices_dim`` features per vertex.

        Returns:
            ``(vertices, h_e2_prob, edges)``: the unchanged input, the sigmoid
            edge features (one row of ``hidden_dim`` values per edge) and the
            edge index pairs produced by ``generate_edges``.
        """
        h1 = self.f_enc(vertices)
        edges, edges_weights = self.generate_edges(vertices)
        source, target = edges[:, 0], edges[:, 1]

        # vertex -> edge: both endpoint embeddings plus the edge weight
        h_e1 = self.f_e1(torch.cat((h1[source], h1[target], edges_weights.unsqueeze(1)), dim=1))

        # edge -> vertex: sum every edge feature onto its target vertex so the
        # result has one row per vertex and can be indexed by vertex id below
        # (the decoder accumulates onto the target vertex in the same way).
        h_vertices = torch.zeros_like(h1).index_add(0, target, h_e1)
        h_j_2 = self.f_v(h_vertices)

        # vertex -> edge, second pass
        h_e2 = self.f_e2(torch.cat((h_j_2[source], h_j_2[target]), dim=1))
        h_e2_prob = torch.sigmoid(h_e2)
        return vertices, h_e2_prob, edges

class GNNDecoder(nn.Module):
    """Graph decoder that refines vertex features using the encoded edge features.

    For every edge a message is computed from its two endpoint rows and its
    weight, gated by the encoder's edge features, projected to the vertex
    dimension and summed onto the edge's target vertex. The accumulated
    messages are transformed and added to the input vertices.

    Args:
        vertices_dim: Number of features per vertex.
        hidden_dim: Width of the edge features (must match the encoder's).
        num_vertices: Accepted for symmetry with ``GNNEncoder``; not used.
    """

    def __init__(self, vertices_dim=5, hidden_dim=128, num_vertices=6):
        super(GNNDecoder, self).__init__()
        # Two concatenated vertex rows plus the scalar edge weight.
        self.f_e = nn.Linear(vertices_dim * 2 + 1, hidden_dim)
        self.f_h = nn.Linear(hidden_dim, vertices_dim)  # Transform h_ij to the same dimension as vertices
        self.f_v = nn.Linear(vertices_dim, vertices_dim)  # Update vertex feature

    def forward(self, vertices, h_e2_prob, edges, edges_weights=None):
        """Update the vertex features from the edge features.

        Args:
            vertices: Tensor with one row of ``vertices_dim`` features per vertex.
            h_e2_prob: Encoder edge features, one row of ``hidden_dim`` values
                per edge.
            edges: Long tensor of ``(source, target)`` vertex index pairs, one
                row per edge.
            edges_weights: Optional per-edge weights. When omitted, the
                Euclidean distance between the two endpoint rows is used, which
                is how ``GNNEncoder.generate_edges`` defines the weights.

        Returns:
            Tensor of the same shape as ``vertices`` holding the updated features.
        """
        source, target = edges[:, 0], edges[:, 1]
        h_source = vertices[source]
        h_target = vertices[target]

        if edges_weights is None:
            # Detached: the weight is an input feature, not a gradient path.
            edges_weights = (h_source - h_target).norm(dim=1).detach()

        # Per-edge message, gated element-wise by the encoder's edge features.
        edge_input = torch.cat((h_source, h_target, edges_weights.unsqueeze(1)), dim=1)
        h_ij = h_e2_prob * self.f_e(edge_input)
        h_ij_transformed = self.f_h(h_ij)  # Transform h_ij to the same dimension as vertices

        # Accumulate edge features onto the target vertex of each edge.
        h = torch.zeros_like(vertices).index_add(0, target, h_ij_transformed)

        h_transformed = self.f_v(h.view(-1, vertices.shape[1]))  # Transform h
        h_transformed = h_transformed.view(vertices.shape)  # Reshape back to original shape
        vertices_g = vertices + h_transformed  # Update vertex features

        return vertices_g



In [None]:
# class TrifocalLoss(nn.Module):
#     def __init__(self):
#         super().__init__()

#     def forward(self, vertices_pred, vertices_gt):
#         # Only consider the first two dimensions
#         vertices_pred = vertices_pred[:, :3]
#         vertices_gt = vertices_gt.squeeze()[:, :3]  # Use squeeze() to remove the singular dimension

#         loss = (vertices_gt - torch.tensor(vertices_pred)).pow(2).mean()  # Changed from sum() to mean()
#         return loss
# class HuberLoss(nn.Module):
#     def __init__(self, delta=1.0):
#         super().__init__()
#         self.delta = delta

#     def forward(self, vertices_pred, vertices_gt):
#         vertices_pred = vertices_pred[:, :3]
#         vertices_gt = vertices_gt.squeeze()[:, :3]
#         diff = (vertices_gt - vertices_pred).abs()
#         loss = torch.where(diff < self.delta, 0.5 * diff.pow(2), self.delta * (diff - 0.5 * self.delta))
#         return loss.mean()
# def cross_entropy_loss_func(edges_prob, edges_gt):
#     edges_gt_expanded = edges_gt.unsqueeze(-1).float()
#     loss_func = nn.BCEWithLogitsLoss()
#     loss = loss_func(edges_prob, edges_gt_expanded)
#     return loss
    
# class VisibleHuberLoss(nn.Module):
#     def __init__(self, delta=1.0):
#         super().__init__()
#         self.delta = delta

#     def forward(self, vertices_pred, vertices_gt):
#         print("Vertice_gt inside huber", vertices_gt)
#         visibility = vertices_gt[:, 3]  # extracting the visibility
#         vertices_pred = vertices_pred[:, :3]  # considering only x, y coordinates, confidence_score
#         vertices_gt = vertices_gt.squeeze()[:, :3]  # considering only x, y coordinates, confidence_score
#         print(f'vertices_pred shape: {vertices_pred.shape}')  # Debugging print
#         print(f'vertices_gt shape: {vertices_gt.shape}')  # Debugging print
#         diff = (vertices_gt - vertices_pred).abs()
#         loss = torch.where(diff < self.delta, 0.5 * diff.pow(2), self.delta * (diff - 0.5 * self.delta))
#         # Multiply by visibility
#         weighted_loss = visibility[:, None] * loss  # using None to keep dimensions consistent
#         return weighted_loss.mean()

class OccludedKeyPointLoss(nn.Module):
    """Mean Huber loss over the first three columns of predicted vs. ground-truth vertices.

    Args:
        delta: Absolute error below which the loss is quadratic and from which
            on it grows linearly.
    """

    def __init__(self, delta=1.0):
        super().__init__()
        self.delta = delta

    def forward(self, vertices_pred, vertices_gt):
        """Return the scalar Huber loss between ``vertices_pred[:, :3]`` and ``vertices_gt[:, :3]``.

        ``vertices_gt`` must have at least four columns: column 3 is read as
        the visibility flag, although the visibility weighting is currently
        disabled and the flag does not influence the result.
        """
        # Read (but do not apply) the visibility column of the ground truth.
        _visibility = vertices_gt[:, 3].unsqueeze(1)

        # Only the first three columns take part in the regression.
        pred_xyc = vertices_pred[:, :3]
        gt_xyc = vertices_gt[:, :3]
        abs_error = (gt_xyc - pred_xyc).abs()

        # Huber: quadratic close to zero, linear beyond delta.
        quadratic_part = 0.5 * abs_error**2
        linear_part = self.delta * (abs_error - 0.5 * self.delta)
        per_element = torch.where(abs_error < self.delta, quadratic_part, linear_part)

        return per_element.mean()
    
def visibility_loss(vertices_pred, vertices_gt):
    """Cross-entropy between the visibility columns (index 3) of prediction and ground truth.

    NOTE(review): both arguments passed to ``cross_entropy`` are 1-D slices, so
    the predicted column is treated as a single set of class scores rather than
    as independent per-keypoint flags - confirm this is the intended loss.
    """
    predicted_visibility = vertices_pred[:, 3]
    target_visibility = vertices_gt[:, 3]
    # Loss based on visibility of keypoints
    return func.cross_entropy(predicted_visibility, target_visibility)

def edge_loss(edges_prob, edges_gt):
    """Negative log-likelihood of the edge features at the ground-truth endpoint columns.

    A 0/1 target with the same shape as ``edges_prob`` is built: in row ``i``
    the columns ``u`` and ``v`` of edge ``i`` are set to 1 (endpoints or rows
    that fall outside ``edges_prob`` are skipped). The loss is the negated sum
    of ``log(edges_prob)`` over the marked entries, with probabilities clamped
    to at least 1e-7 to keep the logarithm finite.

    Args:
        edges_prob: 2-D tensor of probabilities, one row per edge.
        edges_gt: Tensor of ``(u, v)`` index pairs, one row per edge.

    Returns:
        Scalar loss tensor on the device of ``edges_prob``.
    """
    # Build the target directly on the device of edges_prob instead of relying
    # on a notebook-global `device`, so the function works wherever its inputs live.
    edges_gt_expanded = torch.zeros(edges_prob.shape, dtype=torch.float32, device=edges_prob.device)
    num_rows, num_cols = edges_gt_expanded.shape[0], edges_gt_expanded.shape[1]

    # Rows beyond num_rows have no counterpart in edges_prob and are ignored.
    for row, (u, v) in enumerate(edges_gt[:num_rows].tolist()):
        if u < num_cols:
            edges_gt_expanded[row, u] = 1
        if v < num_cols:
            edges_gt_expanded[row, v] = 1

    # Compute the cross-entropy loss
    loss = -torch.sum(edges_gt_expanded * torch.log(torch.clamp(edges_prob, min=1e-7)))

    return loss



In [None]:
# class KeypointPipeline(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.keypoint_model = torch.load(weights_path).to(device)
#         self.keypoint_model.eval()  # Set the model to evaluation mode
#         self.keypoint_model = self.keypoint_model.to(device)
#         self.gnn_encoder = GNNEncoder()
#         self.gnn_decoder = GNNDecoder()
        
#     def forward(self, imgs):
#         outputs = []
#         for i in range(imgs.shape[0]):
#             img = imgs[i].unsqueeze(0).to(device)  # Unsqueeze the 0th dimension to make a batch of size 1
#             # Temporarily set the keypoint model to evaluation mode
#             keypoint_model_training = self.keypoint_model.training  # Save the current mode
#             self.keypoint_model.eval()
#             with torch.no_grad():
#                 output = self.keypoint_model(img)  # Keypoint model expects a list of images
                
#             # Set the keypoint model back to its previous mode
#             self.keypoint_model.train(keypoint_model_training)
                
#             img = (img[0].permute(1,2,0).detach().cpu().numpy() * 255).astype(np.uint8)
#             scores = output[0]['scores'].detach().cpu().numpy()
#             high_scores_idxs = np.where(scores > 0.7)[0].tolist() # Indexes of boxes with scores > 0.7
#             post_nms_idxs = torchvision.ops.nms(output[0]['boxes'][high_scores_idxs], \
#                 output[0]['scores'][high_scores_idxs], 0.3).cpu().numpy() # Indexes of boxes left after applying NMS (iou_threshold=0.3)

#             keypoints = []
#             key_points = []
#             for kps in output[0]['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
#                 keypoints.append(list(map(int, kps[0,0:2])))
#                 key_points.append([list(map(int, kp[:2])) for kp in kps])

# #             print("keypoints", keypoints)

#             labels = []
#             for label in output[0]['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
#                 labels.append(label)
#     #             labels.append('j' + str(int(label)))
    
#             print("keypoints", keypoints)

# #             print("labels", labels)

# #             keypoints_ = [x for _,x in sorted(zip(labels,keypoints))]
#     #         kp_label = [list(x) + [y] for (x, y) in sorted(zip(keypoints, labels))]

#             # Create a dictionary where the key is the label and the value is the keypoint
#             label_keypoint_dict = {lbl: kp for kp, lbl in zip(keypoints, labels)}

#             # Convert the dictionary back to a list and sort it by the label keys
#             labeled_keypoints = [value + [key] for key, value in sorted(label_keypoint_dict.items())] #,key=lambda item: int(item[0][1:]))]

# #             print("keypoints_", keypoints_)
#             print("kp_label", labeled_keypoints)
# # 
#             keypoints = torch.stack([torch.tensor(kp) for kp in labeled_keypoints]).float().to(device)
#             vertices, enc_e, edges = self.gnn_encoder(keypoints)
#             updated_vertices = self.gnn_decoder(vertices, enc_e, edges)
#             outputs.append((updated_vertices, enc_e, edges))

#         return outputs  # A list of tuples, each containing updated_vertices, enc_e, edges for an image in the batch



In [None]:
class KeypointPipeline(nn.Module):
    """Keypoint detector followed by a GNN that fills in undetected keypoints.

    For each image the saved keypoint model is run in evaluation mode, its
    detections are reduced to one keypoint per label, and the GNN
    encoder/decoder predicts a row for every label that was not detected.

    After each processed image ``self.enc_e`` and ``self.edges`` hold the
    encoder's edge features and edge index pairs for that image.

    Args:
        weights_path: Path to a model object saved with ``torch.save(model, ...)``.
        num_vertices: Number of keypoint labels (labels run from 1 to
            ``num_vertices``).
    """

    def __init__(self, weights_path, num_vertices):
        super().__init__()

        # SECURITY: torch.load unpickles a complete model object here; only
        # load checkpoints from a trusted source. map_location lets a model
        # saved on a GPU be loaded on a CPU-only host.
        self.keypoint_model = torch.load(weights_path, map_location=device).to(device)
        self.num_vertices = num_vertices
        self.gnn_encoder = GNNEncoder()
        self.gnn_decoder = GNNDecoder()

    def process_model_output(self, output):
        """Turn raw detector output into one ``[x, y, confidence, visible, label]`` row per label.

        Detections scoring above 0.7 are kept and filtered with NMS
        (IoU threshold 0.3); for each label the most confident detection wins.
        Labels without a detection start as ``[0, 0, 0, 0, label]`` and are
        replaced by the GNN decoder's prediction for that row.

        Args:
            output: Detector output, a list whose first element is a dict with
                ``scores``, ``boxes``, ``labels`` and ``keypoints`` tensors.

        Returns:
            Float tensor with one 5-value row per label.
        """
        detections = output[0]
        high_scores_idxs = (detections['scores'] > 0.7).nonzero(as_tuple=True)[0]
        kept_scores = detections['scores'][high_scores_idxs]
        post_nms_idxs = torchvision.ops.nms(detections['boxes'][high_scores_idxs], kept_scores, 0.3)

        confidence = kept_scores[post_nms_idxs].detach().cpu().numpy()
        labels = detections['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()
        detected_kps = detections['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()

        # Keep the most confident detection per label. Row layout is
        # [x, y, confidence, visible, label], so confidence sits at index 2
        # (index -2 is the visibility flag, which is always 1 for detections).
        label_to_keypoint = {}
        for kps, conf, label in zip(detected_kps, confidence, labels):
            label = int(label)
            # Setting t_i = 1 because label is found
            keypoint = [int(kps[0, 0]), int(kps[0, 1]), float(conf), 1, label]
            if label not in label_to_keypoint or label_to_keypoint[label][2] < keypoint[2]:
                label_to_keypoint[label] = keypoint

        # Placeholder rows (t_i = 0) for every expected label, overwritten by
        # whatever was actually detected.
        all_keypoints = {i: [0, 0, 0, 0, i] for i in range(1, self.num_vertices + 1)}
        all_keypoints.update(label_to_keypoint)

        keypoints = torch.tensor(list(all_keypoints.values()), dtype=torch.float32, device=device)
        visibility = keypoints[:, 3].unsqueeze(1)  # Extracting the visibility
        keypoints_visible = keypoints * visibility  # Detected rows; undetected rows are zeroed

        vertices, self.enc_e, self.edges = self.gnn_encoder(keypoints_visible)
        vertices_pred = self.gnn_decoder(vertices, self.enc_e, self.edges)

        # Detected rows are passed through unchanged; every undetected row is
        # taken from the decoder's prediction in full (all five columns).
        occluded_rows = visibility == 0
        return torch.where(occluded_rows, vertices_pred, keypoints_visible)

    def process_image(self, img):
        """Run the detector on one image tensor and return its completed keypoint rows."""
        img = img.unsqueeze(0).to(device)
        # Temporarily set the keypoint model to evaluation mode
        keypoint_model_training = self.keypoint_model.training  # Save the current mode
        self.keypoint_model.eval()
        with torch.no_grad():
            output = self.keypoint_model(img)
        # Set the keypoint model back to its previous mode
        self.keypoint_model.train(keypoint_model_training)

        return self.process_model_output(output)

    def forward(self, imgs):
        """Process a batch of images.

        Args:
            imgs: Tensor whose first dimension indexes the images.

        Returns:
            List with one keypoint tensor per image.
        """
        return [self.process_image(imgs[i]) for i in range(imgs.shape[0])]
    

In [None]:
# class KeypointPipeline(nn.Module):
#     def __init__(self, num_keypoints, weights_path=None):
#         super().__init__()

#         # Instantiate your Keypoint R-CNN model
#         self.keypoint_model = get_model(num_keypoints, weights_path)
#         self.keypoint_model.to(device)

#         self.gnn_encoder = GNNEncoder()
#         self.gnn_decoder = GNNDecoder()

#     def process_model_output(self, output):
#         scores = output[0]['scores'].detach().cpu().numpy()
#         high_scores_idxs = np.where(scores > 0.7)[0].tolist()

#         post_nms_idxs = torchvision.ops.nms(output[0]['boxes'][high_scores_idxs], 
#                                             output[0]['scores'][high_scores_idxs], 0.3).cpu().numpy()

#         keypoints = []
#         for kps in output[0]['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
#             keypoints.append(list(map(int, kps[0,0:2])))

#         labels = []
#         for label in output[0]['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy():
#             labels.append(label)

#         label_keypoint_dict = {lbl: kp for kp, lbl in zip(keypoints, labels)}
#         labeled_keypoints = [value + [key] for key, value in sorted(label_keypoint_dict.items())]

#         return labeled_keypoints

#     def process_image(self, img):
#         img = img.unsqueeze(0).to(device)
#         # Temporarily set the keypoint model to evaluation mode
#         keypoint_model_training = self.keypoint_model.training  # Save the current mode
#         self.keypoint_model.eval()
#         with torch.no_grad():
#             output = self.keypoint_model(img)
#         print("Output", output)
#         # Set the keypoint model back to its previous mode
#         self.keypoint_model.train(keypoint_model_training)
#         img = (img[0].permute(1,2,0).detach().cpu().numpy() * 255).astype(np.uint8)
#         labeled_keypoints = self.process_model_output(output)

#         return labeled_keypoints

#     def forward(self, imgs):
#         outputs = []

#         for i in range(imgs.shape[0]):
#             labeled_keypoints = self.process_image(imgs[i])
#             keypoints = torch.stack([torch.tensor(kp) for kp in labeled_keypoints]).float().to(device)

#             vertices, enc_e, edges = self.gnn_encoder(keypoints)
#             updated_vertices = self.gnn_decoder(vertices, enc_e, edges)
#             outputs.append((updated_vertices, enc_e, edges))

#         return outputs

In [None]:
# Define the model
model = KeypointPipeline(weights_path, num_vertices=6)
model = model.to(device)

# Define the loss
criterion = OccludedKeyPointLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 25  # Define your number of epochs
batch_size = 8

# Build the train/val/test split once: every call to train_test_split re-copies
# and re-splits the whole dataset, so the result is shared by all three folders.
split_root = train_test_split(root_dir)
KEYPOINTS_FOLDER_TRAIN = split_root + "/train"
KEYPOINTS_FOLDER_VAL = split_root + "/val"
KEYPOINTS_FOLDER_TEST = split_root + "/test"

dataset_train = KPDataset(KEYPOINTS_FOLDER_TRAIN, transform=None, demo=False)
dataset_val = KPDataset(KEYPOINTS_FOLDER_VAL, transform=None, demo=False)
dataset_test = KPDataset(KEYPOINTS_FOLDER_TEST, transform=None, demo=False)

data_loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
data_loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn)

v = 4  # version tag embedded in the checkpoint file name

model.train()
for epoch in range(num_epochs):
    start_time = time.time()
    for batch_idx, batch in enumerate(data_loader_train):
        img_tuple, target_dict_tuple, img_files = batch
        print(f"Processing batch {batch_idx+1} with images:", img_files)

        imgs = [img.to(device) for img in img_tuple]  # Create list of images

        # Process each image individually (separate index name so the batch
        # counter above is not overwritten).
        losses = []
        for img_idx in range(len(imgs)):
            img = imgs[img_idx].unsqueeze(0)  # Unsqueeze to add batch dimension

            # Prepare ground truth vertices for the image: append a column of
            # ones to every keypoint and drop singleton dimensions.
            keypoints = target_dict_tuple[img_idx]['keypoints'].to(device)
            visibility = torch.ones((keypoints.shape[0], keypoints.shape[1], 1)).to(device)
            vertices_gt = torch.cat((keypoints, visibility), dim=2).squeeze()

            # Forward pass; the pipeline exposes the encoder's edge features
            # and edge list of the last processed image as attributes.
            output = model(img)
            vertices_pred = output[0]
            edges_prob = model.enc_e
            edges_gt = model.edges

            # Compute loss for the image
            huber_loss = criterion(vertices_pred, vertices_gt)
            ce_loss = edge_loss(edges_prob, edges_gt)
            vis_loss = visibility_loss(vertices_pred, vertices_gt)

            loss = huber_loss + ce_loss + vis_loss
            losses.append(loss)  # Store loss for the image

        # Average loss over all images in the batch
        loss = torch.mean(torch.stack(losses))

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

    end_time = time.time()
    epoch_time = end_time - start_time
    eta = epoch_time * (num_epochs - epoch - 1)
    # Note: the reported loss is that of the last batch of the epoch.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, ETA: {eta} seconds')

model_save_path = f"/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_occ_b{batch_size}_e{num_epochs}_v{v}.pth"

# The whole model object is saved (KeypointPipeline loads checkpoints with a
# plain torch.load). To save only the weights instead:
# torch.save(model.state_dict(), model_save_path)
torch.save(model, model_save_path)


# model.train()
# # Epoch loop
# for epoch in range(num_epochs):
#     # For each batch in your training data
#     for batch in data_loader_train:
#         img_tuple, target_dict_tuple = batch
#         img = img_tuple[0]
# #         print(img.shape)
#         target = target_dict_tuple[0]
#         img = img.to(device)
#         vertices_gt = target['keypoints'].to(device)
#         num_vertices = vertices_gt.shape[0]
#         print(num_vertices)
#         vertices_gt[:, :, 2] = torch.arange(1, num_vertices+1).unsqueeze(1).to(device)

#         # Forward pass
#         vertices_pred, edges_prob, edges_gt = model(img)
        
#         # Compute the losses
#         trifocal_loss = criterion(vertices_pred, vertices_gt)
#         ce_loss = edge_loss(edges_prob, edges_gt)

#         # Combined loss
#         loss = trifocal_loss + ce_loss
# #         loss = trifocal_loss

#         # Backward pass and optimization
#         optimizer.zero_grad()
#         loss.backward()
        
#         optimizer.step()

#     # Print loss for each epoch
#     print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

In [None]:
def visualize_and_save(img, vertices, filename):
    """Draw vertices on an image tensor and write the annotated image to disk.

    Args:
        img: torch tensor that is permuted with ``(1, 2, 0)`` below, i.e. a
            3-D channels-first image. Values are multiplied by 255, so the
            tensor is presumably normalised to [0, 1] -- TODO confirm.
        vertices: torch tensor indexed as ``vertices[i, 0]`` (x) and
            ``vertices[i, 1]`` (y); one filled red dot is drawn per row.
        filename: destination path passed to ``cv2.imwrite``.

    Returns:
        None. Progress is reported through ``print``; if ``cv2.imwrite``
        reports failure, the flattened pixel data is written to
        ``filename + ".txt"`` for inspection.
    """
    print("type of image befor conversion",type(img))    
    print("type of vertices before conversion", type(vertices))

    # detach() keeps the numpy conversion valid when called outside torch.no_grad().
    img = img.detach().permute(1, 2, 0).cpu().numpy()
    # Clip before the uint8 cast; values outside [0, 1] would otherwise wrap around.
    img = (np.clip(img, 0.0, 1.0) * 255).astype(np.uint8)

    if img.shape[2] == 1:
        # Single-channel input: expand to three channels so the red dots show up.
        img = cv2.cvtColor(np.ascontiguousarray(img[:, :, 0]), cv2.COLOR_GRAY2BGR)
    else:
        # OpenCV writes colour images in BGR order.
        img = cv2.cvtColor(np.ascontiguousarray(img), cv2.COLOR_RGB2BGR)
    vertices = vertices.detach().cpu().numpy()

    print(f"Image shape before saving: {img.shape}")  # print the image shape
    print("type of vertices", type(vertices))

    for i in range(vertices.shape[0]):
        img = cv2.circle(img, (int(vertices[i, 0]), int(vertices[i, 1])), radius=2, color=(0, 0, 255), thickness=-1)

    result = cv2.imwrite(filename, img)
    print(f"Image saved at {filename}: {result}")  # print if save was successful

    # If the image didn't save correctly, save the image data to a text file for examination
    if not result:
        with open(filename + ".txt", "w") as f:
            np.savetxt(f, img.flatten())

In [None]:
def test_and_save_model(model, data_loader_test):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_trifocal_loss = 0.0
    total_ce_loss = 0.0
    total_vis_loss = 0.0
    num_batches = 0

    # We don't need to track gradients during evaluation
    with torch.no_grad():
        for idx, batch in enumerate(data_loader_test):
            img_tuple, target_dict_tuple, img_file = batch

            total_batch_loss = 0.0
            total_batch_trifocal_loss = 0.0
            total_batch_ce_loss = 0.0
            total_batch_vis_loss = 0.0

            for i in range(len(img_tuple)):
                img = img_tuple[i].to(device)
                target = target_dict_tuple[i]

                # Prepare ground truth vertices for the image
                keypoints = target['keypoints'].to(device)
                visibility = torch.ones((keypoints.shape[0], keypoints.shape[1], 1)).to(device)
                vertices_gt = torch.cat((keypoints, visibility), dim=2).unsqueeze(0)  # Unsqueeze to add batch dimension
                vertices_gt = vertices_gt.squeeze()

                # Forward pass
                output = model(img.unsqueeze(0))
                vertices_pred = output[0]
                edges_prob = model.enc_e
                edges_gt = model.edges

                # Print the shapes for debugging
                print(f"img shape: {img.shape}, vertices_pred shape: {vertices_pred.shape}")

                # Compute the losses
                trifocal_loss = criterion(vertices_pred, vertices_gt)
                ce_loss = edge_loss(edges_prob, edges_gt)
                vis_loss = visibility_loss(vertices_pred, vertices_gt)

                # Combined loss
                loss = trifocal_loss + ce_loss + vis_loss

                total_batch_loss += loss.item()
                total_batch_trifocal_loss += trifocal_loss.item()
                total_batch_ce_loss += ce_loss.item()
                total_batch_vis_loss += vis_loss.item()

                # Visualize and save the prediction
                filename = f'/home/jc-merlab/Pictures/Data/occ_vis_data/image_{idx}_{i}.jpg'
                visualize_and_save(img, vertices_pred, filename)
                print(f"Image saved at {filename}")  # Print statement to confirm image save

            total_loss += total_batch_loss / len(img_tuple)
            total_trifocal_loss += total_batch_trifocal_loss / len(img_tuple)
            total_ce_loss += total_batch_ce_loss / len(img_tuple)
            num_batches += 1
    
    # Average the loss over all batches
    avg_loss = total_loss / num_batches
    avg_trifocal_loss = total_trifocal_loss / num_batches
    avg_ce_loss = total_ce_loss / num_batches
    
    print(f'Avg. Test Loss: {avg_loss}, Avg. Trifocal Loss: {avg_trifocal_loss}, Avg. Cross Entropy Loss: {avg_ce_loss}')
    return avg_loss, avg_trifocal_loss, avg_ce_loss

In [None]:
def test_model(model, data_loader_test):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_trifocal_loss = 0.0
    total_ce_loss = 0.0
    total_vis_loss = 0.0
    num_batches = 0
    
    all_vertices_pred = []  # List to store all predicted vertices
    
    # We don't need to track gradients during evaluation
    with torch.no_grad():
        for batch in data_loader_test:
            img_tuple, target_dict_tuple, img_file = batch
            
            total_batch_loss = 0.0
            total_batch_trifocal_loss = 0.0
            total_batch_ce_loss = 0.0
            

            for i in range(len(img_tuple)):
                img = img_tuple[i].to(device)
                target = target_dict_tuple[i]

                vertices_gt = target['keypoints'].to(device)
                num_vertices = vertices_gt.shape[0]
                vertices_gt[:, :, 2] = torch.arange(1, num_vertices+1).unsqueeze(1).to(device)
                

                # Forward pass
                output = model(img.unsqueeze(0))
                print("Output per img", output[0])
                vertices_pred, edges_prob, edges_gt = output[0]

                # Compute the losses
                trifocal_loss = criterion(vertices_pred, vertices_gt)
                ce_loss = edge_loss(vertices_pred, vertices_gt)
                vis_loss = 

                # Combined loss
                loss = trifocal_loss + ce_loss + vis_loss

                total_batch_loss += loss.item()
                total_batch_trifocal_loss += trifocal_loss.item()
                total_batch_ce_loss += ce_loss.item()

                # Save the predictions for this image
                all_vertices_pred.append(vertices_pred.cpu().numpy())
            
            total_loss += total_batch_loss / len(img_tuple)
            total_trifocal_loss += total_batch_trifocal_loss / len(img_tuple)
            total_ce_loss += total_batch_ce_loss / len(img_tuple)
            num_batches += 1
    
    # Average the loss over all batches
    avg_loss = total_loss / num_batches
    avg_trifocal_loss = total_trifocal_loss / num_batches
    avg_ce_loss = total_ce_loss / num_batches
    
    print(f'Avg. Test Loss: {avg_loss}, Avg. Trifocal Loss: {avg_trifocal_loss}, Avg. Cross Entropy Loss: {avg_ce_loss}, All Predicted Vertices: {all_vertices_pred}')
    return avg_loss, avg_trifocal_loss, avg_ce_loss, all_vertices_pred

In [None]:
# Run the evaluation pass that also writes one annotated image per test sample.
# test_and_save_model returns exactly three averages (combined, trifocal,
# cross-entropy); the four-value form with the predicted vertices is what
# test_model returns instead.

avg_loss, avg_trifocal_loss, avg_ce_loss = test_and_save_model(model, data_loader_test)

In [None]:
def visualize_and_save(img, vertices, filename):
    """Draw vertices on an image tensor and write the annotated image to disk.

    NOTE: this cell redefines ``visualize_and_save``; once it has run, it
    replaces the definition of the same name given earlier in the notebook.

    Args:
        img: torch image tensor in the [0, 1] range. After ``squeeze()`` it
            may be 2-D (grayscale) or 3-D channels-first with 3 or 4
            channels (treated as RGB/RGBA and converted for OpenCV).
        vertices: torch tensor indexed as ``vertices[i, 0]`` (x) and
            ``vertices[i, 1]`` (y); one filled red dot is drawn per row.
        filename: destination path passed to ``cv2.imwrite``.

    Returns:
        None. A message is printed if OpenCV reports that the write failed.
    """
    # detach() keeps the numpy conversion valid when called outside torch.no_grad().
    img = img.detach().squeeze().cpu().numpy()
    # Clip before the uint8 cast; values outside [0, 1] would otherwise wrap around.
    img = (np.clip(img, 0.0, 1.0) * 255).astype(np.uint8)  # Convert back from [0, 1] range to [0, 255]
    vertices = vertices.detach().cpu().numpy()

    if len(img.shape) == 2:
        # Convert grayscale to BGR so the red dots are visible
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif len(img.shape) == 3 and img.shape[0] in (3, 4):
        # Channels-first colour tensor: OpenCV needs (H, W, C) in BGR order.
        img = cv2.cvtColor(np.ascontiguousarray(img.transpose(1, 2, 0)), cv2.COLOR_RGB2BGR)

    for i in range(vertices.shape[0]):
        img = cv2.circle(img, (int(vertices[i, 0]), int(vertices[i, 1])), radius=2, color=(0, 0, 255), thickness=-1)

    # cv2.imwrite signals failure through its return value rather than an exception.
    if not cv2.imwrite(filename, img):
        print(f"Failed to write image to {filename}")

# Iterate over batches and save one annotated image per batch.
# Inference only: evaluation mode, and no autograd graph so the predictions
# can be converted to numpy inside visualize_and_save.
model.eval()
with torch.no_grad():
    for idx, batch in enumerate(data_loader_test):
        # Index instead of tuple-unpacking: the evaluation functions in this
        # notebook unpack three items per batch (images, targets, file names),
        # so a fixed two-item unpack would fail on the same loader.
        img_tuple, target_dict_tuple = batch[0], batch[1]

        # Only visualize the first image of each batch
        img = img_tuple[0].to(device)
        # Same call convention as the evaluation functions: batched tensor in,
        # predicted vertices at index 0 of the output.
        outputs = model(img.unsqueeze(0))
        filename = f'image_with_vertices_batch_{idx}.jpg'  # unique filename for each batch
        visualize_and_save(img, outputs[0], filename)

In [None]:
import cv2
import os
import re

# Directory containing images
dir_path = '/home/jc-merlab/Pictures/Data/occ_vis_data/'


def _natural_key(name):
    """Sort key that orders embedded numbers numerically (image_2 before image_10)."""
    return [int(tok) if tok.isdigit() else tok.lower() for tok in re.split(r'(\d+)', name)]


# Collect the frames in natural order; a plain string sort would place
# image_10_0.jpg ahead of image_2_0.jpg and scramble the video.
images = sorted(
    (f for f in os.listdir(dir_path) if f.endswith(('.jpg', '.png'))),
    key=_natural_key,
)
if not images:
    raise FileNotFoundError(f"No .jpg/.png images found in {dir_path}")

# Determine the width and height from the first image
image_path = os.path.join(dir_path, images[0])
frame = cv2.imread(image_path)
if frame is None:
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    raise IOError(f"Could not read image {image_path}")
height, width, channels = frame.shape
# No cv2.imshow preview here: a GUI window needs an event loop (cv2.waitKey)
# and is not available on a headless kernel.

# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Be sure to use the correct codec
video_filename = 'output.mp4'
video = cv2.VideoWriter(video_filename, fourcc, 3.0, (width, height))
if not video.isOpened():
    raise RuntimeError(f"Could not open video writer for {video_filename}")

try:
    for image in images:
        image_path = os.path.join(dir_path, image)
        frame = cv2.imread(image_path)
        if frame is None:
            print(f"Skipping unreadable image {image_path}")
            continue
        if frame.shape[:2] != (height, width):
            # Frames must match the size the writer was opened with.
            frame = cv2.resize(frame, (width, height))
        video.write(frame)  # Write out frame to video
finally:
    # Release the writer even if a frame fails, so the file is finalised.
    video.release()

print("The output video is", video_filename)

In [None]:
weights_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_occ_b8_e25_v4.pth'

# The checkpoint is a fully pickled module. Unpickling can execute arbitrary
# code, so only load files from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects pickled modules; pass weights_only=False there -- confirm
# against the installed version.
# map_location lets a checkpoint saved on GPU load on the CPU fallback chosen
# for `device` at the top of the notebook.
model = torch.load(weights_path, map_location=device).to(device)
model.eval()

# Open inside a context manager so the file handle is closed; convert() loads
# the pixels and returns an independent RGB image.
with Image.open("/home/jc-merlab/Pictures/Data/occluded_results_mi20_ma80_n2/occluded_000027.rgb.jpg") as image_file:
    image = image_file.convert("RGB")
print(type(image))

# (C, H, W) tensor with a leading batch dimension added for the model.
img = F.to_tensor(image).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(img)

# Predicted keypoints are the first element of the model output.
keypoints = output[0]

print(keypoints)
plt.imshow(image)

# Assuming each keypoint is a tensor representing (x, y)
for i, keypoint in enumerate(keypoints):
    print(f'Key point {i}: {keypoint}')
    keypoint = keypoint.cpu().numpy()
    plt.plot(keypoint[0], keypoint[1], 'ro')
plt.show()

# Plotting the image

# plt.imshow(image)

# for keypoint in output[0]:
#     plt.plot(keypoint[0], keypoint[1], 'ro')

# plt.show()