In [None]:
import os
import time
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
from os import listdir
import pandas as pd
import numpy as np
import glob
import cv2
import json
from os.path import expanduser
import splitfolders
import shutil
from define_path import Def_Path

from tqdm import tqdm

from scipy.optimize import linear_sum_assignment

import torch 
import torchvision
from torchvision import models
from torchvision.models.detection.rpn import AnchorGenerator
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn 
import torchvision.transforms as T
from torchvision.transforms import functional as F
from torchsummary import summary
from sklearn.model_selection import train_test_split

import albumentations as A # Library for augmentations

import matplotlib.pyplot as plt 
from PIL import Image

import transforms, utils, engine, train
from utils import collate_fn
from engine import train_one_epoch, evaluate

# Report GPU memory figures only when a CUDA device is actually present.
# Querying device 0 unconditionally raises on CPU-only machines, even though
# the notebook otherwise falls back to 'cpu' when CUDA is unavailable.
if torch.cuda.is_available():
    t = torch.cuda.get_device_properties(0).total_memory  # total memory of device 0
    print(t)
    torch.cuda.empty_cache()

    r = torch.cuda.memory_reserved(0)  # value reported by torch for device 0
    print(r)
    a = torch.cuda.memory_allocated(0)  # value reported by torch for device 0
    print(a)
    # f = r-a  # free inside reserved
else:
    # Keep the names defined so a later reference does not raise NameError.
    t = r = a = None
    print("CUDA not available; skipping GPU memory report")

# Checkpoint of the pre-trained keypoint detector loaded by KeypointPipeline.
weights_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_sim_b1_e25_v0.pth'

In [None]:
# Data locations are derived from the home directory exposed by Def_Path, so a
# user only has to adjust the parent path and never hard-codes their own home.
path = Def_Path()

parent_path = "".join((path.home, "/Pictures/", "Data/"))

# Dated dataset folder, kept for reference:
# root_dir = parent_path + path.year + "-" + path.month + "-" + path.day + "/"
root_dir = "".join((parent_path, "occ_sim_dataset/"))

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# torch.cuda.set_per_process_memory_fraction(0.9, 0)
print(device)

In [None]:
def train_transform():
    """Build the albumentations pipeline used to augment training samples.

    The pipeline is a single ``A.Sequential`` (applied with p=1) that runs
    ``A.RandomRotate90`` followed by ``A.RandomBrightnessContrast``.
    Keypoints are declared in 'xy' format; bounding boxes in 'pascal_voc'
    format with their labels supplied under the ``bboxes_labels`` field.
    More about these formats:
    https://albumentations.ai/docs/getting_started/keypoints_augmentation/
    https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/

    Returns:
        The configured ``A.Compose`` object.
    """
    augmentation_steps = A.Sequential([
        # Random rotation of an image by 90 degrees zero or more times
        A.RandomRotate90(p=1),
        # Random change of brightness & contrast
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.3, brightness_by_max=True, always_apply=False, p=1),
    ], p=1)
    # A.Resize(640, 480) could be appended to the Compose list to force 640x480 images.

    keypoint_format = A.KeypointParams(format='xy')
    # Bboxes should have labels, hence the label field.
    bbox_format = A.BboxParams(format='pascal_voc', label_fields=['bboxes_labels'])

    return A.Compose(
        [augmentation_steps],
        keypoint_params=keypoint_format,
        bbox_params=bbox_format,
    )

In [None]:
def train_test_split(src_dir):
    """Split the images/annotations found in ``src_dir`` into train/val/test.

    Every ``*.jpg`` in ``src_dir`` is copied into ``<src_dir>images`` and every
    ``*.json`` into ``<src_dir>annotations``; ``splitfolders.ratio`` then
    splits ``src_dir`` 70/20/10 (seed 42, files copied, not moved) into an
    output folder under the module-level ``parent_path`` whose name carries
    the date fields of the module-level ``path`` object. The two staging
    folders are deleted afterwards.

    NOTE: this definition shadows ``sklearn.model_selection.train_test_split``
    imported at the top of the notebook.

    Args:
        src_dir: Dataset folder as a string ending with a path separator
            (the staging folder names are appended by plain concatenation).

    Returns:
        Path of the split output folder (a string).
    """
    dst_dir_img = src_dir + "images"
    dst_dir_anno = src_dir + "annotations"

    if os.path.exists(dst_dir_img) and os.path.exists(dst_dir_anno):
        print("folders exist")

    # Create each staging folder independently: creating both blindly raised
    # FileExistsError whenever only one of them was left over from an
    # earlier, interrupted run.
    for staging_dir in (dst_dir_img, dst_dir_anno):
        if not os.path.isdir(staging_dir):
            os.mkdir(staging_dir)

    output = parent_path + "split_folder_output" + "-" + path.year + "-" + path.month + "-" + path.day

    try:
        for jpgfile in glob.iglob(os.path.join(src_dir, "*.jpg")):
            shutil.copy(jpgfile, dst_dir_img)

        for jsonfile in glob.iglob(os.path.join(src_dir, "*.json")):
            shutil.copy(jsonfile, dst_dir_anno)

        splitfolders.ratio(src_dir,  # The location of dataset
                           output=output,  # The output location
                           seed=42,  # The number of seed
                           ratio=(.7, .2, .1),  # The ratio of split dataset
                           group_prefix=None,  # If your dataset contains more than one file like ".jpg", ".pdf", etc
                           move=False  # If you choose to move, turn this into True
                           )
    finally:
        # Always remove the staging copies, even when copying or splitting
        # fails, so a retry starts from a clean source folder.
        shutil.rmtree(dst_dir_img)
        shutil.rmtree(dst_dir_anno)

    return output
    

In [None]:
class KPDataset(Dataset):
    """Dataset of images with one bounding box and keypoint set per robot joint.

    ``root`` must contain an ``images`` and an ``annotations`` folder. Image
    and annotation files are paired purely by their position in the two
    sorted directory listings. Each annotation is a JSON file with the keys
    ``bboxes`` and ``keypoints``; keypoints are nested per object and each
    keypoint is ``[x, y, visibility]``.
    """

    # Names handed to the augmentation pipeline as the per-box labels.
    _JOINT_NAMES = ('base_joint', 'joint2', 'joint3', 'joint4', 'joint5', 'joint6')

    def __init__(self, root, transform=None, demo=False):
        self.root = root
        self.transform = transform
        # Use demo=True if you need transformed and original images
        # (for example, for visualization purposes).
        self.demo = demo
        self.imgs_files = sorted(os.listdir(os.path.join(root, "images")))
        self.annotations_files = sorted(os.listdir(os.path.join(root, "annotations")))

    @staticmethod
    def _make_target(boxes, keypoints, idx):
        """Pack boxes and keypoints into the target dict for sample ``idx``.

        Labels are fixed to 1..6 (one class per joint) regardless of how
        many boxes are supplied.
        """
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        return {
            "boxes": boxes,
            "labels": torch.as_tensor(list(range(1, 7)), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
            # (y_max - y_min) * (x_max - x_min) for every box
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros(len(boxes), dtype=torch.int64),
            "keypoints": torch.as_tensor(keypoints, dtype=torch.float32),
        }

    def __getitem__(self, idx):
        """Return ``(img, target, img_file)``, or with ``demo=True``
        ``(img, target, img_original, target_original, img_file)``."""
        img_file = self.imgs_files[idx]
        img_path = os.path.join(self.root, "images", img_file)
        annotations_path = os.path.join(self.root, "annotations", self.annotations_files[idx])

        # OpenCV reads BGR; the rest of the pipeline works on RGB.
        img_original = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

        with open(annotations_path) as f:
            data = json.load(f)
        bboxes_original = data['bboxes']
        keypoints_original = data['keypoints']
        # All objects are keypoints on the robot.
        bboxes_labels_original = list(self._JOINT_NAMES)

        if self.transform:
            # The augmentation library wants a flat list of [x, y] points, so
            # [[o1_k1, o1_k2], [o2_k1, o2_k2]] becomes [o1_k1, o1_k2, o2_k1, o2_k2]
            # with the visibility flag stripped from every keypoint.
            flat_xy = [point[0:2] for obj in keypoints_original for point in obj]

            transformed = self.transform(
                image=img_original,
                bboxes=bboxes_original,
                bboxes_labels=bboxes_labels_original,
                keypoints=flat_xy,
            )
            img = transformed['image']
            bboxes = transformed['bboxes']

            # Regroup the flat result as one single-keypoint list per object.
            regrouped = np.reshape(np.array(transformed['keypoints']), (-1, 1, 2)).tolist()

            # Re-attach the original visibility flag to each transformed [x, y].
            keypoints = [
                [xy + [keypoints_original[obj_i][kp_i][2]] for kp_i, xy in enumerate(obj)]
                for obj_i, obj in enumerate(regrouped)
            ]
        else:
            img, bboxes, keypoints = img_original, bboxes_original, keypoints_original

        # Convert everything into torch tensors.
        target = self._make_target(bboxes, keypoints, idx)
        img = F.to_tensor(img)
        target_original = self._make_target(bboxes_original, keypoints_original, idx)
        img_original = F.to_tensor(img_original)

        if self.demo:
            return img, target, img_original, target_original, img_file
        return img, target, img_file

    def __len__(self):
        return len(self.imgs_files)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as func
import networkx as nx
import torch_geometric.nn as pyg
from torch_geometric.data import Data

class GNNLayer(nn.Module):
    """A single linear projection followed by a ReLU activation."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear map to ``x`` and clamp negatives to zero."""
        projected = self.fc(x)
        return func.relu(projected)

# Graph Encoder
class GraphEncoder(nn.Module):
    """Encode node features into a softmax-normalised edge descriptor.

    For a 2-D input ``V`` of shape ``[N, node_feature_size]`` the result has
    shape ``[1, 128]``: node information is pooled by summation over all rows
    before the final layers, and the softmax runs over the 128 features.
    """

    def __init__(self, node_feature_size):
        super().__init__()
        hidden = 128
        self.f_enc = GNNLayer(node_feature_size, hidden)
        self.f_e1 = GNNLayer(2 * hidden, hidden)
        self.f_v = GNNLayer(hidden, hidden)
        self.f_e2 = GNNLayer(2 * hidden, hidden)

    def forward(self, V):
        # Node embedding.
        node_hidden = self.f_enc(V)
        # Node -> edge: each node embedding is concatenated with itself
        # (placeholder "example implementation" of the pairing step).
        edge_hidden = self.f_e1(torch.cat((node_hidden, node_hidden), dim=1))
        # Edge -> node: sum over all rows, keeping a leading dimension of 1.
        pooled = edge_hidden.sum(dim=0, keepdim=True)
        node_hidden_2 = self.f_v(pooled)
        # Node -> edge again, with the same self-concatenation.
        edge_hidden_2 = self.f_e2(torch.cat((node_hidden_2, node_hidden_2), dim=1))
        # Softmax over the feature dimension yields the occlusion statistics.
        return func.softmax(edge_hidden_2, dim=1)

# Graph Decoder
class GraphDecoder(nn.Module):
    """Refine node features using the edge descriptor from the encoder.

    ``forward`` returns ``V`` plus a single correction row (shape
    ``[1, node_feature_size]``) that is broadcast onto every node.
    """

    def __init__(self, node_feature_size):
        super().__init__()
        self.f_e_p = GNNLayer(2 * node_feature_size, 128)
        self.f_v = GNNLayer(128, node_feature_size)

    def forward(self, V, E):
        # Vertex -> edge: self-concatenated node features, gated by E.
        paired_nodes = torch.cat((V, V), dim=1)
        gated_edges = self.f_e_p(paired_nodes) * E
        # Edge -> vertex: pool over rows and map back to node-feature size.
        correction = self.f_v(gated_edges.sum(dim=0, keepdim=True))
        mu_g = V + correction
        return mu_g



In [None]:
def edge_loss(pred_edges, gt_edges):
    """
    Compute the binary cross-entropy loss for edge predictions.

    Both classes contribute: visible edges (gt = 1) are penalised through
    ``log(p)`` and non-visible edges (gt = 0) through ``log(1 - p)``, where
    ``p`` is the predicted visibility probability. Without the second term a
    predictor could reach zero loss simply by calling every edge visible.

    Args:
    - pred_edges (Tensor): Predicted probabilities of edges being visible,
                           shape [#edges, 2] where second column is visibility probability.
    - gt_edges (array-like or Tensor): Ground truth for edges, binary values (0 or 1),
                         shape [#edges].

    Returns:
    - loss (Tensor): Summed binary cross-entropy over all edges (scalar).
    """
    # as_tensor accepts lists, NumPy arrays and tensors alike (no copy
    # warning for tensor input) and places the data on the prediction device.
    gt_edges_tensor = torch.as_tensor(gt_edges, dtype=torch.float32, device=pred_edges.device)

    # Extract the probabilities corresponding to the edges being visible.
    visible_prob = pred_edges[:, 1]

    eps = 1e-10  # small value to avoid log(0)
    loss = -torch.sum(
        gt_edges_tensor * torch.log(visible_prob + eps)
        + (1.0 - gt_edges_tensor) * torch.log(1.0 - visible_prob + eps)
    )

    return loss



# def kp_loss(predictions, targets):
#     return torch.mean((predictions - targets) ** 2)

def compute_loss(pred_keypoints, gt_keypoints):
    """Mean squared error between optimally matched predicted and GT keypoints.

    Only the first two columns (x, y) of each input are used. Predictions are
    paired with ground-truth points by solving the linear assignment problem
    on their pairwise Euclidean distances; the MSE is then taken over the
    matched coordinate pairs.

    Args:
        pred_keypoints: Tensor of shape [num_pred, >=2].
        gt_keypoints: Tensor of shape [num_gt, >=2].

    Returns:
        Scalar tensor with the matched MSE.
    """
    pred_xy = pred_keypoints[:, :2]
    gt_xy = gt_keypoints[:, :2]

    # Pairwise distances between every prediction and every ground-truth point.
    pairwise = torch.cdist(pred_xy, gt_xy)

    # The assignment solver works on NumPy data; detach so no gradient is
    # required for the matching step itself.
    rows, cols = linear_sum_assignment(pairwise.detach().cpu().numpy())

    return func.mse_loss(pred_xy[rows], gt_xy[cols])


In [None]:
#ground truth edge creation
def create_gt_edges(keypoints, edges_definition):
    """
    Derive ground-truth edge visibility from keypoint visibility flags.

    Args:
    - keypoints: NumPy array or PyTorch tensor of shape [#keypoints, 3]; each
                 row is (x, y, visibility), with visibility 1 for a visible
                 keypoint and 0 otherwise.
    - edges_definition (list of tuples): Pairs of keypoint indices, e.g.
                 (0, 1) for the edge joining the first and second keypoints.

    Returns:
    - np.array of shape [#edges]; an entry is truthy only when both end
      points of the edge are visible (the values are the result of Python
      ``and`` on the two visibility flags).
    """
    # Tensors (possibly on CUDA) are brought to the CPU as NumPy arrays.
    if isinstance(keypoints, torch.Tensor):
        keypoints = keypoints.cpu().numpy()

    # An edge is visible if both its keypoints are visible.
    gt_edges = [
        keypoints[first, 2] and keypoints[second, 2]
        for first, second in edges_definition
    ]
    return np.array(gt_edges)

In [None]:
class KeypointPipeline(nn.Module):
    """Pre-trained keypoint detector followed by a graph encoder/decoder.

    The detector is always run in eval mode without gradients; its filtered
    detections are turned into ``[x, y, confidence, label]`` rows that the
    graph encoder/decoder then refine. The encoder output of the most recent
    image is kept in ``self.enc_edges`` so callers can compute an edge loss.
    """

    def __init__(self, weights_path):
        """Load the pickled detector from ``weights_path`` and build the GNN parts.

        Relies on the module-level ``device``.
        """
        super().__init__()

        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine; without it loading fails when CUDA is missing.
        # NOTE(review): this unpickles a whole model object, so only load
        # checkpoints from a trusted source.
        self.keypoint_model = torch.load(weights_path, map_location=device).to(device)
        self.gnn_encoder = GraphEncoder(node_feature_size=4)
        self.gnn_decoder = GraphDecoder(node_feature_size=4)

    def process_model_output(self, output):
        """Filter detections and refine them with the graph networks.

        Detections with score <= 0.7 are dropped, the rest go through NMS
        with an IoU threshold of 0.3. For every surviving detection the first
        keypoint's integer (x, y), the score and the label form one row; rows
        are sorted by label.

        Args:
            output: Detector result list; only ``output[0]`` is used, with
                the keys 'scores', 'boxes', 'labels' and 'keypoints'.

        Returns:
            Tensor produced by the graph decoder from the sorted rows.
        """
        scores = output[0]['scores'].detach().cpu().numpy()
        high_scores_idxs = np.where(scores > 0.7)[0].tolist()

        post_nms_idxs = torchvision.ops.nms(output[0]['boxes'][high_scores_idxs],
                                            output[0]['scores'][high_scores_idxs], 0.3).cpu().numpy()

        confidence = output[0]['scores'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()
        labels = output[0]['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()
        keypoints = []
        for idx, kps in enumerate(output[0]['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()):
            keypoints.append(list(map(int, kps[0, 0:2])) + [confidence[idx]] + [labels[idx]])

        # Sort keypoints based on label
        keypoints.sort(key=lambda x: x[-1])
        print("initial keypoints", keypoints)
        keypoints_tensor = torch.tensor(keypoints, dtype=torch.float32).to(device)
        # Stored on the module so the training loop can read the encoder
        # output; overwritten on every call.
        self.enc_edges = self.gnn_encoder(keypoints_tensor)
        print("edges encoded", self.enc_edges)
        vertices_pred = self.gnn_decoder(keypoints_tensor, self.enc_edges)
        print("decoder keypoints", vertices_pred)
        return vertices_pred

    def process_image(self, img):
        """Run the detector on one image tensor and refine its keypoints.

        Args:
            img: Single image tensor without batch dimension.

        Returns:
            The tensor returned by ``process_model_output``.
        """
        img = img.unsqueeze(0).to(device)
        # Temporarily set the keypoint model to evaluation mode and make sure
        # its previous mode is restored even if inference raises.
        keypoint_model_training = self.keypoint_model.training
        self.keypoint_model.eval()
        try:
            with torch.no_grad():
                output = self.keypoint_model(img)
        finally:
            self.keypoint_model.train(keypoint_model_training)

        # The former uint8 image conversion here was never used and forced a
        # device-to-host copy for every image, so it has been removed.
        return self.process_model_output(output)

    def forward(self, imgs):
        """Process every image of a batch tensor ``imgs`` one by one.

        Returns:
            List with one refined keypoint tensor per image.
        """
        outputs = []

        for i in range(imgs.shape[0]):
            labeled_keypoints = self.process_image(imgs[i])
            outputs.append(labeled_keypoints)

        return outputs
    

In [None]:
# Define the model
model = KeypointPipeline(weights_path)
model = model.to(device)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 25  # Define your number of epochs
batch_size = 8

# Split the dataset ONCE and derive the three folders from the same output.
# Calling train_test_split() per folder repeated the full copy + split of the
# dataset three times for an identical result.
split_root = train_test_split(root_dir)
KEYPOINTS_FOLDER_TRAIN = split_root + "/train"
KEYPOINTS_FOLDER_VAL = split_root + "/val"
KEYPOINTS_FOLDER_TEST = split_root + "/test"

dataset_train = KPDataset(KEYPOINTS_FOLDER_TRAIN, transform=None, demo=False)
dataset_val = KPDataset(KEYPOINTS_FOLDER_VAL, transform=None, demo=False)
dataset_test = KPDataset(KEYPOINTS_FOLDER_TEST, transform=None, demo=False)

data_loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
data_loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn)

# Every unordered pair of the six keypoints (fully connected graph).
edges_def = [(0,1),(0,2),(0,3),(0,4),(0,5),(1,2),(1,3),(1,4),(1,5),(2,3),(2,4),(2,5),(3,4),(3,5),(4,5)]

v = 1  # version tag used in the saved model's file name

model.train()
for epoch in range(num_epochs):
    start_time = time.time()
    batch_losses = []  # mean loss of every batch, for the epoch summary
    for i, batch in enumerate(data_loader_train):
        img_tuple, target_dict_tuple, img_files = batch
        print(f"Processing batch {i+1} with images:", img_files)

        imgs = [img.to(device) for img in img_tuple]  # Create list of images

        # Process each image individually. A separate index is used so the
        # batch counter `i` is not overwritten.
        losses = []
        for j in range(len(imgs)):
            img = imgs[j].unsqueeze(0)  # Unsqueeze to add batch dimension

            # Prepare ground truth vertices for the image
            gt_keypoints = target_dict_tuple[j]['keypoints'].to(device).squeeze()
            print(gt_keypoints.shape)

            # Forward pass
            output = model(img)
            pred_keypoints = output[0]

            print("predicted keypoints", pred_keypoints)

            # Encoder output of the image that was just processed.
            edges_prob = model.enc_edges

            edges_gt = create_gt_edges(gt_keypoints, edges_def)

            # Compute loss for the image
            kp_loss = compute_loss(pred_keypoints, gt_keypoints)
            ce_loss = edge_loss(edges_prob, edges_gt)

            loss = kp_loss + ce_loss
            losses.append(loss)  # Store loss for the image

        # Average loss over all images in the batch
        total_loss = torch.mean(torch.stack(losses))

        # Backward pass and optimization
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        batch_losses.append(total_loss.item())

    end_time = time.time()
    epoch_time = end_time - start_time
    eta = epoch_time * (num_epochs - epoch - 1)
    # Report the mean batch loss of the epoch instead of the loss of
    # whichever single image happened to be processed last.
    epoch_loss = sum(batch_losses) / len(batch_losses) if batch_losses else float('nan')
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}, ETA: {eta} seconds')

model_save_path = f"/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_occ_b{batch_size}_e{num_epochs}_v{v}.pth"

# Save the state dict of the model, not the entire model
# torch.save(model.state_dict(), model_save_path)

# The whole module is pickled (once; the earlier duplicate save was redundant).
torch.save(model, model_save_path)



In [None]:
def visualize_and_save(img, vertices, filename):
    """Draw predicted vertices on an image and write the result to disk.

    Args:
        img: Image tensor in channel-first layout with values scaled to
            [0, 1]; it is converted to a uint8 BGR array before drawing.
        vertices: Tensor whose rows start with the (x, y) position of a
            vertex; each one is drawn as a small filled red dot.
        filename: Destination path handed to ``cv2.imwrite``.

    If writing fails, the flattened pixel data is dumped to
    ``filename + ".txt"`` for inspection.
    """
    print("type of image befor conversion",type(img))
    print("type of vertices before conversion", type(vertices))
    print(img)

    # Channel-first float tensor -> channel-last uint8 array, then RGB -> BGR
    # because OpenCV expects BGR when writing.
    rgb_pixels = (img.permute(1,2,0).cpu().numpy() * 255).astype(np.uint8)
    img = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    vertices = vertices.cpu().numpy()

    print(f"Image shape before saving: {img.shape}")  # print the image shape
    print("type of vertices", type(vertices))

    # Convert grayscale to BGR if necessary
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    for vertex in vertices:
        center = (int(vertex[0]), int(vertex[1]))
        img = cv2.circle(img, center, radius=2, color=(0, 0, 255), thickness=-1)

    result = cv2.imwrite(filename, img)
    print(f"Image saved at {filename}: {result}")  # print if save was successful

    # If the image didn't save correctly, save the image data to a text file for examination
    if not result:
        with open(filename + ".txt", "w") as f:
            np.savetxt(f, img.flatten())

In [None]:
def test_and_save_model(model, data_loader_test, edges_definition=None,
                        output_dir='/home/jc-merlab/Pictures/Data/occ_vis_data/'):
    """Evaluate the pipeline on a test loader and save annotated images.

    The losses mirror the training loop: a matched keypoint loss
    (``compute_loss``) plus an edge loss (``edge_loss`` on
    ``model.enc_edges`` against ``create_gt_edges``). The previous body read
    attributes that ``KeypointPipeline`` does not define (``enc_e``,
    ``edges``, ``edge_features``) and called loss helpers that are not
    defined in this notebook, so it could not run against the current model.

    Args:
        model: Pipeline exposing ``enc_edges`` after a forward pass.
        data_loader_test: Loader yielding ``(images, targets, file names)``.
        edges_definition: Optional list of keypoint index pairs. When omitted,
            every unordered pair of the ground-truth keypoints is used.
        output_dir: Folder receiving ``image_<batch>_<index>.jpg`` files; it
            is created if missing.

    Returns:
        ``(avg_loss, avg_keypoint_loss, avg_ce_loss)`` averaged over batches.
        The second value takes the slot formerly labelled "trifocal".

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_kp_loss = 0.0
    total_ce_loss = 0.0
    num_batches = 0

    # cv2.imwrite does not create missing folders, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    # We don't need to track gradients during evaluation
    with torch.no_grad():
        for idx, batch in enumerate(data_loader_test):
            img_tuple, target_dict_tuple, img_files = batch

            total_batch_loss = 0.0
            total_batch_kp_loss = 0.0
            total_batch_ce_loss = 0.0

            # Process each image individually
            for i in range(len(img_tuple)):
                img = img_tuple[i].to(device)
                target = target_dict_tuple[i]

                # Ground-truth keypoints as one [x, y, visibility] row each.
                gt_keypoints = target['keypoints'].to(device).reshape(-1, 3)

                # Forward pass
                output = model(img.unsqueeze(0))
                vertices_pred = output[0]

                # Encoder output belonging to the image just processed.
                edges_prob = model.enc_edges

                if edges_definition is None:
                    num_kp = gt_keypoints.shape[0]
                    edges = [(a, b) for a in range(num_kp) for b in range(a + 1, num_kp)]
                else:
                    edges = edges_definition
                edges_gt = create_gt_edges(gt_keypoints, edges)

                kp_loss = compute_loss(vertices_pred, gt_keypoints)
                ce_loss = edge_loss(edges_prob, edges_gt)
                loss = kp_loss + ce_loss

                total_batch_loss += loss.item()
                total_batch_kp_loss += kp_loss.item()
                total_batch_ce_loss += ce_loss.item()

                # Visualize and save the prediction (the helper reports
                # whether the write succeeded).
                filename = os.path.join(output_dir, f'image_{idx}_{i}.jpg')
                visualize_and_save(img, vertices_pred, filename)

            total_loss += total_batch_loss / len(img_tuple)
            total_kp_loss += total_batch_kp_loss / len(img_tuple)
            total_ce_loss += total_batch_ce_loss / len(img_tuple)
            num_batches += 1

    # Average the loss over all batches
    avg_loss = total_loss / num_batches
    avg_kp_loss = total_kp_loss / num_batches
    avg_ce_loss = total_ce_loss / num_batches

    print(f'Avg. Test Loss: {avg_loss}, Avg. Keypoint Loss: {avg_kp_loss}, Avg. Cross Entropy Loss: {avg_ce_loss}')
    return avg_loss, avg_kp_loss, avg_ce_loss

In [None]:
# avg_loss, avg_trifocal_loss, avg_ce_loss, all_preds = test_and_save_model(model, data_loader_test)

# Evaluate the trained pipeline on the held-out test loader. Besides returning
# the three averaged losses, test_and_save_model writes one annotated image
# per test sample through visualize_and_save.
avg_loss, avg_trifocal_loss, avg_ce_loss = test_and_save_model(model, data_loader_test)

In [None]:
import cv2
import os
import re

# Directory containing images
dir_path = '/home/jc-merlab/Pictures/Data/occ_vis_data/'


def _natural_key(name):
    """Sort key that compares embedded numbers numerically.

    Splitting on digit runs always yields text at even positions and numbers
    at odd positions, so keys of different names stay comparable.
    """
    return [int(tok) if tok.isdigit() else tok for tok in re.split(r'(\d+)', name)]


# Collect the frames in natural order. A plain lexicographic sort would put
# 'image_10_0.jpg' before 'image_2_0.jpg' and scramble the video.
images = sorted(
    (f for f in os.listdir(dir_path) if f.endswith('.jpg') or f.endswith('.png')),
    key=_natural_key,
)
if not images:
    raise FileNotFoundError(f"No .jpg/.png frames found in {dir_path}")

# Determine the width and height from the first image
image_path = os.path.join(dir_path, images[0])
frame = cv2.imread(image_path)
if frame is None:
    raise IOError(f"Could not read first frame: {image_path}")
height, width, channels = frame.shape
# No cv2.imshow preview here: without a waitKey loop the window is never
# painted, and the call fails outright on headless OpenCV builds.

# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Be sure to use the correct codec
video_filename = 'output.mp4'
video = cv2.VideoWriter(video_filename, fourcc, 3.0, (width, height))

try:
    for image in images:
        image_path = os.path.join(dir_path, image)
        frame = cv2.imread(image_path)
        if frame is None:
            # imread signals failure by returning None; skip instead of
            # handing None to the writer.
            print(f"Skipping unreadable frame: {image_path}")
            continue
        if frame.shape[:2] != (height, width):
            # Frames must match the size the writer was opened with.
            frame = cv2.resize(frame, (width, height))
        video.write(frame)  # Write out frame to video
finally:
    # Release the writer even if a frame fails, so the file is finalized.
    video.release()

print("The output video is", video_filename)

In [None]:
# Optionally restore a previously saved pipeline instead of reusing `model`:
# model_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_occ_b16_e25_v1.pth'
# model = torch.load(model_path).to(device)

# Run the pipeline on one occluded sample image and overlay its keypoints.
image = Image.open("/home/jc-merlab/Pictures/Data/occluded_results_mi20_ma80_n2/occluded_000041.rgb.jpg")
print(type(image))

# Image -> tensor on the target device, with a leading batch dimension.
img = F.to_tensor(image).to(device).unsqueeze(0)

model.to(device)
model.eval()
with torch.no_grad():
    output = model(img)

# One result per image in the batch; take the only one.
keypoints = output[0]

print(keypoints)
plt.imshow(image)

# The first two entries of every row are used as the (x, y) position.
for i in range(len(keypoints)):
    keypoint = keypoints[i]
    print(f'Key point {i}: {keypoint}')
    keypoint = keypoint.cpu().numpy()
    plt.plot(keypoint[0], keypoint[1], 'ro')
plt.show()

# Plotting the image

# plt.imshow(image)

# for keypoint in output[0]:
#     plt.plot(keypoint[0], keypoint[1], 'ro')

# plt.show()