In [1]:
import os
import time
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
from os import listdir
import pandas as pd
import numpy as np
import glob
import cv2
import json
from os.path import expanduser
import splitfolders
import shutil
from define_path import Def_Path
from datetime import datetime

from tqdm import tqdm

import torch 
import torchvision
from torchvision import models
from torchvision.models.detection.rpn import AnchorGenerator
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn 
import torchvision.transforms as T
from torchvision.transforms import functional as F
from torchsummary import summary
from sklearn.model_selection import train_test_split

import albumentations as A # Library for augmentations

import matplotlib.pyplot as plt 
from PIL import Image

import transforms, utils, engine, train
from utils import collate_fn
from engine import train_one_epoch, evaluate

t = torch.cuda.get_device_properties(0).total_memory
print(t)
torch.cuda.empty_cache()

r = torch.cuda.memory_reserved(0)
print(r)
a = torch.cuda.memory_allocated(0)
print(a)
# f = r-a  # free inside reserved

weights_path = '/home/jc-merlab/Pictures/Data/trained_models/keypointsrcnn_weights_sim_b1_e25_v0.pth'

16899571712
0
0


In [2]:
# to generalize home directory. User can change their parent path without entering their home directory
path = Def_Path()

parent_path =  path.home + "/Pictures/" + "Data/"

# root_dir = parent_path + path.year + "-" + path.month + "-" + path.day + "/"
root_dir = parent_path + "occ_sim_dataset/"

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# torch.cuda.set_per_process_memory_fraction(0.9, 0)
print(device)

cuda


In [4]:
def train_transform():
    return A.Compose([
        A.Sequential([
            A.RandomRotate90(p=1), # Random rotation of an image by 90 degrees zero or more times
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.3, brightness_by_max=True, always_apply=False, p=1), # Random change of brightness & contrast
        ], p=1)
#         A.Resize(640, 480)  # Resize all images to be 640x480
    ],
    keypoint_params=A.KeypointParams(format='xy'), # More about keypoint formats used in albumentations library read at https://albumentations.ai/docs/getting_started/keypoints_augmentation/
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bboxes_labels']) # Bboxes should have labels, read more at https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
    )

In [5]:
def train_test_split(src_dir):
    dst_dir_img = src_dir + "images"
    dst_dir_anno = src_dir + "annotations"
    
    if os.path.exists(dst_dir_img) and os.path.exists(dst_dir_anno):
        print("folders exist")
    else:
        os.mkdir(dst_dir_img)
        os.mkdir(dst_dir_anno)
        
    for jpgfile in glob.iglob(os.path.join(src_dir, "*.jpg")):
        shutil.copy(jpgfile, dst_dir_img)

    for jsonfile in glob.iglob(os.path.join(src_dir, "*.json")):
        shutil.copy(jsonfile, dst_dir_anno)
        
    output = parent_path + "split_folder_output" + "-" + path.year + "-" + path.month + "-" + path.day 
    
    splitfolders.ratio(src_dir, # The location of dataset
                   output=output, # The output location
                   seed=42, # The number of seed
                   ratio=(0.95, 0.025, 0.025), # The ratio of split dataset
                   group_prefix=None, # If your dataset contains more than one file like ".jpg", ".pdf", etc
                   move=False # If you choose to move, turn this into True
                   )
    
    shutil.rmtree(dst_dir_img)
    shutil.rmtree(dst_dir_anno)
    
    return output  

In [6]:
class KPDataset(Dataset):
    def __init__(self, root, transform=None, demo=False):                
        self.root = root
        self.transform = transform
        self.demo = demo # Use demo=True if you need transformed and original images (for example, for visualization purposes)
        self.imgs_files = sorted(os.listdir(os.path.join(root, "images")))
        self.annotations_files = sorted(os.listdir(os.path.join(root, "annotations")))
    
    def __getitem__(self, idx):
        img_file = self.imgs_files[idx]
        img_path = os.path.join(self.root, "images", self.imgs_files[idx])
        annotations_path = os.path.join(self.root, "annotations", self.annotations_files[idx])

        img_original = cv2.imread(img_path)
        img_original = cv2.cvtColor(img_original, cv2.COLOR_BGR2RGB)
        
        with open(annotations_path) as f:
            data = json.load(f)
            bboxes_original = data['bboxes']
            keypoints_original = data['keypoints']
            
            # All objects are keypoints on the robot
            bboxes_labels_original = [] 
            bboxes_labels_original.append('base_joint')
            bboxes_labels_original.append('joint2')
            bboxes_labels_original.append('joint3')
            bboxes_labels_original.append('joint4')
            bboxes_labels_original.append('joint5')
            bboxes_labels_original.append('joint6')  

        if self.transform:   
            # Converting keypoints from [x,y,visibility]-format to [x, y]-format + Flattening nested list of keypoints            
            # For example, if we have the following list of keypoints for three objects (each object has two keypoints):
            # [[obj1_kp1, obj1_kp2], [obj2_kp1, obj2_kp2], [obj3_kp1, obj3_kp2]], where each keypoint is in [x, y]-format            
            # Then we need to convert it to the following list:
            # [obj1_kp1, obj1_kp2, obj2_kp1, obj2_kp2, obj3_kp1, obj3_kp2]
            keypoints_original_flattened = [el[0:2] for kp in keypoints_original for el in kp]
            
            # Apply augmentations
            transformed = self.transform(image=img_original, bboxes=bboxes_original, bboxes_labels=bboxes_labels_original, keypoints=keypoints_original_flattened)
            img = transformed['image']
            bboxes = transformed['bboxes']
            # Unflattening list transformed['keypoints']
            # For example, if we have the following list of keypoints for three objects (each object has two keypoints):
            # [obj1_kp1, obj1_kp2, obj2_kp1, obj2_kp2, obj3_kp1, obj3_kp2], where each keypoint is in [x, y]-format
            # Then we need to convert it to the following list:
            # [[obj1_kp1, obj1_kp2], [obj2_kp1, obj2_kp2], [obj3_kp1, obj3_kp2]]
            keypoints_transformed_unflattened = np.reshape(np.array(transformed['keypoints']), (-1,1,2)).tolist()

            # Converting transformed keypoints from [x, y]-format to [x,y,visibility]-format by appending original visibilities to transformed coordinates of keypoints
            keypoints = []
            for o_idx, obj in enumerate(keypoints_transformed_unflattened):
#                 print("object", obj)
#                 print(" obj index", o_idx)# Iterating over objects
                obj_keypoints = []
                for k_idx, kp in enumerate(obj): # Iterating over keypoints in each object
                    obj_keypoints.append(kp + [keypoints_original[o_idx][k_idx][2]])
                keypoints.append(obj_keypoints)
        
        else:
            img, bboxes, keypoints = img_original, bboxes_original, keypoints_original        
        
        # Convert everything into a torch tensor        
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)       
        target = {}
        labels = [1, 2, 3, 4, 5, 6]            
        target["boxes"] = bboxes
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) # all objects are joint positions
        target["image_id"] = torch.tensor([idx])
        target["area"] = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
        target["iscrowd"] = torch.zeros(len(bboxes), dtype=torch.int64)
        target["keypoints"] = torch.as_tensor(keypoints, dtype=torch.float32)
        img = F.to_tensor(img)        
        bboxes_original = torch.as_tensor(bboxes_original, dtype=torch.float32)
        target_original = {}
        target_original["boxes"] = bboxes_original
        target_original["labels"] = torch.as_tensor(labels, dtype=torch.int64) # all objects are glue tubes
        target_original["image_id"] = torch.tensor([idx])
        target_original["area"] = (bboxes_original[:, 3] - bboxes_original[:, 1]) * (bboxes_original[:, 2] - bboxes_original[:, 0])
        target_original["iscrowd"] = torch.zeros(len(bboxes_original), dtype=torch.int64)
        target_original["keypoints"] = torch.as_tensor(keypoints_original, dtype=torch.float32)        
        img_original = F.to_tensor(img_original)

        if self.demo:
            return img, target, img_original, target_original, img_file
        else:
            return img, target, img_file
    
    def __len__(self):
        return len(self.imgs_files)
    

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as func
import math
import numpy as np
from torch.autograd import Variable
import torch_geometric.nn as pyg
from torch_geometric.data import Data

_EPS = 1e-10

class SelfAttention(nn.Module):
    def __init__(self, hidden_dim):
        super(SelfAttention, self).__init__()
        self.projection = nn.Sequential(
            nn.Linear(hidden_dim, 64),
            nn.ReLU(True),
            nn.Linear(64, hidden_dim)
        )

    def forward(self, x):
        # x: (batch_size, seq_len, hidden_dim)
        energy = self.projection(x)  # (batch_size, seq_len, 1)
        weights = func.softmax(energy, dim=-1)  # (batch_size, seq_len)
        outputs = (x * weights)  # (batch_size, hidden_dim)
        return outputs, weights

class AttentionMLP(nn.Module):
    def __init__(self, n_in, n_hid, n_out, do_prob=0.):
        super(AttentionMLP, self).__init__()
        self.fc1 = nn.Linear(n_in, n_hid)
        self.attn = SelfAttention(n_hid)
        self.fc2 = nn.Linear(n_hid, n_out)
        self.bn = nn.BatchNorm1d(n_out)
        self.dropout_prob = do_prob
        
    def batch_norm(self, inputs):
        x = inputs.view(inputs.size(0) * inputs.size(1), -1)
        x = self.bn(x)
        return x.view(inputs.size(0), inputs.size(1), -1)

    def forward(self, inputs):
        x = func.elu(self.fc1(inputs))
        print("shape of x before attention", x.shape)
        x, _ = self.attn(x)
        print("shape of x after attn", x.shape)
        x = func.dropout(x, self.dropout_prob, training=self.training)
        x = func.elu(self.fc2(x))
        print("batch norm input", x.shape)
        return self.batch_norm(x)

class GraphEncoder(nn.Module):
    def __init__(self, n_in, n_hid, n_out=4, do_prob=0., factor=True):
        super(GraphEncoder, self).__init__()

        self.factor = factor

        self.mlp1 = AttentionMLP(n_in, n_hid, n_hid, do_prob)
        self.mlp2 = AttentionMLP(n_hid * 2, n_hid, n_hid, do_prob)
        self.mlp3 = AttentionMLP(n_hid, n_hid, n_hid, do_prob)
        if self.factor:
            self.mlp4 = AttentionMLP(n_hid * 3, n_hid, n_hid, do_prob)
            print("Using factor graph MLP encoder.")
        else:
            self.mlp4 = AttentionMLP(n_hid * 2, n_hid, n_hid, do_prob)
#             print("mlp4", self.mlp4)
            print("Using MLP graph encoder.")
        self.fc_out = nn.Linear(n_hid, n_out)
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight.data)
                m.bias.data.fill_(0.1)

    def edge2node(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        incoming = torch.matmul(rel_rec.t(), x)
#         print("Nodes in edge2node: ", incoming)
#         print("final incoming: ", incoming / incoming.size(1))
        return incoming / incoming.size(1)

    def node2edge(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        receivers = torch.matmul(rel_rec, x)
        senders = torch.matmul(rel_send, x)
        edges = torch.cat([receivers, senders], dim=2)
#         print("Edges in node2edge: ", edges)
        return edges

    def forward(self, inputs, rel_rec, rel_send):
        # Input shape: [num_sims, num_atoms, num_timesteps, num_dims]
        x = inputs.view(inputs.size(0), inputs.size(1), -1)
#         print("x shape:", x.shape)
#         print("rel_rec shape:", rel_rec.shape)
#         print("rel_send shape:", rel_send.shape)

        # New shape: [num_sims, num_atoms, num_timesteps*num_dims]
        x = self.mlp1(x)  # 2-layer ELU net per node

        x = self.node2edge(x, rel_rec, rel_send)
        x = self.mlp2(x)
        x_skip = x    
        
        if self.factor:
            x = self.edge2node(x, rel_rec, rel_send)
            x = self.mlp3(x)
            x = self.node2edge(x, rel_rec, rel_send)
            x = torch.cat((x, x_skip), dim=2)  # Skip connection
            x = self.mlp4(x)
        else:
            x = self.mlp3(x)
            x = torch.cat((x, x_skip), dim=2)  # Skip connection
            x = self.mlp4(x)
            
#         print("output of graph encoder: ", self.fc_out(x))

        return self.fc_out(x)    
    
class GraphDecoder(nn.Module):

    def __init__(self, n_in_node, edge_types, msg_hid, msg_out, n_hid,
                 do_prob=0., skip_first=False):
        super(GraphDecoder, self).__init__()
        self.msg_fc1 = nn.ModuleList(
            [nn.Linear(2 * n_in_node, msg_hid) for _ in range(edge_types)])
        self.msg_fc2 = nn.ModuleList(
            [nn.Linear(msg_hid, msg_out) for _ in range(edge_types)])
        self.msg_out_shape = msg_out
        self.skip_first_edge_type = skip_first

        self.out_fc1 = nn.Linear(n_in_node + msg_out, n_hid)
        self.out_fc2 = nn.Linear(n_hid, n_hid)
        self.out_fc3 = nn.Linear(n_hid, n_in_node)

        print('Using learned graph decoder.')

        self.dropout_prob = do_prob

    def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send,
                            single_timestep_rel_type):

        # single_timestep_inputs has shape
        # [batch_size, num_timesteps, num_atoms, num_dims]

        # single_timestep_rel_type has shape:
        # [batch_size, num_timesteps, num_atoms*(num_atoms-1), num_edge_types]

        # Node2edge
        receivers = torch.matmul(rel_rec, single_timestep_inputs)
        senders = torch.matmul(rel_send, single_timestep_inputs)
        pre_msg = torch.cat([receivers, senders], dim=-1)

        all_msgs = Variable(torch.zeros(pre_msg.size(0), pre_msg.size(1),self.msg_out_shape))
        if single_timestep_inputs.is_cuda:
            all_msgs = all_msgs.cuda()

        if self.skip_first_edge_type:
            start_idx = 1
        else:
            start_idx = 0

        # Run separate MLP for every edge type
        # NOTE: To exlude one edge type, simply offset range by 1
        for i in range(start_idx, len(self.msg_fc2)):
            msg = func.relu(self.msg_fc1[i](pre_msg))
            msg = func.dropout(msg, p=self.dropout_prob)
            msg = func.relu(self.msg_fc2[i](msg))
            msg = msg * single_timestep_rel_type[:, :, i:i + 1]
            all_msgs += msg

        # Aggregate all msgs to receiver
        agg_msgs = all_msgs.transpose(-2, -1).matmul(rel_rec).transpose(-2, -1)
        agg_msgs = agg_msgs.contiguous()

        # Skip connection
        aug_inputs = torch.cat([single_timestep_inputs, agg_msgs], dim=-1)

        # Output MLP
        pred = func.dropout(func.relu(self.out_fc1(aug_inputs)), p=self.dropout_prob)
        pred = func.dropout(func.relu(self.out_fc2(pred)), p=self.dropout_prob)
        pred = self.out_fc3(pred)
#        print(pred.shape,single_timestep_inputs.shape)
#         print("output for single time steps fwd: ", single_timestep_inputs, pred, single_timestep_inputs + pred)
        # Predict position/velocity difference
        return single_timestep_inputs + pred

    def forward(self, inputs, rel_type, rel_rec, rel_send, pred_steps=4):
        # NOTE: Assumes that we have the same graph across all samples.


        # Only take n-th timesteps as starting points (n: pred_steps)
        last_pred = inputs[:, :, :]
        #asa
        curr_rel_type = rel_type[:, :, :]
        preds=[]
        #print(curr_rel_type.shape)
        # NOTE: Assumes rel_type is constant (i.e. same across all time steps).

        # Run n prediction steps
        for step in range(0, pred_steps):
            last_pred = self.single_step_forward(last_pred, rel_rec, rel_send,
                                                 curr_rel_type)
            preds.append(last_pred)

        sizes = [preds[0].size(0), preds[0].size(1),
                 preds[0].size(2)]

        output = Variable(torch.zeros(sizes))
        if inputs.is_cuda:
            output = output.cuda()

        # Re-assemble correct timeline
        for i in range(len(preds)):
            output[:, :, :] = preds[i]

        pred_all = output[:, :, :]

        # NOTE: We potentially over-predicted (stored in future_pred). Unused.
        # future_pred = output[:, (inputs.size(1) - 1):, :, :]
#         print("output for pred_all", pred_all)
        return pred_all#.transpose(1, 2).contiguous()    


In [8]:
def my_softmax(input, axis=1):
    trans_input = input.transpose(axis, 0).contiguous()
    soft_max_1d = func.softmax(trans_input,dim=0)
    return soft_max_1d.transpose(axis, 0)


In [9]:
class KeypointPipeline(nn.Module):
    def __init__(self, weights_path):
        super(KeypointPipeline, self).__init__()  
        self.keypoint_model = torch.load(weights_path).to(device)
        self.encoder = GraphEncoder(4,512,4,0.5,True)
        self.decoder = GraphDecoder(n_in_node=4,
                                 edge_types=2,
                                 msg_hid=512,
                                 msg_out=512,
                                 n_hid=512,
                                 do_prob=0.5,
                                 skip_first=False)
        
#         self.off_diag = np.ones([6,6]) - np.eye(6)

#         self.rel_rec = np.array(encode_onehot(np.where(self.off_diag)[1]), dtype=np.float32)
#         self.rel_send = np.array(encode_onehot(np.where(self.off_diag)[0]), dtype=np.float32)
#         self.rel_rec = torch.FloatTensor(self.rel_rec)
#         self.rel_send = torch.FloatTensor(self.rel_send)

        num_nodes = 6
        self.off_diag = np.zeros([num_nodes, num_nodes])        
#         # Creating a cycle: 1->2, 2->3, ..., 6->1
#         for i in range(num_nodes):
#             self.off_diag[i, (i + 1) % num_nodes] = 1

        # Creating a bidirectional cycle
#         for i in range(num_nodes):
#             # Forward connection: i -> (i + 1) % num_nodes
#             self.off_diag[i, (i + 1) % num_nodes] = 1

#             # Backward connection: i -> (i - 1 + num_nodes) % num_nodes
#             # The addition of num_nodes before modulo ensures a positive index
#             self.off_diag[i, (i - 1 + num_nodes) % num_nodes] = 1

        # Creating a bidirectional, non-cyclic graph
        for i in range(num_nodes):
            # Forward connection: i -> (i + 1), except for the last node
            if i < num_nodes - 1:  # This prevents connecting the last node to the first
                self.off_diag[i, i + 1] = 1

            # Backward connection: i -> (i - 1), except for the first node
            if i > 0:  # This prevents connecting the first node to the last
                self.off_diag[i, i - 1] = 1

        # Update rel_rec and rel_send based on the new off_diag
        self.rel_rec = np.array(encode_onehot(np.where(self.off_diag)[1]), dtype=np.float32)
        self.rel_send = np.array(encode_onehot(np.where(self.off_diag)[0]), dtype=np.float32)
        self.rel_rec = torch.FloatTensor(self.rel_rec).to(device)
        self.rel_send = torch.FloatTensor(self.rel_send).to(device)

        self.encoder= self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        self.rel_rec = self.rel_rec.cuda()
        self.rel_send = self.rel_send.cuda()
    
    def process_model_output(self, output):
        scores = output[0]['scores'].detach().cpu().numpy()
        high_scores_idxs = np.where(scores > 0.7)[0].tolist()

        post_nms_idxs = torchvision.ops.nms(output[0]['boxes'][high_scores_idxs], 
                                            output[0]['scores'][high_scores_idxs], 0.3).cpu().numpy()

        confidence = output[0]['scores'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()
        labels = output[0]['labels'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()
        keypoints = []
        for idx, kps in enumerate(output[0]['keypoints'][high_scores_idxs][post_nms_idxs].detach().cpu().numpy()):
            keypoints.append(list(map(int, kps[0,0:2])) + [confidence[idx]] + [labels[idx]])
        
        # Sort keypoints based on label
        keypoints.sort(key=lambda x: x[-1])
        print("Keypoints: ", keypoints)
        return keypoints
    
    def keypoints_to_graph(self, keypoints, image_width, image_height):
        # keypoints is expected to be a tensor with shape (num_keypoints, 4),
        # where each keypoint is (x, y, score, label).
        # Convert all elements in keypoints to tensors if they are not already
        keypoints = [torch.tensor(kp, dtype=torch.float32).to(device) if not isinstance(kp, torch.Tensor) else kp for kp in keypoints]

        # Then stack them
        keypoints = torch.stack(keypoints).to(device)        
        
        # Remove duplicates: Only keep the keypoint with the highest score for each label
        unique_labels, best_keypoint_indices = torch.unique(keypoints[:, 3], return_inverse=True)
        best_scores, best_indices = torch.max(keypoints[:, 2].unsqueeze(0) * (best_keypoint_indices == torch.arange(len(unique_labels)).unsqueeze(1).cuda()), dim=1)
        keypoints = keypoints[best_indices]
        
#         print("init keypoints in graph features", keypoints)

        # Normalize x and y to be in the range [-1, 1]
        keypoints[:, 0] = (keypoints[:, 0] - image_width / 2) / (image_width / 2)
        keypoints[:, 1] = (keypoints[:, 1] - image_height / 2) / (image_height / 2)

        # Use only x, y, and score for the graph features
        graph_features = keypoints[:, :4]  # Now shape is (num_keypoints, 3)
        
        # Ensure the shape is [num_keypoints, 3] before returning
        graph_features = graph_features.view(-1, 4)  # Reshape to ensure it's [num_keypoints, 3]
#         print("graph features", graph_features)
#         print("graph features shape", graph_features.shape)
        print("graph feature: ", graph_features)

        return graph_features
        
    def forward(self, imgs):
        # Temporarily set the keypoint model to evaluation mode
        keypoint_model_training = self.keypoint_model.training
        self.keypoint_model.eval()

        # Process each image in the batch
        with torch.no_grad():
            batch_outputs = [self.keypoint_model(img.unsqueeze(0).to(device)) for img in imgs]

        # Set the keypoint model back to its original training mode
        self.keypoint_model.train(mode=keypoint_model_training)

        # Process model outputs to get labeled keypoints
        batch_labeled_keypoints = [self.process_model_output(output) for output in batch_outputs]
        # Generate graph input tensor for each image and handle varying number of keypoints
        batch_x = []
        for labeled_keypoints in batch_labeled_keypoints:
            keypoints = self.keypoints_to_graph(labeled_keypoints, 640, 480)

            # Initialize x with zeros for 6 nodes with 4 features each
            x = torch.zeros(1, 6, 4, device=device)

            # Ensure that keypoints are on the correct device and fill in x
            num_keypoints_detected = keypoints.size(0)
            if num_keypoints_detected <= 6:
                x[0, :num_keypoints_detected, :] = keypoints
            else:
                raise ValueError("Number of keypoints detected exceeds the maximum of 6.")

            batch_x.append(x)

        # Stack the batch of x tensors for batch processing
        batch_x = torch.cat(batch_x, dim=0)

        # Forward pass through the encoder and decoder
        logits = self.encoder(batch_x, self.rel_rec, self.rel_send)
        edges = my_softmax(logits, -1)
        KGNN2D = self.decoder(batch_x, edges, self.rel_rec, self.rel_send)

        return logits, KGNN2D, batch_labeled_keypoints

In [10]:
def loss_edges(valid_points, edges):
#     num_nodes = 6
    print("valid_points: ", valid_points)
    batch_size, num_nodes = valid_points.shape
    print(batch_size, num_nodes)
    off_diag = np.zeros([num_nodes, num_nodes])    
    print("valid_points shape: ", valid_points.shape)
    print("off_diag shape: ", off_diag.shape)

    # Creating a bidirectional, cyclic graph
#     for i in range(num_nodes):
#         next_node = (i + 1) % num_nodes  # Ensures cyclic behavior
#         off_diag[i, next_node] = 1  # Connect node i to node i+1 (or to the first node if i is the last node)
#         off_diag[next_node, i] = 1  # Connect node i+1 (or the first node if i is the last node) back to node i

#     Creating a bidirectional, non-cyclic graph
    for i in range(num_nodes):
        # Forward connection: i -> (i + 1), except for the last node
        if i < num_nodes - 1:  # This prevents connecting the last node to the first
            off_diag[i, i + 1] = 1

        # Backward connection: i -> (i - 1), except for the first node
        if i > 0:  # This prevents connecting the first node to the last
            off_diag[i, i - 1] = 1   
        
#     print("off_diag original: ", off_diag)
        
    # Convert off_diag to tensor for indexing
    off_diag = torch.tensor(off_diag, dtype=torch.bool, device='cuda')
    
#     print("off_diag: ", off_diag.shape, off_diag)
    # Extracting indices where there is a relationship
    idx = torch.where(off_diag)[1].reshape((num_nodes-1), -1).to(device='cuda', dtype=torch.long)
#     print(torch.where(off_diag)[1])
#     print("idx: ", idx)# Adjust relations calculation to accommodate batches and node relationships
    relations = torch.zeros((batch_size, (num_nodes-1) * 2), device='cuda')
#     print("relations: ", relations)
    
    for count, vis in enumerate(valid_points):
        vis = vis.view(-1, 1).float()
#         print("vis: ", vis)
        vis_tran_mat = vis*vis.t() # Matrix multiplication to get visibility matrix
#         print("vis after vis*vis.t(): ", vis_tran_mat)
        vis_selected = torch.gather(vis_tran_mat, 1, idx)  # Gather visible relations based on the bidirectional cyclic graph
#         print("vis after gather: ", vis_selected)
#         print("vis with flatten: ", vis_selected.view(-1))
        relations[count] = vis_selected.view(-1)  # Flatten and assign
#     print("relations", relations.shape)
    relations = relations.to(torch.long)  # Ensure correct dtype for loss calculation
    # Calculate and return cross-entropy loss
#     print("edges shape: ", edges.shape, edges)
    print("relations reshaped", relations.view(-1).shape)
    print("edges reshaped", edges.view(-1,4).shape)
    loss_edges = func.cross_entropy(edges.view(-1, 4), relations.view(-1))
    return loss_edges



In [11]:
# def nll_gaussian(preds, target, variance, add_const=False):
#     neg_log_p = ((preds - target) ** 2 / (2 * variance))
#     if add_const:
#         const = 0.5 * np.log(2 * np.pi * variance)
#         neg_log_p += const
#     return neg_log_p.sum() / (target.size(0) * target.size(1))

# def kgnn2d_loss(keypoints_gt, valid_points, keypoints_logits):
#     # Ensure data types are consistent and move tensors to the appropriate device
#     keypoints_gt = keypoints_gt.type(torch.FloatTensor).cuda()
#     keypoints_logits = keypoints_logits.type(torch.FloatTensor).cuda()
#     valid_points = valid_points.type(torch.FloatTensor).cuda()

#     # Print shapes for debugging
# #     print(f"keypoints_gt.shape: {keypoints_gt.shape}")
# #     print(f"keypoints_logits.shape: {keypoints_logits.shape}")
# #     print(f"valid_points.shape: {valid_points.shape}")
#     keypoints_gt = keypoints_gt.type(torch.FloatTensor)*valid_points.unsqueeze(2).type(torch.FloatTensor)
#     keypoints_logits = keypoints_logits.type(torch.FloatTensor)*valid_points.unsqueeze(2).type(torch.FloatTensor)
#     keypoints_gt = keypoints_gt.cuda()
#     keypoints_logits = keypoints_logits.cuda()
#     loss_occ = nll_gaussian(keypoints_gt[:,:,0:2], keypoints_logits[:,:,0:2] , 0.1)
#     return loss_occ

In [12]:
def kgnn2d_loss(gt_keypoints, pred_keypoints):
    loss = func.mse_loss(pred_keypoints, gt_keypoints)

    return loss

In [13]:
def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    return labels_onehot
# def process_keypoints(keypoints):
#     # Assuming keypoints is a list of Nx3 tensors where N is the number of keypoints
#     # and each keypoint is represented as [x, y, visibility]
#     # Remove the unnecessary middle dimension
#     keypoints = [kp.squeeze(1) for kp in keypoints]
#     visibilities = [kp[:, 2] for kp in keypoints]  # Extract visibility flags
#     valid_vis_all = torch.cat([v == 1 for v in visibilities]).long().cuda()
#     valid_invis_all = torch.cat([v == 0 for v in visibilities]).long().cuda()

#     keypoints_gt = torch.cat([kp[:, :2] for kp in keypoints]).float().cuda()  # Gather all keypoints and discard visibility flags
#     keypoints_gt = keypoints_gt.view(-1, 2).unsqueeze(0)  # Add an extra dimension to match expected shape for loss_edges

#     return keypoints_gt, valid_vis_all, valid_invis_all

# def process_batch_keypoints(batch_keypoints):
#     # Assuming batch_keypoints is a batch of keypoints tensors
#     # Each tensor in the batch has shape [N, 3] where N is the number of keypoints
#     # and each keypoint is represented as [x, y, visibility]

#     # Concatenate all keypoints and visibilities from the batch
#     all_keypoints = torch.cat([kp for kp in batch_keypoints])
#     visibilities = all_keypoints[:, 2]  # Extract visibility flags

#     valid_vis_all = (visibilities == 1).long().cuda()
#     valid_invis_all = (visibilities == 0).long().cuda()

#     keypoints_gt = all_keypoints[:, :2].float().cuda()  # Discard visibility flags
#     keypoints_gt = keypoints_gt.view(-1, 2)  # Reshape for consistency

#     return keypoints_gt, valid_vis_all, valid_invis_all

def process_batch_keypoints(target_dicts):
    # This function now expects target_dicts, a list of dictionaries containing keypoints information
    batch_size = len(target_dicts)
#     print(batch_size)

    # Initialize lists to store keypoints and visibilities for each image in the batch
    keypoints_list = []
    visibilities_list = []

    for dict_ in target_dicts:
        # Each keypoints tensor in the dict is expected to have a shape [num_keypoints, 3]
        keypoints = dict_['keypoints'].squeeze(1).to(device)
        print(f"Original shape of keypoints in dict: {keypoints.shape}")

        # Extract x, y coordinates and visibility flags
        xy_coords = keypoints[:, :2]  # Keep only x, y coordinates
        visibilities = keypoints[:, 2]  # Extract visibility flags

        keypoints_list.append(xy_coords)
        visibilities_list.append(visibilities)

    # Concatenate keypoints and visibilities for the entire batch
    # The final shape of keypoints_gt should be [batch_size, num_keypoints, 2]
    keypoints_gt = torch.stack(keypoints_list).float().cuda()
    visibilities = torch.stack(visibilities_list).cuda()

    # Create valid visibility masks
    valid_vis_all = (visibilities == 1).long().cuda()
    valid_invis_all = (visibilities == 0).long().cuda()
    return keypoints_gt, valid_vis_all, valid_invis_all

def reorder_batch_keypoints(batch_keypoints):
    # Assuming batch_keypoints is a tensor of shape [batch_size, num_keypoints, num_features]
    batch_size, num_keypoints, num_features = batch_keypoints.shape
    reordered_keypoints_batch = []

    for i in range(batch_size):
        # Directly use the normalized keypoints
        normalized_keypoints = batch_keypoints[i]

        # Initialize a tensor for reordered keypoints with only x, y coordinates
        reordered_normalized_keypoints = torch.zeros(num_keypoints, 2, device=batch_keypoints.device)

        # Reordering logic
        rounded_labels = torch.round(normalized_keypoints[:, -1]).int()
        used_indices = []
        for label in range(1, 7):
            valid_idx = (rounded_labels == label).nonzero(as_tuple=True)[0]
            if valid_idx.numel() > 0:
                reordered_normalized_keypoints[label - 1] = normalized_keypoints[valid_idx[0], :2]
            else:
                invalid_idx = ((rounded_labels < 1) | (rounded_labels > 6)).nonzero(as_tuple=True)[0]
                invalid_idx = [idx for idx in invalid_idx if idx not in used_indices]
                if invalid_idx:
                    reordered_normalized_keypoints[label - 1] = normalized_keypoints[invalid_idx[0], :2]
                    used_indices.append(invalid_idx[0])

        reordered_keypoints_batch.append(reordered_normalized_keypoints)

    return torch.stack(reordered_keypoints_batch)

def denormalize_keypoints(batch_keypoints, width=640, height=480):
    # Assuming batch_keypoints is a batch of normalized keypoints tensors
    # Denormalize each keypoint in the batch
    denormalized_keypoints = []
    for kp in batch_keypoints:
        denormalized_x = (kp[:, 0] * (width / 2)) + (width / 2)
        denormalized_y = (kp[:, 1] * (height / 2)) + (height / 2)
        denormalized_kp = torch.stack((denormalized_x, denormalized_y), dim=1)
        denormalized_keypoints.append(denormalized_kp)
        
    denormalized_keypoints = torch.stack(denormalized_keypoints)
#     print("denormalized_keypoints.shape", denormalized_keypoints.shape)
    return denormalized_keypoints

In [27]:
# Define the model
model = KeypointPipeline(weights_path)
model = model.to(device)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 1 # Define your number of epochs
batch_size = 2

split_folder_path = train_test_split(root_dir)
KEYPOINTS_FOLDER_TRAIN = split_folder_path +"/train" #train_test_split(root_dir) +"/train"
KEYPOINTS_FOLDER_VAL = split_folder_path +"/val"
KEYPOINTS_FOLDER_TEST = split_folder_path +"/test"

dataset_train = KPDataset(KEYPOINTS_FOLDER_TRAIN, transform=None, demo=False)
dataset_val = KPDataset(KEYPOINTS_FOLDER_VAL, transform=None, demo=False)
dataset_test = KPDataset(KEYPOINTS_FOLDER_TEST, transform=None, demo=False)

data_loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
data_loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn)

v = 1

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for imgs, target_dicts, _ in data_loader_train:
        imgs = [img.to(device) for img in imgs]
        optimizer.zero_grad()

        # Forward pass for batch
        logits, KGNN2D, batch_labeled_keypoints = model(imgs)
#         print(KGNN2D.shape)
        print("Normalized Prediction in training", KGNN2D)

        # Process keypoints for the entire batch
        keypoints_gt, valid_vis_all, valid_invis_all = process_batch_keypoints(target_dicts)
        print("gt keypoints for loss", keypoints_gt)
#         print("valid_invis_all.shape", valid_invis_all.shape)
        
        # Normalize and reorder keypoints as per your existing logic
        # Ensure this logic works on the batch level
        
        reordered_normalized_keypoints = reorder_batch_keypoints(KGNN2D)
#         print("keypoints_logits.shape", reordered_normalized_keypoints.shape)
        # Denormalize the reordered keypoints for the entire batch
        print("Normalized Reordered", reordered_normalized_keypoints)
        denormalized_keypoints = denormalize_keypoints(reordered_normalized_keypoints)
        print("Denormalized Prediction in training", denormalized_keypoints)
        
#         print(valid_vis_all.shape)
#         print(logits.shape)
        
#         loss_kgnn2d = kgnn2d_loss(keypoints_gt, valid_invis_all, denormalized_keypoints)
        loss_kgnn2d = kgnn2d_loss(keypoints_gt, denormalized_keypoints)

        # Compute batch losses
        edge_loss = loss_edges(valid_vis_all, logits)
        
#         loss_kgnn2d = kgnn2d_loss(keypoints_gt, denormalized_keypoints, valid_vis_all)

        # Combine the losses
        total_batch_loss = edge_loss + loss_kgnn2d
        total_batch_loss.backward()
        optimizer.step()
        total_loss += total_batch_loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(data_loader_train)}')

Using factor graph MLP encoder.
Using learned graph decoder.


KeyboardInterrupt: 

In [None]:
import torch
from torchvision.transforms import functional as F
from PIL import Image
import cv2
import numpy as np

# Assuming KeypointPipeline and all necessary classes/functions are defined above or imported

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Path to the trained model weights
model_path = '/home/jc-merlab/Pictures/Data/trained_models/occ_ckpt/ckpt_e100.pth'

# Load the model
model = torch.load(model_path)
model = model.to(device)
model.eval()  # Set the model to evaluation mode

def prepare_image(image_path):
    """
    Load an image and prepare it for the model prediction.
    This function should replicate the preprocessing applied during training.
    """
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Your preprocessing steps here, for example, resizing, normalization, etc.
    # This is a placeholder transformation, adjust it according to your training transformations
    img_tensor = F.to_tensor(img).to(device)  # Add batch dim and convert to tensor
    return img_tensor

def predict(model, img_tensor):
    # Assuming the model takes a list of tensors as input
    with torch.no_grad():
        logits, KGNN2D, batch_labeled_keypoints = model([img_tensor])
    # Process the output as needed
    return KGNN2D

def postprocess_keypoints(keypoints, width=640, height=480):
    # Adjust this function based on your model's output format
    denormalized_keypoints = denormalize_keypoints(keypoints, width, height)
    return denormalized_keypoints

image_path = '/home/jc-merlab/Pictures/Data/occ_sim_dataset/004866.rgb.jpg'


# Example usage
img_tensor = prepare_image(image_path).to(device)
KGNN2D = predict(model, img_tensor)
ordered_keypoints = reorder_batch_keypoints(KGNN2D)
denormalized_keypoints = postprocess_keypoints(ordered_keypoints)
print(denormalized_keypoints)

In [None]:
"keypoints": [[[257.95220042652915, 366.9198630617724, 1]], [[257.95973939799904, 283.013113744617, 1]], 
              [[179.70016595863137, 298.244571585954, 1]], [[175.69899307811323, 277.8348649543144, 0]], 
              [[79.90486225732596, 303.90392338594796, 0]], 
              [[66.15504343164937, 289.4557892368882, 1]]]

In [14]:
import cv2
import os
import torch
from torchvision.transforms import functional as F

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the model
model_path = '/home/jc-merlab/Pictures/Data/trained_models/occ_ckpt_2/ckpt_e120.pth'
model = torch.load(model_path)
model = model.to(device)
model.eval()  # Set the model to evaluation mode

def prepare_image(image_path):
    """
    Load an image and prepare it for the model prediction.
    This function should replicate the preprocessing applied during training.
    """
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Your preprocessing steps here, for example, resizing, normalization, etc.
    # This is a placeholder transformation, adjust it according to your training transformations
    img_tensor = F.to_tensor(img).to(device)  # Add batch dim and convert to tensor
    return img_tensor


def predict(model, img_tensor):
    # Assuming the model takes a list of tensors as input
    with torch.no_grad():
        logits, KGNN2D, batch_labeled_keypoints = model([img_tensor])
    # Process the output as needed
    return KGNN2D

def postprocess_keypoints(keypoints, width=640, height=480):
    # Adjust this function based on your model's output format
    denormalized_keypoints = denormalize_keypoints(keypoints, width, height)
    return denormalized_keypoints

def visualize_keypoints(image_path, keypoints, out_dir):
    """
    Visualize keypoints on the image using cv2.
    """
    # Load the original image
    img = cv2.imread(image_path)
    # Convert keypoints to a NumPy array if it's a tensor
    if torch.is_tensor(keypoints):
        keypoints = keypoints.cpu().numpy()
    
    # Draw keypoints on the image
    for kp in keypoints[0]:  # Assuming the keypoints shape is [1, N, 2]
        x, y = int(kp[0]), int(kp[1])
        cv2.circle(img, (x, y), radius=9, color=(0, 0, 0), thickness=-1)
    
   # Construct the output path and save the image
    filename = os.path.basename(image_path)
    output_path = os.path.join(out_dir, filename)
    cv2.imwrite(output_path, img)

def process_folder(folder_path, output_path):
    """
    Process all images in the specified folder, predict and visualize keypoints.
    """
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            img_tensor = prepare_image(image_path).to(device)
            KGNN2D = predict(model, img_tensor)
            ordered_keypoints = reorder_batch_keypoints(KGNN2D)  # Ensure this function is defined
            denormalized_keypoints = postprocess_keypoints(ordered_keypoints)
            visualize_keypoints(image_path, denormalized_keypoints, output_path)

# Example usage
folder_path = '/home/jc-merlab/Pictures/Data/occ_test_data/'
output_path = '/home/jc-merlab/Pictures/Data/occ_test_data/output_attn/'
process_folder(folder_path, output_path)

Keypoints:  [[258, 367, 0.9999565, 1], [322, 308, 0.99733907, 5], [339, 296, 0.99838626, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [ 0.0063,  0.2833,  0.9973,  5.0000],
        [ 0.0594,  0.2333,  0.9984,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999597, 1], [304, 214, 0.9984475, 4], [400, 197, 0.99874544, 5], [420, 186, 0.9967391, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.

Keypoints:  [[258, 367, 0.9999598, 1], [258, 283, 0.9999119, 2], [310, 223, 0.9992461, 3], [326, 237, 0.99979097, 4], [308, 332, 0.73011726, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0312, -0.0708,  0.9992,  3.0000],
        [ 0.0188, -0.0125,  0.9998,  4.0000],
        [-0.0375,  0.3833,  0.7301,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.9999045, 2], [178, 277, 0.99977523, 3], [179, 256, 0.9997702, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1542,  0.9998,  3.0000],
        [-0.4406,  0.0667,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999175, 2], [161, 121, 0.91463053, 3]]
graph fea

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999424, 2], [399, 140, 0.7243855, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.2469, -0.4167,  0.7244,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.99992466, 2], [305, 219, 0.999653, 3], [322, 231, 0.99981767, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.00

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99989927, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995923, 1], [178, 276, 0.9998584, 3], [180, 256, 0.9996958, 4], [157, 159, 0.9985258, 5], [179, 155, 0.9949804, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4437,  0.1500,  0.9999,  3.0

Keypoints:  [[258, 283, 0.99992394, 2], [321, 232, 0.9988254, 4], [381, 150, 0.78206694, 6]]
graph feature:  tensor([[-1.9375e-01,  1.7917e-01,  9.9992e-01,  2.0000e+00],
        [ 3.1250e-03, -3.3333e-02,  9.9883e-01,  4.0000e+00],
        [ 1.9062e-01, -3.7500e-01,  7.8207e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999596, 1], [194, 235, 0.99985814, 3], [207, 218, 0.9996824, 4], [297, 178, 0.9989219, 5], [304, 1

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999052, 2], [350, 134, 0.9985952, 5], [371, 134, 0.9990798, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0938, -0.4417,  0.9986,  5.0000],
        [ 0.1594, -0.4417,  0.9991,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999591, 1], [233, 207, 0.9994642, 3], [252, 201, 0.9997657, 4], [347, 170, 0

Keypoints:  [[258, 283, 0.9999118, 2], [375, 203, 0.9966625, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.1719, -0.1542,  0.9967,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995863, 1], [257, 283, 0.99989414, 2], [178, 276, 0.9994697, 3], [156, 159, 0.99892515, 5], [173, 145, 0.99553716, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9999,  2

Keypoints:  [[258, 367, 0.99996054, 1], [180, 298, 0.9993949, 3], [176, 278, 0.99983215, 4], [108, 211, 0.7840631, 4], [99, 186, 0.98448503, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4375,  0.2417,  0.9994,  3.0000],
        [-0.4500,  0.1583,  0.9998,  4.0000],
        [-0.6906, -0.2250,  0.9845,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 366, 0.9999596, 1], [274, 206, 0.9702519, 3], [348, 300,

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.99993384, 2], [232, 207, 0.99976426, 3], [251, 201, 0.99962795, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1375,  0.9998,  3.0000],
        [-0.2156, -0.1625,  0.9996,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [183, 254, 0.9993425, 3], [190, 235, 0.9995034, 4], [94, 199,

Keypoints:  [[258, 367, 0.9999597, 1], [184, 254, 0.99973756, 3], [299, 208, 0.7476816, 3], [191, 234, 0.999905, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4250,  0.0583,  0.9997,  3.0000],
        [-0.4031, -0.0250,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99993837, 2], [232, 207, 0.9995315, 3], [252, 200, 0.99964106, 4], [335, 146, 0.99887437, 5], [351, 130, 0.9980621, 6]]
gra

Keypoints:  [[258, 367, 0.99995697, 1], [257, 282, 0.747433, 4], [338, 340, 0.9367157, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1750,  0.7474,  4.0000],
        [ 0.0563,  0.4167,  0.9367,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995756, 1], [258, 283, 0.99992, 2], [304, 218, 0.9623655, 3], [398, 167, 0.9985359, 5], [409, 148, 0.99714833, 6]]
graph feature:  tensor([[-0.1938

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.999915, 2], [281, 203, 0.92395604, 4], [304, 107, 0.9165879, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1219, -0.1542,  0.9240,  4.0000],
        [-0.0500, -0.5542,  0.9166,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999317, 2], [281, 207, 0.9998252, 3], [301, 213, 

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.99995553, 2], [229, 205, 0.99678624, 4], [346, 185, 0.9807998, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.2844, -0.1458,  0.9968,  4.0000],
        [ 0.0813, -0.2292,  0.9808,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9998766, 2], [183, 253, 0.999752, 3], [191, 234,

Keypoints:  [[258, 283, 0.9999157, 2], [194, 234, 0.99958664, 3], [257, 365, 0.92094684, 4], [205, 217, 0.862869, 4], [239, 124, 0.99828374, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3938, -0.0250,  0.9996,  3.0000],
        [-0.1969,  0.5208,  0.9209,  4.0000],
        [-0.2531, -0.4833,  0.9983,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999577, 1], [295, 213, 0.99969494, 3], [314, 223

Keypoints:  [[258, 367, 0.99995816, 1], [77, 277, 0.9983158, 5], [56, 268, 0.9909986, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.7594,  0.1542,  0.9983,  5.0000],
        [-0.8250,  0.1167,  0.9910,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999567, 1], [258, 282, 0.99992657, 2], [314, 222, 0.82794565, 4], [411, 287, 0.9937915, 5], [423, 304, 0.9949714, 6]]
graph feature:  tensor([[-0.193

Keypoints:  [[258, 367, 0.9999615, 1], [225, 140, 0.9992561, 5], [245, 133, 0.99417996, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2969, -0.4167,  0.9993,  5.0000],
        [-0.2344, -0.4458,  0.9942,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99992776, 2], [382, 274, 0.9995981, 5], [391, 294, 0.99595344, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999058, 2], [258, 203, 0.9996599, 3], [279, 203, 0.9998851, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1938, -0.1542,  0.9997,  3.0000],
        [-0.1281, -0.1542,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990785, 2], [365, 155, 0.9971933, 5], [370, 133, 

Keypoints:  [[257, 283, 0.99986804, 2], [178, 275, 0.9994578, 3], [180, 254, 0.99972874, 4]]
graph feature:  tensor([[-0.1969,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1458,  0.9995,  3.0000],
        [-0.4375,  0.0583,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995875, 1], [211, 218, 0.9998481, 3], [228, 206, 0.9997694, 4], [308, 147, 0.998618, 5], [316, 126, 0.9979771, 6]]
graph feature:  tensor([[-0.19

Keypoints:  [[258, 283, 0.99993336, 2], [253, 200, 0.9821108, 4], [346, 170, 0.99505675, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2094, -0.1667,  0.9821,  4.0000],
        [ 0.0813, -0.2917,  0.9951,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999504, 2], [211, 218, 0.9998229, 3], [228, 206, 0.99975246, 4], [327, 196, 0.99897087, 5], [349, 191, 0.9974891, 6]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.99991393, 2], [303, 213, 0.9882387, 4], [399, 190, 0.9953934, 5], [412, 173, 0.99850595, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0531, -0.1125,  0.9882,  4.0000],
        [ 0.2469, -0.2083,  0.9954,  5.0000],
        [ 0.2875, -0.2792,  0.9985,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99

Keypoints:  [[211, 218, 0.9998317, 3], [228, 206, 0.9995065, 4], [317, 246, 0.9981749, 5]]
graph feature:  tensor([[-0.3406, -0.0917,  0.9998,  3.0000],
        [-0.2875, -0.1417,  0.9995,  4.0000],
        [-0.0094,  0.0250,  0.9982,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [256, 182, 0.8393888, 3], [259, 280, 0.7638131, 4], [328, 118, 0.9983719, 5], [346, 104, 0.9933395, 6]]
graph feature:  tensor([[-0.1938

Keypoints:  [[306, 219, 0.9991736, 3], [321, 230, 0.98300904, 4], [412, 271, 0.9995308, 5], [418, 250, 0.9950982, 6]]
graph feature:  tensor([[-4.3750e-02, -8.7500e-02,  9.9917e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.8301e-01,  4.0000e+00],
        [ 2.8750e-01,  1.2917e-01,  9.9953e-01,  5.0000e+00],
        [ 3.0625e-01,  4.1667e-02,  9.9510e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995947, 1]

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.99989367, 2], [178, 277, 0.999793, 3], [180, 256, 0.9997402, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1542,  0.9998,  3.0000],
        [-0.4375,  0.0667,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999609, 1], [282, 207, 0.99981564, 3], [302, 213, 0.99985063, 4], [388, 169,

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999026, 2], [183, 254, 0.9998271, 3], [191, 235, 0.9998977, 4], [149, 145, 0.9985399, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.0583,  0.9998,  3.0000],
        [-0.4031, -0.0208,  0.9999,  4.0000],
        [-0.5344, -0.3958,  0.9985,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 283, 0.9999174, 2], [355, 107, 0.704375, 2], [257, 203, 0.9997011, 3], [279, 203, 0.99966633, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9997,  3.0000],
        [-0.1281, -0.1542,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.999879, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.17

Keypoints:  [[258, 367, 0.99995446, 1], [256, 203, 0.9996774, 3], [277, 203, 0.9998648, 4], [347, 273, 0.998789, 5], [360, 290, 0.99729866, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2000, -0.1542,  0.9997,  3.0000],
        [-0.1344, -0.1542,  0.9999,  4.0000],
        [ 0.0844,  0.1375,  0.9988,  5.0000],
        [ 0.1250,  0.2083,  0.9973,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999

Keypoints:  [[258, 367, 0.9999604, 1], [282, 207, 0.99982977, 3], [301, 213, 0.999871, 4], [257, 290, 0.7475448, 4], [401, 216, 0.99741095, 5], [412, 198, 0.99831533, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1187, -0.1375,  0.9998,  3.0000],
        [-0.0594, -0.1125,  0.9999,  4.0000],
        [ 0.2531, -0.1000,  0.9974,  5.0000],
        [ 0.2875, -0.1750,  0.9983,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Key

Keypoints:  [[194, 235, 0.9998266, 3], [206, 218, 0.9996301, 4], [240, 125, 0.998343, 5], [251, 106, 0.99716884, 6]]
graph feature:  tensor([[-0.3938, -0.0208,  0.9998,  3.0000],
        [-0.3563, -0.0917,  0.9996,  4.0000],
        [-0.2500, -0.4792,  0.9983,  5.0000],
        [-0.2156, -0.5583,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9998549, 2], [183, 253, 0.99978834, 3], [191, 234, 

Keypoints:  [[258, 283, 0.99994195, 2], [282, 207, 0.99984324, 3], [302, 213, 0.9997273, 4], [370, 142, 0.997815, 5], [370, 120, 0.9958786, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1187, -0.1375,  0.9998,  3.0000],
        [-0.0563, -0.1125,  0.9997,  4.0000],
        [ 0.1562, -0.4083,  0.9978,  5.0000],
        [ 0.1562, -0.5000,  0.9959,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[306, 219, 0.9996

Keypoints:  [[258, 367, 0.9999597, 1], [258, 283, 0.9997576, 2], [180, 297, 0.99919707, 3], [176, 278, 0.999701, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2375,  0.9992,  3.0000],
        [-0.4500,  0.1583,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99992275, 2], [395, 251, 0.99938464, 5], [415, 259

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9998673, 2], [85, 225, 0.9975479, 5], [88, 206, 0.98565125, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.7344, -0.0625,  0.9975,  5.0000],
        [-0.7250, -0.1417,  0.9857,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999409, 2], [232, 207, 0.99924505, 3], [255, 200, 0

Keypoints:  [[258, 283, 0.9998957, 2], [178, 275, 0.9995702, 3], [180, 254, 0.99965525, 4], [80, 187, 0.9838575, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1458,  0.9996,  3.0000],
        [-0.4375,  0.0583,  0.9997,  4.0000],
        [-0.7500, -0.2208,  0.9839,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999269, 2], [281, 207, 0.99987674, 3], [300, 213, 0.9998203, 4]]
graph featu

Keypoints:  [[258, 367, 0.9999591, 1], [284, 207, 0.9997807, 3], [303, 214, 0.99989855, 4], [340, 306, 0.998808, 5], [335, 327, 0.9921956, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1125, -0.1375,  0.9998,  3.0000],
        [-0.0531, -0.1083,  0.9999,  4.0000],
        [ 0.0625,  0.2750,  0.9988,  5.0000],
        [ 0.0469,  0.3625,  0.9922,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995

Keypoints:  [[258, 367, 0.9999566, 1], [258, 283, 0.99992526, 2], [411, 286, 0.99662054, 5], [411, 304, 0.9808246, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.2844,  0.1917,  0.9966,  5.0000],
        [ 0.2844,  0.2667,  0.9808,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995756, 1], [258, 283, 0.99992156, 2], [304, 218, 0.9974396, 3], [397, 16

Keypoints:  [[304, 218, 0.99955744, 3], [321, 230, 0.9997924, 4], [381, 308, 0.9988733, 5], [398, 323, 0.99570686, 6]]
graph feature:  tensor([[-5.0000e-02, -9.1667e-02,  9.9956e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.9979e-01,  4.0000e+00],
        [ 1.9062e-01,  2.8333e-01,  9.9887e-01,  5.0000e+00],
        [ 2.4375e-01,  3.4583e-01,  9.9571e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999604, 2]

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999106, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999577, 2], [211, 218, 0.9998597, 3], [229, 206, 0.9997185, 4], [290, 129, 0.99790215, 5], [312, 125, 0.9980026, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.3406, -0.0917,  0.9999,  3.0

Keypoints:  [[258, 283, 0.99989736, 2], [257, 203, 0.9997676, 3], [278, 203, 0.999833, 4], [383, 264, 0.8389253, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9998,  3.0000],
        [-0.1313, -0.1542,  0.9998,  4.0000],
        [ 0.1969,  0.1000,  0.8389,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[257, 283, 0.99994004, 2], [194, 235, 0.9998086, 3], [206, 218, 0.99960405, 4], [264, 364,

Keypoints:  [[178, 276, 0.99981767, 3], [180, 255, 0.9997546, 4], [111, 184, 0.9988636, 5], [103, 163, 0.99621165, 6]]
graph feature:  tensor([[-0.4437,  0.1500,  0.9998,  3.0000],
        [-0.4375,  0.0625,  0.9998,  4.0000],
        [-0.6531, -0.2333,  0.9989,  5.0000],
        [-0.6781, -0.3208,  0.9962,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.9999082, 2]]
graph feature:  tensor([[-0.1938,  0

Keypoints:  [[258, 367, 0.9999603, 1], [319, 321, 0.9977151, 5], [320, 344, 0.96740127, 6]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-3.1250e-03,  3.3750e-01,  9.9772e-01,  5.0000e+00],
        [ 0.0000e+00,  4.3333e-01,  9.6740e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99992025, 2], [305, 218, 0.99880016, 3], [320, 230, 0.9855623, 4], [411, 1

Keypoints:  [[258, 367, 0.9999584, 1], [178, 276, 0.99981934, 3], [180, 256, 0.99986935, 4], [268, 211, 0.9978035, 5], [290, 208, 0.9972498, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4437,  0.1500,  0.9998,  3.0000],
        [-0.4375,  0.0667,  0.9999,  4.0000],
        [-0.1625, -0.1208,  0.9978,  5.0000],
        [-0.0938, -0.1333,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9999267, 2], [323, 231, 0.9992217, 4], [419, 240, 0.85734886, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0094, -0.0375,  0.9992,  4.0000],
        [ 0.3094,  0.0000,  0.8573,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999578, 1], [258, 282, 0.99990857, 2], [191, 233, 0.8704051, 3], [305, 228,

Keypoints:  [[258, 283, 0.99993706, 2], [282, 206, 0.99943405, 3], [301, 215, 0.99465394, 4], [396, 242, 0.9973916, 5], [417, 234, 0.9980186, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1187, -0.1417,  0.9994,  3.0000],
        [-0.0594, -0.1042,  0.9947,  4.0000],
        [ 0.2375,  0.0083,  0.9974,  5.0000],
        [ 0.3031, -0.0250,  0.9980,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99

Keypoints:  [[194, 235, 0.999826, 3], [207, 218, 0.9996326, 4], [256, 282, 0.8241919, 4], [186, 124, 0.98424673, 5]]
graph feature:  tensor([[-0.3938, -0.0208,  0.9998,  3.0000],
        [-0.3531, -0.0917,  0.9996,  4.0000],
        [-0.4188, -0.4833,  0.9842,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999597, 1], [257, 283, 0.9998324, 2], [180, 297, 0.9941513, 3], [80, 304, 0.96369994, 5], [73, 284, 0.98010916, 6]]
graph 

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99989474, 2], [329, 118, 0.99917054, 5], [350, 111, 0.99642915, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0281, -0.5083,  0.9992,  5.0000],
        [ 0.0938, -0.5375,  0.9964,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999547, 1], [258, 282, 0.9999149, 2], [311, 223, 0.9995173, 3], [326, 237

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999354, 2], [194, 235, 0.99982613, 3], [206, 218, 0.9996012, 4], [240, 124, 0.8556365, 4], [263, 125, 0.97491395, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3938, -0.0208,  0.9998,  3.0000],
        [-0.3563, -0.0917,  0.9996,  4.0000],
        [-0.1781, -0.4792,  0.9749,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Ke

Keypoints:  [[258, 367, 0.9999584, 1], [282, 207, 0.9998203, 3], [302, 214, 0.99988055, 4], [265, 280, 0.864545, 4], [407, 153, 0.98381585, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1187, -0.1375,  0.9998,  3.0000],
        [-0.0563, -0.1083,  0.9999,  4.0000],
        [ 0.2719, -0.3625,  0.9838,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999546, 1], [258, 283, 0.9998895, 2], [310, 222, 

Keypoints:  [[258, 283, 0.9999075, 2], [257, 203, 0.9997836, 3], [277, 203, 0.99981433, 4], [362, 256, 0.93807375, 4], [385, 258, 0.9823554, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9998,  3.0000],
        [-0.1344, -0.1542,  0.9998,  4.0000],
        [ 0.2031,  0.0750,  0.9824,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996006, 1], [282, 207, 0.99982303, 3], [302, 21

Keypoints:  [[258, 283, 0.99985564, 2], [107, 206, 0.9955745, 5], [91, 189, 0.98368376, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.6656, -0.1417,  0.9956,  5.0000],
        [-0.7156, -0.2125,  0.9837,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.99992967, 2], [305, 219, 0.9997003, 3], [321, 231, 0.9998148, 4], [420, 216, 0.99854666, 5]]
graph feature:  tensor([[-1.9

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.999918, 2], [311, 223, 0.9996338, 3], [326, 237, 0.99979836, 4], [419, 262, 0.99629265, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0281, -0.0708,  0.9996,  3.0000],
        [ 0.0188, -0.0125,  0.9998,  4.0000],
        [ 0.3094,  0.0917,  0.9963,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999261, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995697, 1], [305, 219, 0.9996872, 3], [322, 231, 0.99979216, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0469, -0.0875,  0.9997,  3.0000],
        [ 0.0063, -0.0375,  0.9998,  4.0000]]

Keypoints:  [[258, 283, 0.9999598, 2], [228, 205, 0.81124073, 4], [308, 147, 0.9984837, 5], [330, 150, 0.9976071, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.2875, -0.1458,  0.8112,  4.0000],
        [-0.0375, -0.3875,  0.9985,  5.0000],
        [ 0.0312, -0.3750,  0.9976,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999598, 1], [183, 254, 0.99961025, 3], [190, 235, 0.99983287, 4], [107, 182

Keypoints:  [[258, 367, 0.9999596, 1], [183, 254, 0.99975926, 3], [191, 235, 0.99985266, 4], [147, 145, 0.98916245, 5], [167, 140, 0.99496967, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4281,  0.0583,  0.9998,  3.0000],
        [-0.4031, -0.0208,  0.9999,  4.0000],
        [-0.5406, -0.3958,  0.9892,  5.0000],
        [-0.4781, -0.4167,  0.9950,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9

Keypoints:  [[258, 283, 0.99989045, 2], [178, 276, 0.9998528, 3], [180, 255, 0.99980897, 4], [132, 169, 0.99460334, 5], [144, 150, 0.9891467, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1500,  0.9999,  3.0000],
        [-0.4375,  0.0625,  0.9998,  4.0000],
        [-0.5875, -0.2958,  0.9946,  5.0000],
        [-0.5500, -0.3750,  0.9891,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99

Keypoints:  [[258, 283, 0.99995065, 2], [280, 205, 0.9985672, 3], [332, 119, 0.9987739, 5], [340, 98, 0.9976012, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.1250, -0.1458,  0.9986,  3.0000],
        [ 0.0375, -0.5042,  0.9988,  5.0000],
        [ 0.0625, -0.5917,  0.9976,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [257, 283, 0.99994326, 2], [221, 98, 0.9918013, 6]]
graph featu

Keypoints:  [[258, 283, 0.9999176, 2], [296, 213, 0.99961025, 3], [314, 223, 0.9998338, 4], [320, 320, 0.9836896, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0750, -0.1125,  0.9996,  3.0000],
        [-0.0188, -0.0708,  0.9998,  4.0000],
        [ 0.0000,  0.3333,  0.9837,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[194, 235, 0.99981314, 3], [206, 218, 0.999716, 4], [283, 155, 0.99827325, 5], [296, 172,

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99994147, 2], [194, 235, 0.9998503, 3], [206, 218, 0.99963903, 4], [319, 183, 0.99768853, 6]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-1.9375e-01,  1.7917e-01,  9.9994e-01,  2.0000e+00],
        [-3.9375e-01, -2.0833e-02,  9.9985e-01,  3.0000e+00],
        [-3.5625e-01, -9.1667e-02,  9.9964e-01,  4.0000e+00],
        [-3.1250e-03, -2.3750e-01,  9.9769e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1,

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99991596, 2], [281, 207, 0.966506, 3], [332, 119, 0.99849355, 5], [314, 108, 0.9950113, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1219, -0.1375,  0.9665,  3.0000],
        [ 0.0375, -0.5042,  0.9985,  5.0000],
        [-0.0188, -0.5500,  0.9950,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.999

Keypoints:  [[258, 366, 0.99995697, 1], [305, 219, 0.99971277, 3], [322, 231, 0.999879, 4], [413, 270, 0.99939394, 5], [434, 262, 0.9956929, 6]]
graph feature:  tensor([[-0.1938,  0.5250,  1.0000,  1.0000],
        [-0.0469, -0.0875,  0.9997,  3.0000],
        [ 0.0063, -0.0375,  0.9999,  4.0000],
        [ 0.2906,  0.1250,  0.9994,  5.0000],
        [ 0.3563,  0.0917,  0.9957,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 283, 0.9999114, 2], [311, 223, 0.9993794, 3], [326, 238, 0.9997842, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0281, -0.0708,  0.9994,  3.0000],
        [ 0.0188, -0.0083,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99994385, 2], [282, 207, 0.99989533, 3], [301, 213, 0.9998221, 4], [375, 148, 0.97402126, 5], [392, 131, 0.86502, 6]]
graph

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990773, 2], [258, 203, 0.99738055, 3], [374, 203, 0.8071972, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1938, -0.1542,  0.9974,  3.0000],
        [ 0.1688, -0.1542,  0.8072,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999095, 2], [304, 218, 0.9994804, 3], [321, 230, 0.99972147, 4], [382, 309

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990904, 2], [349, 133, 0.99830663, 5], [348, 111, 0.9964456, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0906, -0.4458,  0.9983,  5.0000],
        [ 0.0875, -0.5375,  0.9964,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999144, 2], [304, 218, 0.9992349, 3]]
graph feature:  tensor([[-0.1938,  0

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999176, 2], [183, 254, 0.9996704, 3], [191, 235, 0.99981385, 4], [124, 139, 0.941236, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.0583,  0.9997,  3.0000],
        [-0.4031, -0.0208,  0.9998,  4.0000],
        [-0.6125, -0.4208,  0.9412,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[283, 207, 0.9997

Keypoints:  [[258, 367, 0.99995923, 1], [206, 216, 0.9771015, 4], [315, 249, 0.7596205, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.3563, -0.1000,  0.9771,  4.0000],
        [-0.0156,  0.0375,  0.7596,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999559, 2], [211, 218, 0.99980074, 3], [229, 206, 0.9997142, 4], [216, 84, 0.9774304, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.00

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999262, 2], [222, 98, 0.7236571, 3]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3063, -0.5917,  0.7237,  3.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [190, 233, 0.98075944, 4], [106, 185, 0.89350194, 4], [92, 164, 0.9824525, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.000

Keypoints:  [[258, 367, 0.9999596, 1], [258, 283, 0.99983966, 2], [180, 297, 0.9993499, 3], [176, 277, 0.9997255, 4], [152, 181, 0.9758848, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2375,  0.9993,  3.0000],
        [-0.4500,  0.1542,  0.9997,  4.0000],
        [-0.5250, -0.2458,  0.9759,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.9999583, 1], [304, 218, 0.9995283, 3], [321, 230, 0.9997714, 4], [258, 280, 0.73736304, 4], [381, 309, 0.9987998, 5], [398, 324, 0.9960437, 6]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-5.0000e-02, -9.1667e-02,  9.9953e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.9977e-01,  4.0000e+00],
        [ 1.9062e-01,  2.8750e-01,  9.9880e-01,  5.0000e+00],
        [ 2.4375e-01,  3.5000e-01,  9.9604e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x af

Keypoints:  [[258, 367, 0.99995816, 1], [258, 282, 0.99994206, 2], [311, 224, 0.99961793, 3], [326, 238, 0.99985087, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1750,  0.9999,  2.0000],
        [-0.0281, -0.0667,  0.9996,  3.0000],
        [ 0.0188, -0.0083,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [306, 219, 0.99970335, 3], [322, 231, 0.9998894, 4], [434, 2

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99989605, 2], [255, 204, 0.9497512, 3]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2031, -0.1500,  0.9498,  3.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999577, 1], [258, 283, 0.9999248, 2], [302, 218, 0.9965509, 3], [376, 148, 0.9988175, 5], [357, 138, 0.99420255, 6]]
graph feature:  tensor([[-0.19

Keypoints:  [[258, 283, 0.9998078, 2], [179, 297, 0.9994911, 3], [175, 277, 0.999688, 4], [181, 178, 0.99801743, 5], [198, 164, 0.9969283, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4406,  0.2375,  0.9995,  3.0000],
        [-0.4531,  0.1542,  0.9997,  4.0000],
        [-0.4344, -0.2583,  0.9980,  5.0000],
        [-0.3812, -0.3167,  0.9969,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.9999037, 2], [258, 203, 0.999744, 3], [278, 203, 0.99990964, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1938, -0.1542,  0.9997,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999145, 2], [306, 220, 0.9981876, 3], [322, 234, 0.71938044, 4], [420, 243, 

Keypoints:  [[258, 367, 0.99995875, 1], [211, 218, 0.999747, 3], [229, 206, 0.9997557, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.3406, -0.0917,  0.9997,  3.0000],
        [-0.2844, -0.1417,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999349, 2], [282, 207, 0.9998313, 3], [301, 213, 0.99982244, 4], [407, 195, 0.95373815, 6]]
graph feature:  tensor([[-0.1

Keypoints:  [[258, 283, 0.9998667, 2], [183, 253, 0.99977094, 3], [191, 234, 0.9998933, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.0542,  0.9998,  3.0000],
        [-0.4031, -0.0250,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995935, 1], [284, 207, 0.999683, 3], [303, 214, 0.99988556, 4], [339, 306, 0.99932265, 5], [347, 328, 0.98968714, 6]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 283, 0.99993634, 2], [284, 207, 0.99968743, 3], [303, 214, 0.9998908, 4], [339, 305, 0.8170187, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1125, -0.1375,  0.9997,  3.0000],
        [-0.0531, -0.1083,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[257, 283, 0.9999088, 2], [296, 213, 0.99204695, 3], [320, 321, 0.99814296, 5], [313, 340, 0.9854036, 6]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.99992824, 2], [310, 223, 0.99919504, 3], [326, 237, 0.99980515, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0312, -0.0708,  0.9992,  3.0000],
        [ 0.0188, -0.0125,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999379, 2], [228, 209, 0.99031067, 3]]
graph f

Keypoints:  [[258, 367, 0.999959, 1], [232, 207, 0.9997528, 3], [251, 201, 0.99965465, 4], [242, 102, 0.99852043, 5], [235, 80, 0.99698573, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2750, -0.1375,  0.9998,  3.0000],
        [-0.2156, -0.1625,  0.9997,  4.0000],
        [-0.2438, -0.5750,  0.9985,  5.0000],
        [-0.2656, -0.6667,  0.9970,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.9999589, 1], [390, 166, 0.9989888, 5], [379, 148, 0.9966979, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [ 0.2188, -0.3083,  0.9990,  5.0000],
        [ 0.1844, -0.3833,  0.9967,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.99994314, 2], [194, 234, 0.99984646, 3], [162, 137, 0.94757503, 3], [207, 218, 0.9997154, 4]]
graph feature:  tensor([[-0.1

Keypoints:  [[258, 283, 0.9999536, 2], [327, 195, 0.99832934, 5], [347, 202, 0.99702865, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.0000],
        [ 0.0219, -0.1875,  0.9983,  5.0000],
        [ 0.0844, -0.1583,  0.9970,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[257, 283, 0.99982363, 2], [176, 278, 0.9998294, 4], [129, 190, 0.9959484, 5], [141, 172, 0.99571675, 6]]
graph feature:  tensor([[-0.1969,  0.1792,  0.9998,  2

Keypoints:  [[192, 233, 0.75402445, 3], [188, 121, 0.9944622, 5], [209, 117, 0.99437016, 6]]
graph feature:  tensor([[-0.4000, -0.0292,  0.7540,  3.0000],
        [-0.4125, -0.4958,  0.9945,  5.0000],
        [-0.3469, -0.5125,  0.9944,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999597, 1], [243, 102, 0.99751866, 5], [251, 82, 0.996601, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2406, -0.

Keypoints:  [[180, 297, 0.9995228, 3], [176, 276, 0.99977833, 4], [181, 178, 0.9983962, 5], [188, 157, 0.9986141, 6]]
graph feature:  tensor([[-0.4375,  0.2375,  0.9995,  3.0000],
        [-0.4500,  0.1500,  0.9998,  4.0000],
        [-0.4344, -0.2583,  0.9984,  5.0000],
        [-0.4125, -0.3458,  0.9986,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995756, 1], [326, 289, 0.9992261, 5], [345, 298, 0.9981875, 6]]
graph feat

Keypoints:  [[258, 367, 0.99995816, 1], [178, 276, 0.9998621, 3], [180, 255, 0.99969435, 4], [133, 167, 0.7439607, 4], [256, 284, 0.731806, 4], [146, 153, 0.98490334, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4437,  0.1500,  0.9999,  3.0000],
        [-0.4375,  0.0625,  0.9997,  4.0000],
        [-0.5437, -0.3625,  0.9849,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999596, 1], [258, 283,

Keypoints:  [[194, 235, 0.99984026, 3], [206, 218, 0.999762, 4], [297, 178, 0.99901307, 5], [319, 173, 0.99763346, 6]]
graph feature:  tensor([[-3.9375e-01, -2.0833e-02,  9.9984e-01,  3.0000e+00],
        [-3.5625e-01, -9.1667e-02,  9.9976e-01,  4.0000e+00],
        [-7.1875e-02, -2.5833e-01,  9.9901e-01,  5.0000e+00],
        [-3.1250e-03, -2.7917e-01,  9.9763e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999591, 1]

Keypoints:  [[258, 367, 0.99995995, 1], [258, 283, 0.99992657, 2], [310, 223, 0.9991787, 3], [326, 238, 0.9998105, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0312, -0.0708,  0.9992,  3.0000],
        [ 0.0188, -0.0083,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99991417, 2], [303, 218, 0.99978393, 3], [321, 230, 0.9997688, 4], [376, 14

Keypoints:  [[311, 223, 0.9994894, 3], [326, 238, 0.9998989, 4], [436, 225, 0.90007985, 6]]
graph feature:  tensor([[-0.0281, -0.0708,  0.9995,  3.0000],
        [ 0.0188, -0.0083,  0.9999,  4.0000],
        [ 0.3625, -0.0625,  0.9001,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.999915, 2], [183, 255, 0.99969983, 3], [191, 235, 0.9998047, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.

Keypoints:  [[258, 283, 0.9999293, 2], [280, 207, 0.9553008, 3]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1250, -0.1375,  0.9553,  3.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [305, 219, 0.9994266, 3], [320, 232, 0.77331555, 4], [430, 225, 0.98302436, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0469, -0.0875,  0.9994,  3.0000],
        [ 0.0000, -0

Keypoints:  [[258, 367, 0.99995995, 1], [257, 203, 0.999665, 3], [278, 203, 0.9998913, 4], [372, 228, 0.9949367, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969, -0.1542,  0.9997,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000],
        [ 0.1625, -0.0500,  0.9949,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999391, 2], [282, 207, 0.9997192, 3], [301, 213, 0.98047465, 4], [373, 147, 

Keypoints:  [[258, 367, 0.99995875, 1], [257, 283, 0.9998473, 2], [180, 254, 0.9997727, 4], [82, 252, 0.7463899, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.0583,  0.9998,  4.0000],
        [-0.7438,  0.0500,  0.7464,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999229, 2], [302, 213, 0.99493533, 4]]
graph feature:  tensor([[-0.1938,  0.

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.99993014, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999099, 2], [258, 203, 0.9995478, 3], [279, 203, 0.99993026, 4], [306, 108, 0.99707687, 5], [295, 87, 0.9815057, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1938, -0.1542,  0.9995,  3.

Keypoints:  [[258, 283, 0.99995506, 2], [211, 218, 0.9998085, 3], [228, 206, 0.9996327, 4], [330, 142, 0.9943605, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.3406, -0.0917,  0.9998,  3.0000],
        [-0.2875, -0.1417,  0.9996,  4.0000],
        [ 0.0312, -0.4083,  0.9944,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.99988747, 2], [178, 276, 0.99979836, 3], [180, 255

Keypoints:  [[257, 283, 0.99984956, 2], [180, 298, 0.99597734, 3], [176, 278, 0.99945074, 4], [79, 303, 0.99601424, 5], [59, 294, 0.9864026, 6]]
graph feature:  tensor([[-0.1969,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2417,  0.9960,  3.0000],
        [-0.4500,  0.1583,  0.9995,  4.0000],
        [-0.7531,  0.2625,  0.9960,  5.0000],
        [-0.8156,  0.2250,  0.9864,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 283, 0.9998803, 2], [165, 272, 0.8014487, 3], [179, 256, 0.9989507, 4], [209, 161, 0.9988336, 5], [222, 143, 0.9970605, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4844,  0.1333,  0.8014,  3.0000],
        [-0.4406,  0.0667,  0.9990,  4.0000],
        [-0.3469, -0.3292,  0.9988,  5.0000],
        [-0.3063, -0.4042,  0.9971,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995

Keypoints:  [[258, 283, 0.9999238, 2], [304, 218, 0.9996284, 3], [321, 230, 0.99978894, 4], [412, 190, 0.9987909, 5], [402, 170, 0.9972363, 6]]
graph feature:  tensor([[-1.9375e-01,  1.7917e-01,  9.9992e-01,  2.0000e+00],
        [-5.0000e-02, -9.1667e-02,  9.9963e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.9979e-01,  4.0000e+00],
        [ 2.8750e-01, -2.0833e-01,  9.9879e-01,  5.0000e+00],
        [ 2.5625e-01, -2.9167e-01,  9.9724e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10

Keypoints:  [[258, 367, 0.9999558, 1], [258, 283, 0.9999573, 2], [303, 213, 0.9970566, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.0531, -0.1125,  0.9971,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [233, 208, 0.96220785, 3], [269, 103, 0.99872476, 5], [257, 281, 0.80688983, 5], [260, 82, 0.99903345, 6]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9998988, 2], [257, 203, 0.9985098, 3], [277, 200, 0.975576, 4], [377, 203, 0.9988965, 5], [397, 211, 0.9973605, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9985,  3.0000],
        [-0.1344, -0.1667,  0.9756,  4.0000],
        [ 0.1781, -0.1542,  0.9989,  5.0000],
        [ 0.2406, -0.1208,  0.9974,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
ba

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9998932, 2], [281, 203, 0.92824215, 4], [377, 203, 0.9990306, 5], [396, 214, 0.99763715, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1219, -0.1542,  0.9282,  4.0000],
        [ 0.1781, -0.1542,  0.9990,  5.0000],
        [ 0.2375, -0.1083,  0.9976,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.99995935, 1], [280, 104, 0.99892336, 5], [277, 82, 0.9958085, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1250, -0.5667,  0.9989,  5.0000],
        [-0.1344, -0.6583,  0.9958,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999242, 2], [304, 218, 0.99979, 3], [321, 230, 0.9997632, 4], [376, 148, 0.99845946, 5], [363, 133, 0.9963586, 6]]
graph feature:  tensor([[-1.9375

Keypoints:  [[258, 367, 0.99995935, 1], [211, 218, 0.9998336, 3], [228, 206, 0.99956197, 4], [257, 283, 0.92663336, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.3406, -0.0917,  0.9998,  3.0000],
        [-0.2875, -0.1417,  0.9996,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999409, 2], [195, 232, 0.9988458, 3], [298, 177, 0.99886465, 5], [319, 179, 0.99864775, 6]]


Keypoints:  [[258, 367, 0.9999596, 1], [283, 207, 0.99973506, 3], [303, 214, 0.9998227, 4], [381, 274, 0.9994066, 5], [393, 292, 0.9863494, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1156, -0.1375,  0.9997,  3.0000],
        [-0.0531, -0.1083,  0.9998,  4.0000],
        [ 0.1906,  0.1417,  0.9994,  5.0000],
        [ 0.2281,  0.2167,  0.9863,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.99995923, 1], [184, 252, 0.99953425, 3], [194, 232, 0.96849155, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4250,  0.0500,  0.9995,  3.0000],
        [-0.3938, -0.0333,  0.9685,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99994504, 2], [153, 119, 0.81532735, 3], [159, 133, 0.9933587, 4], [207, 217, 0.9861059, 4], [136, 147, 0.94007397, 5]]
graph feature:  tensor([[-

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990296, 2], [257, 203, 0.99953806, 3], [278, 203, 0.9999068, 4], [390, 169, 0.934349, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9995,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000],
        [ 0.2188, -0.2958,  0.9343,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.9999566, 1], [258, 283, 0.99992764, 2], [388, 276, 0.949657, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.2125,  0.1500,  0.9497,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [304, 218, 0.9996177, 3], [321, 230, 0.9998196, 4]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
      

Keypoints:  [[258, 367, 0.9999565, 1], [258, 283, 0.9999207, 2], [311, 222, 0.9278436, 3], [394, 307, 0.83521086, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0281, -0.0750,  0.9278,  3.0000],
        [ 0.2313,  0.2792,  0.8352,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [258, 283, 0.9999063, 2], [178, 276, 0.9998419, 3], [179, 255, 

Keypoints:  [[282, 207, 0.9998367, 3], [302, 213, 0.99979264, 4]]
graph feature:  tensor([[-0.1187, -0.1375,  0.9998,  3.0000],
        [-0.0563, -0.1125,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999249, 2], [355, 130, 0.97942114, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.1094, -0.4583,  0.9794,  5.0000]]

Keypoints:  [[258, 367, 0.9999565, 1], [258, 283, 0.99992037, 2], [183, 254, 0.99973375, 3], [191, 235, 0.9997719, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.0583,  0.9997,  3.0000],
        [-0.4031, -0.0208,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999571, 1], [258, 283, 0.9999125, 2], [178, 276, 0.99976736, 3], [180, 256

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99992657, 2], [230, 192, 0.9193918, 3], [253, 200, 0.9187733, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2812, -0.2000,  0.9194,  3.0000],
        [-0.2094, -0.1667,  0.9188,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[375, 146, 0.997454, 5], [381, 124, 0.99733704, 6]]
graph feature:  tensor([[ 0.1719, -0.

Keypoints:  [[258, 283, 0.99994564, 2], [193, 236, 0.99853456, 3], [262, 365, 0.888747, 4], [187, 120, 0.9976897, 5], [204, 107, 0.9975097, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3969, -0.0167,  0.9985,  3.0000],
        [-0.1813,  0.5208,  0.8887,  4.0000],
        [-0.4156, -0.5000,  0.9977,  5.0000],
        [-0.3625, -0.5542,  0.9975,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.99976784, 2], [180, 296, 0.9981287, 3], [172, 278, 0.9921257, 4], [249, 211, 0.99877983, 5], [270, 220, 0.9982755, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2333,  0.9981,  3.0000],
        [-0.4625,  0.1583,  0.9921,  4.0000],
        [-0.2219, -0.1208,  0.9988,  5.0000],
        [-0.1562, -0.0833,  0.9983,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512]

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99991035, 2], [183, 254, 0.99966955, 3], [190, 235, 0.99978167, 4], [75, 192, 0.9762463, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4281,  0.0583,  0.9997,  3.0000],
        [-0.4062, -0.0208,  0.9998,  4.0000],
        [-0.7656, -0.2000,  0.9762,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99994254, 2], [233, 207, 0.9997465, 3], [252, 201, 0.9993358, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2719, -0.1375,  0.9997,  3.0000],
        [-0.2125, -0.1625,  0.9993,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999959, 1], [183, 254, 0.99980706, 3], [191, 235, 0.9998074, 4], [258, 283, 

Keypoints:  [[253, 201, 0.9957411, 4], [336, 146, 0.998992, 5], [355, 135, 0.9967525, 6]]
graph feature:  tensor([[-0.2094, -0.1625,  0.9957,  4.0000],
        [ 0.0500, -0.3917,  0.9990,  5.0000],
        [ 0.1094, -0.4375,  0.9968,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [183, 254, 0.99978167, 3], [191, 234, 0.9998927, 4], [225, 141, 0.9984528, 5], [246, 144, 0.99705064, 6]]
graph feature:  tensor([[-0.193

Keypoints:  [[258, 367, 0.999962, 1]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999124, 2], [178, 275, 0.99948347, 3], [183, 156, 0.9982748, 5], [186, 135, 0.9986638, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1458,  0.9995,  3.0000],
        [-0.4281, -0.3500,  0.9983,  5.0000],
        [-0.4188, -0.4375,  0.9987,  6.0000]], d

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.99994016, 2], [232, 207, 0.999742, 3], [252, 201, 0.9991881, 4], [315, 126, 0.9480123, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1375,  0.9997,  3.0000],
        [-0.2125, -0.1625,  0.9992,  4.0000],
        [-0.0156, -0.4750,  0.9480,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99993

Keypoints:  [[258, 367, 0.99995863, 1], [183, 257, 0.99568945, 3], [262, 281, 0.82793, 4], [191, 237, 0.7755559, 4], [179, 136, 0.99785846, 5], [200, 137, 0.9946925, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4281,  0.0708,  0.9957,  3.0000],
        [-0.1813,  0.1708,  0.8279,  4.0000],
        [-0.4406, -0.4333,  0.9979,  5.0000],
        [-0.3750, -0.4292,  0.9947,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keyp

Keypoints:  [[258, 367, 0.99995816, 1], [257, 283, 0.9998553, 2], [184, 254, 0.9997391, 3], [191, 234, 0.99991953, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9999,  2.0000],
        [-0.4250,  0.0583,  0.9997,  3.0000],
        [-0.4031, -0.0250,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999597, 1], [179, 298, 0.9993236, 3], [176, 277, 0.9996624, 4], [230, 195,

Keypoints:  [[258, 367, 0.999959, 1], [253, 200, 0.97443455, 4], [217, 108, 0.9984688, 5], [214, 86, 0.99405247, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2094, -0.1667,  0.9744,  4.0000],
        [-0.3219, -0.5500,  0.9985,  5.0000],
        [-0.3313, -0.6417,  0.9941,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999603, 1], [179, 297, 0.99918646, 3], [175, 278, 0.99942625, 4], [250, 212,

Keypoints:  [[258, 367, 0.999961, 1], [257, 204, 0.90310663, 3], [363, 253, 0.9988513, 5], [383, 261, 0.9951994, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969, -0.1500,  0.9031,  3.0000],
        [ 0.1344,  0.0542,  0.9989,  5.0000],
        [ 0.1969,  0.0875,  0.9952,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.999928, 2], [283, 207, 0.9990482, 3], [356, 130, 0

Keypoints:  [[258, 367, 0.99995863, 1], [192, 234, 0.9995122, 4], [258, 283, 0.7805932, 4], [281, 193, 0.9987243, 5], [303, 191, 0.99811316, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4000, -0.0250,  0.9995,  4.0000],
        [-0.1219, -0.1958,  0.9987,  5.0000],
        [-0.0531, -0.2042,  0.9981,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 282, 0.9999392, 2], [311, 224, 0.9995602, 3], [396, 147,

Keypoints:  [[258, 367, 0.99995613, 1]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[232, 207, 0.9994771, 3], [252, 200, 0.9996828, 4], [346, 124, 0.96938825, 6]]
graph feature:  tensor([[-0.2750, -0.1375,  0.9995,  3.0000],
        [-0.2125, -0.1667,  0.9997,  4.0000],
        [ 0.0813, -0.4833,  0.9694,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
sh

Keypoints:  [[258, 367, 0.9999565, 1], [258, 283, 0.9999281, 2], [281, 207, 0.99972695, 3], [298, 213, 0.9993773, 4], [367, 282, 0.8432304, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1219, -0.1375,  0.9997,  3.0000],
        [-0.0688, -0.1125,  0.9994,  4.0000],
        [ 0.1469,  0.1750,  0.8432,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.9999584, 1], [257, 283, 0.9998603, 2], [180, 254, 0.78306943, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9999,  2.0000],
        [-0.4375,  0.0583,  0.7831,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999473, 2], [192, 238, 0.7389108, 3]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0

Keypoints:  [[258, 367, 0.99995494, 1], [258, 283, 0.99983263, 2], [232, 198, 0.87187433, 3], [253, 200, 0.99617946, 4], [324, 269, 0.99159324, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.2750, -0.1750,  0.8719,  3.0000],
        [-0.2094, -0.1667,  0.9962,  4.0000],
        [ 0.0125,  0.1208,  0.9916,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.

Keypoints:  [[258, 283, 0.99992085, 2], [284, 214, 0.94026244, 3], [342, 325, 0.91628397, 3], [289, 340, 0.82140064, 3]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1125, -0.1083,  0.9403,  3.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99993, 2], [256, 374, 0.8731108, 4], [332, 119, 0.9983765, 5], [316, 105, 0.99451184, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2

Keypoints:  [[258, 367, 0.9999553, 1], [258, 283, 0.99992657, 2], [311, 224, 0.9993326, 3], [326, 238, 0.99976844, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0281, -0.0667,  0.9993,  3.0000],
        [ 0.0188, -0.0083,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99992263, 2], [304, 218, 0.9992594, 3], [321, 230

Keypoints:  [[258, 367, 0.9999577, 1], [258, 283, 0.9998934, 2], [325, 289, 0.9994356, 5], [334, 310, 0.9968817, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0156,  0.2042,  0.9994,  5.0000],
        [ 0.0437,  0.2917,  0.9969,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999571, 1], [304, 214, 0.9956726, 4], [341, 324, 0.7261019, 6]]
graph featur

Keypoints:  [[258, 283, 0.9998981, 2], [258, 203, 0.9995826, 3], [392, 175, 0.8354439, 3], [278, 203, 0.9999292, 4], [374, 178, 0.9957497, 5], [380, 158, 0.8104618, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1938, -0.1542,  0.9996,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000],
        [ 0.1688, -0.2583,  0.9957,  5.0000],
        [ 0.1875, -0.3417,  0.8105,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypo

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9998832, 2], [225, 141, 0.999131, 5], [241, 127, 0.99344784, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2969, -0.4125,  0.9991,  5.0000],
        [-0.2469, -0.4708,  0.9934,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9998752, 2], [192, 234, 0.9997243, 4], [182, 257, 0.9564251, 4], [267, 170, 

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999263, 2], [301, 220, 0.8774277, 3], [412, 189, 0.9976907, 5], [427, 200, 0.99572134, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0594, -0.0833,  0.8774,  3.0000],
        [ 0.2875, -0.2125,  0.9977,  5.0000],
        [ 0.3344, -0.1667,  0.9957,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 283, 0.999938, 2], [232, 207, 0.99975413, 3], [251, 201, 0.9995943, 4], [269, 102, 0.9975909, 5], [274, 81, 0.9980806, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1375,  0.9998,  3.0000],
        [-0.2156, -0.1625,  0.9996,  4.0000],
        [-0.1594, -0.5750,  0.9976,  5.0000],
        [-0.1437, -0.6625,  0.9981,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.999934

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999449, 2], [185, 221, 0.9190981, 3], [197, 233, 0.98710376, 4], [207, 219, 0.77155083, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4219, -0.0792,  0.9191,  3.0000],
        [-0.3844, -0.0292,  0.9871,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [257, 281, 0.7904794, 2], [180, 254

Keypoints:  [[258, 367, 0.9999573, 1], [281, 206, 0.9998627, 3], [376, 316, 0.7235461, 3], [301, 212, 0.9998442, 4], [347, 300, 0.99423605, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1219, -0.1417,  0.9999,  3.0000],
        [-0.0594, -0.1167,  0.9998,  4.0000],
        [ 0.0844,  0.2500,  0.9942,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99994063, 2], [284, 207,

Keypoints:  [[258, 367, 0.99995875, 1], [311, 223, 0.9994122, 3], [326, 237, 0.99986005, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0281, -0.0708,  0.9994,  3.0000],
        [ 0.0188, -0.0125,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999211, 2], [305, 218, 0.9996531, 3], [321, 231, 0.9998504, 4], [412, 190, 0.99889934, 5], [427, 174, 0.99735826, 6]]
graph feature:  tensor([[-1.

Keypoints:  [[258, 367, 0.9999584, 1], [232, 207, 0.9997528, 3], [251, 201, 0.9996505, 4], [232, 81, 0.83748144, 4], [242, 104, 0.988896, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2750, -0.1375,  0.9998,  3.0000],
        [-0.2156, -0.1625,  0.9997,  4.0000],
        [-0.2438, -0.5667,  0.9889,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999558, 1], [258, 283, 0.9999316, 2]]
graph feature

Keypoints:  [[258, 283, 0.9999162, 2], [282, 207, 0.9998159, 3], [302, 213, 0.999897, 4], [398, 191, 0.9942703, 5], [410, 172, 0.9985411, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1187, -0.1375,  0.9998,  3.0000],
        [-0.0563, -0.1125,  0.9999,  4.0000],
        [ 0.2438, -0.2042,  0.9943,  5.0000],
        [ 0.2812, -0.2833,  0.9985,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.999959

Keypoints:  [[258, 367, 0.99995875, 1], [257, 283, 0.999856, 2], [178, 274, 0.99826896, 3], [180, 254, 0.99959475, 4], [86, 207, 0.71082187, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1417,  0.9983,  3.0000],
        [-0.4375,  0.0583,  0.9996,  4.0000],
        [-0.7312, -0.1375,  0.7108,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[257, 203, 0.99975723, 3], [277, 203, 0.9998288, 4], [255, 281, 0.9362402, 4], [364, 253, 0.99916136, 5], [379, 269, 0.99801874, 6]]
graph feature:  tensor([[-0.1969, -0.1542,  0.9998,  3.0000],
        [-0.1344, -0.1542,  0.9998,  4.0000],
        [ 0.1375,  0.0542,  0.9992,  5.0000],
        [ 0.1844,  0.1208,  0.9980,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [387, 213, 0.7870989, 1], [282, 20

Keypoints:  [[258, 367, 0.9999584, 1], [257, 283, 0.99991095, 2], [165, 271, 0.9006481, 3], [180, 255, 0.9998672, 4], [112, 183, 0.9931369, 5], [127, 168, 0.9946976, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1969,  0.1792,  0.9999,  2.0000],
        [-0.4844,  0.1292,  0.9006,  3.0000],
        [-0.4375,  0.0625,  0.9999,  4.0000],
        [-0.6500, -0.2375,  0.9931,  5.0000],
        [-0.6031, -0.3000,  0.9947,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])


Keypoints:  [[258, 367, 0.99995947, 1], [311, 223, 0.99953914, 3], [326, 237, 0.99979526, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0281, -0.0708,  0.9995,  3.0000],
        [ 0.0188, -0.0125,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995875, 1], [290, 200, 0.7526811, 3], [303, 213, 0.9996209, 4], [401, 223, 0.9991467, 5], [421, 233, 0.9982735, 6]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9999269, 2], [283, 207, 0.99944204, 3], [303, 214, 0.9694102, 4], [355, 130, 0.99851674, 5], [334, 125, 0.9932636, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1156, -0.1375,  0.9994,  3.0000],
        [-0.0531, -0.1083,  0.9694,  4.0000],
        [ 0.1094, -0.4583,  0.9985,  5.0000],
        [ 0.0437, -0.4792,  0.9933,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99993086, 2], [310, 223, 0.9987582, 3], [326, 236, 0.99985516, 4], [359, 327, 0.9834952, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0312, -0.0708,  0.9988,  3.0000],
        [ 0.0188, -0.0167,  0.9999,  4.0000],
        [ 0.1219,  0.3625,  0.9835,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[317, 126, 0.998

Keypoints:  [[258, 367, 0.9999597, 1], [258, 283, 0.99991024, 2], [322, 334, 0.98965, 5], [306, 332, 0.9490656, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0063,  0.3917,  0.9897,  5.0000],
        [-0.0437,  0.3833,  0.9491,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9998628, 2], [151, 292, 0.8745122, 3], [107, 206, 0.99872464, 5], [103, 185, 0

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999509, 2], [207, 218, 0.9994248, 4], [141, 144, 0.9992061, 5], [150, 124, 0.99721813, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.3531, -0.0917,  0.9994,  4.0000],
        [-0.5594, -0.4000,  0.9992,  5.0000],
        [-0.5312, -0.4833,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[253, 200, 0.998

Keypoints:  [[258, 367, 0.99996066, 1], [183, 255, 0.9996604, 3], [190, 235, 0.9997501, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4281,  0.0625,  0.9997,  3.0000],
        [-0.4062, -0.0208,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9999471, 2], [318, 246, 0.99857736, 5], [330, 265, 0.99427754, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.99993074, 2], [232, 208, 0.77961814, 3], [295, 111, 0.99908626, 5], [303, 91, 0.99720925, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1333,  0.7796,  3.0000],
        [-0.0781, -0.5375,  0.9991,  5.0000],
        [-0.0531, -0.6208,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9998803, 2], [176, 248, 0.71162933, 3], [191, 233, 0.9412107, 4], [248, 153, 0.9979964, 5], [266, 139, 0.99523973, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4500,  0.0333,  0.7116,  3.0000],
        [-0.4031, -0.0292,  0.9412,  4.0000],
        [-0.2250, -0.3625,  0.9980,  5.0000],
        [-0.1688, -0.4208,  0.9952,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])

Keypoints:  [[258, 367, 0.99995947, 1], [258, 283, 0.9998393, 2], [180, 298, 0.9995696, 3], [176, 277, 0.99978083, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2417,  0.9996,  3.0000],
        [-0.4500,  0.1542,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999603, 1], [281, 206, 0.99986553, 3], [301, 213, 0.9998939, 4], [396, 242

Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9999218, 2], [432, 259, 0.99378043, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.3500,  0.0792,  0.9938,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99992895, 2], [305, 204, 0.99953985, 5], [312, 224, 0.99752825, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  

Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.99992, 2]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996114, 1], [231, 207, 0.9992335, 3], [251, 201, 0.99944574, 4], [215, 109, 0.982227, 5], [254, 108, 0.83920074, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2781, -0.1375,  0.9992,  3.000

Keypoints:  [[258, 367, 0.9999589, 1], [211, 218, 0.9997582, 3], [229, 206, 0.9997534, 4], [257, 282, 0.84987336, 4], [260, 94, 0.9403893, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.3406, -0.0917,  0.9998,  3.0000],
        [-0.2844, -0.1417,  0.9998,  4.0000],
        [-0.1875, -0.6083,  0.9404,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.9999101, 2], [364, 306, 0

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.999943, 2], [232, 207, 0.99933296, 3], [249, 201, 0.992743, 4], [339, 248, 0.9579748, 5], [349, 267, 0.9942756, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1375,  0.9993,  3.0000],
        [-0.2219, -0.1625,  0.9927,  4.0000],
        [ 0.0594,  0.0333,  0.9580,  5.0000],
        [ 0.0906,  0.1125,  0.9943,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
ba

Keypoints:  [[258, 367, 0.9999609, 1], [233, 207, 0.9997137, 3], [253, 201, 0.9997099, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2719, -0.1375,  0.9997,  3.0000],
        [-0.2094, -0.1625,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995756, 1], [304, 218, 0.9997427, 3], [321, 230, 0.9997209, 4], [257, 281, 0.92081136, 5]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.99

Keypoints:  [[258, 367, 0.9999598, 1], [286, 159, 0.97199833, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1063, -0.3375,  0.9720,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99993086, 2], [302, 213, 0.99547476, 4], [395, 242, 0.9991953, 5], [398, 220, 0.99488384, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0563, -0.1125,  0.9955,  4.0000],
        [ 0.2344, 

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999206, 2], [207, 217, 0.8976733, 4], [264, 137, 0.9960777, 5], [278, 121, 0.99617493, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3531, -0.0958,  0.8977,  4.0000],
        [-0.1750, -0.4292,  0.9961,  5.0000],
        [-0.1313, -0.4958,  0.9962,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.9999565, 1], [258, 283, 0.999892, 2], [341, 306, 0.9747317, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0656,  0.2750,  0.9747,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999603, 1], [283, 207, 0.99977547, 3], [303, 214, 0.999877, 4], [400, 197, 0.9974214, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999448, 2], [254, 200, 0.97948647, 4], [232, 206, 0.889088, 4], [339, 249, 0.99893326, 5], [361, 245, 0.99761534, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2062, -0.1667,  0.9795,  4.0000],
        [ 0.0594,  0.0375,  0.9989,  5.0000],
        [ 0.1281,  0.0208,  0.9976,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Key

Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999305, 2], [281, 207, 0.9997974, 3], [301, 213, 0.9998784, 4], [396, 241, 0.9370146, 5]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-1.9375e-01,  1.7917e-01,  9.9993e-01,  2.0000e+00],
        [-1.2188e-01, -1.3750e-01,  9.9980e-01,  3.0000e+00],
        [-5.9375e-02, -1.1250e-01,  9.9988e-01,  4.0000e+00],
        [ 2.3750e-01,  4.1667e-03,  9.3701e-01,  5.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10

Keypoints:  [[258, 367, 0.9999602, 1], [312, 223, 0.9960632, 3], [422, 213, 0.96068925, 5], [426, 228, 0.993505, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0250, -0.0708,  0.9961,  3.0000],
        [ 0.3187, -0.1125,  0.9607,  5.0000],
        [ 0.3313, -0.0500,  0.9935,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000]], d

Keypoints:  [[179, 276, 0.9998105, 3], [180, 256, 0.9998766, 4], [160, 158, 0.9119006, 4], [173, 145, 0.8674906, 6]]
graph feature:  tensor([[-0.4406,  0.1500,  0.9998,  3.0000],
        [-0.4375,  0.0667,  0.9999,  4.0000],
        [-0.4594, -0.3958,  0.8675,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99990976, 2]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000]], device='cuda:0')
shape of x before attention t

Keypoints:  [[258, 367, 0.9999585, 1], [258, 283, 0.9998691, 2], [179, 256, 0.99500024, 4], [209, 161, 0.9994849, 5], [228, 150, 0.9955076, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4406,  0.0667,  0.9950,  4.0000],
        [-0.3469, -0.3292,  0.9995,  5.0000],
        [-0.2875, -0.3750,  0.9955,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99993503, 2], [232, 207, 0.9997607, 3], [252, 201, 0.9995053, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1375,  0.9998,  3.0000],
        [-0.2125, -0.1625,  0.9995,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999343, 2], [232, 207, 0.9996774, 3], [252, 201, 

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.9999199, 2], [304, 218, 0.99958724, 3], [321, 231, 0.9998733, 4]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-1.9375e-01,  1.7917e-01,  9.9992e-01,  2.0000e+00],
        [-5.0000e-02, -9.1667e-02,  9.9959e-01,  3.0000e+00],
        [ 3.1250e-03, -3.7500e-02,  9.9987e-01,  4.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999132, 2],

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99994063, 2], [233, 207, 0.9997402, 3], [253, 200, 0.99911386, 4], [361, 255, 0.9663971, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2719, -0.1375,  0.9997,  3.0000],
        [-0.2094, -0.1667,  0.9991,  4.0000],
        [ 0.1281,  0.0625,  0.9664,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[179, 298, 0.997

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999472, 2], [232, 206, 0.9997029, 3], [252, 200, 0.9998217, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2750, -0.1417,  0.9997,  3.0000],
        [-0.2125, -0.1667,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[282, 207, 0.9994049, 3], [396, 242, 0.99909043, 5], [402, 222, 0.99834144, 6]]
graph feat

Keypoints:  [[258, 367, 0.9999578, 1], [284, 207, 0.9998048, 3], [303, 214, 0.9998448, 4], [382, 274, 0.9993686, 5], [404, 277, 0.9974482, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1125, -0.1375,  0.9998,  3.0000],
        [-0.0531, -0.1083,  0.9998,  4.0000],
        [ 0.1938,  0.1417,  0.9994,  5.0000],
        [ 0.2625,  0.1542,  0.9974,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996

Keypoints:  [[258, 367, 0.99995387, 1], [258, 283, 0.9999232, 2], [322, 309, 0.9986327, 5], [343, 313, 0.9964904, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0063,  0.2875,  0.9986,  5.0000],
        [ 0.0719,  0.3042,  0.9965,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.99993527, 2], [346, 170, 0.99521196, 5], [357, 151, 0.9988545, 6]]
graph fea

shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99992216, 2], [412, 271, 0.9968645, 5], [423, 251, 0.9977319, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.2875,  0.1292,  0.9969,  5.0000],
        [ 0.3219,  0.0458,  0.9977,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [

Keypoints:  [[258, 367, 0.9999597, 1], [258, 283, 0.99987054, 2], [180, 296, 0.9859272, 3], [107, 206, 0.9988054, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4375,  0.2333,  0.9859,  3.0000],
        [-0.6656, -0.1417,  0.9988,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995863, 1], [258, 283, 0.99991834, 2], [179, 275, 0.99977034, 3], [180, 25

Keypoints:  [[268, 365, 0.7421969, 1], [258, 283, 0.9999212, 2], [304, 218, 0.9996439, 3], [321, 230, 0.99985254, 4], [405, 145, 0.94960505, 6]]
graph feature:  tensor([[-1.6250e-01,  5.2083e-01,  7.4220e-01,  1.0000e+00],
        [-1.9375e-01,  1.7917e-01,  9.9992e-01,  2.0000e+00],
        [-5.0000e-02, -9.1667e-02,  9.9964e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.9985e-01,  4.0000e+00],
        [ 2.6562e-01, -3.9583e-01,  9.4961e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 1

Keypoints:  [[258, 283, 0.9997687, 2], [180, 297, 0.9991961, 3], [176, 278, 0.9996942, 4], [250, 212, 0.99912304, 5], [270, 220, 0.9977437, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.4375,  0.2375,  0.9992,  3.0000],
        [-0.4500,  0.1583,  0.9997,  4.0000],
        [-0.2188, -0.1167,  0.9991,  5.0000],
        [-0.1562, -0.0833,  0.9977,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999

Keypoints:  [[258, 283, 0.99992275, 2], [194, 235, 0.9998615, 3], [206, 218, 0.9995414, 4]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3938, -0.0208,  0.9999,  3.0000],
        [-0.3563, -0.0917,  0.9995,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995804, 1], [258, 283, 0.9999155, 2], [183, 254, 0.99973935, 3], [191, 235, 0.9998367, 4], [124, 159, 0.79744303, 5]]
graph feature:  tensor([[-0.

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990165, 2], [377, 203, 0.99878246, 5], [399, 203, 0.99816966, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.1781, -0.1542,  0.9988,  5.0000],
        [ 0.2469, -0.1542,  0.9982,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 282, 0.99995947, 2], [338, 157, 0.8922253, 6]]
graph fe

Keypoints:  [[258, 367, 0.9999596, 1], [194, 235, 0.99983776, 3], [206, 218, 0.9996092, 4], [235, 117, 0.7377373, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.3938, -0.0208,  0.9998,  3.0000],
        [-0.3563, -0.0917,  0.9996,  4.0000],
        [-0.2656, -0.5125,  0.7377,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996066, 1], [194, 235, 0.9998222, 3], [206, 218, 0.99962604, 4], [239, 125

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99996006, 2], [321, 170, 0.99848866, 5], [342, 163, 0.9975974, 6]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-1.9375e-01,  1.7917e-01,  9.9996e-01,  2.0000e+00],
        [ 3.1250e-03, -2.9167e-01,  9.9849e-01,  5.0000e+00],
        [ 6.8750e-02, -3.2083e-01,  9.9760e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995875, 1

Keypoints:  [[258, 283, 0.9999263, 2], [355, 311, 0.73659724, 3], [305, 214, 0.9956464, 4], [335, 327, 0.91693866, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.1094,  0.2958,  0.7366,  3.0000],
        [-0.0469, -0.1083,  0.9956,  4.0000],
        [ 0.0469,  0.3625,  0.9169,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999093, 2]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000]],

Keypoints:  [[258, 283, 0.9999062, 2], [257, 203, 0.9984932, 3], [349, 134, 0.9991159, 5], [370, 126, 0.997182, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9985,  3.0000],
        [ 0.0906, -0.4417,  0.9991,  5.0000],
        [ 0.1562, -0.4750,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.99986064, 2], [250, 154, 0.997255, 5], [263, 136, 0

Keypoints:  [[258, 367, 0.99995995, 1], [304, 213, 0.9702326, 4], [325, 98, 0.98083043, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0500, -0.1125,  0.9702,  4.0000],
        [ 0.0156, -0.5917,  0.9808,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995744, 1], [258, 283, 0.9999155, 2], [375, 321, 0.99813265, 5], [367, 336, 0.9855479, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.

Keypoints:  [[258, 367, 0.9999577, 1], [284, 207, 0.9997638, 3], [303, 214, 0.9998758, 4], [358, 317, 0.99364, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1125, -0.1375,  0.9998,  3.0000],
        [-0.0531, -0.1083,  0.9999,  4.0000],
        [ 0.1187,  0.3208,  0.9936,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999273, 2], [310, 223, 0.9984206, 3]]
graph feature

Keypoints:  [[258, 282, 0.99995923, 2], [211, 218, 0.99984896, 3], [228, 206, 0.99964094, 4], [327, 196, 0.9911419, 5]]
graph feature:  tensor([[-0.1938,  0.1750,  1.0000,  2.0000],
        [-0.3406, -0.0917,  0.9998,  3.0000],
        [-0.2875, -0.1417,  0.9996,  4.0000],
        [ 0.0219, -0.1833,  0.9911,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996054, 1], [183, 255, 0.99947435, 3], [190, 235, 0.9997631, 4], [106, 1

Keypoints:  [[258, 367, 0.99996066, 1], [324, 97, 0.9984681, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [ 0.0125, -0.5958,  0.9985,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999062, 2], [290, 207, 0.9933252, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0938, -0.1375,  0.9933,  6.0000]], d

Keypoints:  [[258, 367, 0.9999577, 1], [258, 283, 0.99994004, 2], [284, 207, 0.99976856, 3], [303, 214, 0.99990296, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1125, -0.1375,  0.9998,  3.0000],
        [-0.0531, -0.1083,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999435, 2], [233, 206, 0.9984302, 3], [255, 199

Keypoints:  [[258, 367, 0.99995923, 1], [298, 217, 0.9806475, 3], [400, 294, 0.99892235, 5], [420, 287, 0.9973948, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0688, -0.0958,  0.9806,  3.0000],
        [ 0.2500,  0.2250,  0.9989,  5.0000],
        [ 0.3125,  0.1958,  0.9974,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995995, 1], [178, 276, 0.9998599, 3], [180, 255, 0.99978715, 4], [190, 13

Keypoints:  [[258, 283, 0.9998727, 2], [233, 172, 0.998847, 5], [255, 175, 0.9981699, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2719, -0.2833,  0.9988,  5.0000],
        [-0.2031, -0.2708,  0.9982,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99996066, 1], [232, 171, 0.9969469, 5], [244, 153, 0.9970523, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.2750, -0.2

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999409, 2], [305, 214, 0.99773467, 4], [382, 274, 0.9985135, 5], [403, 278, 0.997532, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0469, -0.1083,  0.9977,  4.0000],
        [ 0.1938,  0.1417,  0.9985,  5.0000],
        [ 0.2594,  0.1583,  0.9975,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 367, 0.999961, 1], [278, 203, 0.9997596, 4], [373, 229, 0.99724066, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1313, -0.1542,  0.9998,  4.0000],
        [ 0.1656, -0.0458,  0.9972,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995565, 1], [258, 283, 0.9999262, 2], [315, 223, 0.9906031, 4], [366, 308, 0.9988533, 5], [358, 323, 0.9933389, 6]]
graph feature:  tensor([[-0.193

Keypoints:  [[258, 283, 0.9999434, 2], [233, 206, 0.99787223, 3], [264, 363, 0.9711712, 4], [351, 197, 0.9984717, 5], [373, 191, 0.99791986, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.2719, -0.1417,  0.9979,  3.0000],
        [-0.1750,  0.5125,  0.9712,  4.0000],
        [ 0.0969, -0.1792,  0.9985,  5.0000],
        [ 0.1656, -0.2042,  0.9979,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999

Keypoints:  [[258, 367, 0.99996006, 1], [304, 218, 0.9997024, 3], [320, 230, 0.99977213, 4], [377, 150, 0.9695739, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.0500, -0.0917,  0.9997,  3.0000],
        [ 0.0000, -0.0417,  0.9998,  4.0000],
        [ 0.1781, -0.3750,  0.9696,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9999517, 2], [207, 218, 0.9824356, 4]]
graph feat

Keypoints:  [[258, 367, 0.9999596, 1], [304, 218, 0.9997787, 3], [321, 230, 0.999824, 4], [377, 148, 0.9976005, 5], [385, 128, 0.99847513, 6]]
graph feature:  tensor([[-1.9375e-01,  5.2917e-01,  9.9996e-01,  1.0000e+00],
        [-5.0000e-02, -9.1667e-02,  9.9978e-01,  3.0000e+00],
        [ 3.1250e-03, -4.1667e-02,  9.9982e-01,  4.0000e+00],
        [ 1.7813e-01, -3.8333e-01,  9.9760e-01,  5.0000e+00],
        [ 2.0312e-01, -4.6667e-01,  9.9848e-01,  6.0000e+00]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10,

Keypoints:  [[258, 283, 0.99993336, 2], [283, 207, 0.99974173, 3], [303, 214, 0.999796, 4], [416, 259, 0.99724567, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1156, -0.1375,  0.9997,  3.0000],
        [-0.0531, -0.1083,  0.9998,  4.0000],
        [ 0.3000,  0.0792,  0.9972,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[257, 283, 0.9998789, 2], [178, 276, 0.9998919, 3], [179, 255, 0.9995696, 4], [152, 156,

Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999534, 2], [326, 196, 0.99933213, 5], [344, 209, 0.9980634, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  1.0000,  2.0000],
        [ 0.0188, -0.1833,  0.9993,  5.0000],
        [ 0.0750, -0.1292,  0.9981,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 283, 0.9999577, 2], [211, 218, 0.9998468, 3], [228, 206, 0.9996909, 4], [321, 170, 

Keypoints:  [[258, 203, 0.9995005, 3], [278, 203, 0.99986506, 4]]
graph feature:  tensor([[-0.1938, -0.1542,  0.9995,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999596, 1], [178, 276, 0.99982005, 3], [180, 256, 0.99969006, 4], [230, 170, 0.9861404, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.4437,  0.1500,  0.9998,  3.0000],
        [-0.4375,  

Keypoints:  [[258, 283, 0.9999285, 2], [294, 111, 0.9987651, 5], [297, 88, 0.9975352, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0813, -0.5375,  0.9988,  5.0000],
        [-0.0719, -0.6333,  0.9975,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999614, 1], [233, 207, 0.99968994, 3], [252, 200, 0.9997317, 4], [352, 196, 0.99659663, 5]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.00

Keypoints:  [[258, 367, 0.9999578, 1], [258, 283, 0.99991655, 2], [322, 232, 0.9998363, 4], [436, 229, 0.99237084, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [ 0.0063, -0.0333,  0.9998,  4.0000],
        [ 0.3625, -0.0458,  0.9924,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999424, 2], [282, 207, 0.99984336, 3], [302, 213

Keypoints:  [[258, 283, 0.99989796, 2], [257, 203, 0.99965465, 3], [278, 203, 0.9998816, 4], [372, 178, 0.9736937, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.1969, -0.1542,  0.9997,  3.0000],
        [-0.1313, -0.1542,  0.9999,  4.0000],
        [ 0.1625, -0.2583,  0.9737,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999579, 1], [258, 283, 0.9998845, 2], [192, 234, 0.9996778, 4], [181, 259,

shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99989474, 2], [178, 276, 0.9998172, 3], [146, 181, 0.84684706, 3], [180, 255, 0.9997788, 4], [116, 163, 0.9946483, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1500,  0.9998,  3.0000],
        [-0.4375,  0.0625,  0.9998,  4.0000],
        [-0.6375, -0.3208,  0.9946,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
sha

Keypoints:  [[258, 367, 0.9999558, 1], [258, 283, 0.99984896, 2], [255, 203, 0.98148376, 3]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9998,  2.0000],
        [-0.2031, -0.1542,  0.9815,  3.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999589, 1], [183, 254, 0.99982613, 3], [191, 235, 0.99989974, 4], [149, 144, 0.9978046, 5], [140, 124, 0.99715745, 6]]
graph feature:  tensor([[-0

Keypoints:  [[258, 283, 0.99990845, 2], [178, 276, 0.9998116, 3], [179, 256, 0.99985003, 4], [268, 212, 0.9991346, 5], [289, 203, 0.997009, 6]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.4437,  0.1500,  0.9998,  3.0000],
        [-0.4406,  0.0667,  0.9999,  4.0000],
        [-0.1625, -0.1167,  0.9991,  5.0000],
        [-0.0969, -0.1542,  0.9970,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999

Keypoints:  [[258, 283, 0.99991786, 2], [193, 231, 0.99745053, 3], [257, 377, 0.878238, 4], [304, 231, 0.9978387, 5]]
graph feature:  tensor([[-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.3969, -0.0375,  0.9975,  3.0000],
        [-0.1969,  0.5708,  0.8782,  4.0000],
        [-0.0500, -0.0375,  0.9978,  5.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [193, 237, 0.9864833, 3], [297, 177, 0.99881077, 5], [319, 174

Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.99990964, 2], [311, 223, 0.9994204, 3], [326, 238, 0.99977976, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0281, -0.0708,  0.9994,  3.0000],
        [ 0.0188, -0.0083,  0.9998,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999583, 1], [258, 283, 0.9999386, 2], [301, 220, 0.77162695, 3]]
graph fea

shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999385, 2], [305, 149, 0.9609406, 6]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  0.9999,  2.0000],
        [-0.0469, -0.3792,  0.9609,  6.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.999958

Keypoints:  [[258, 367, 0.99995816, 1], [258, 283, 0.9999583, 2], [211, 218, 0.9998518, 3], [228, 206, 0.9996729, 4]]
graph feature:  tensor([[-0.1938,  0.5292,  1.0000,  1.0000],
        [-0.1938,  0.1792,  1.0000,  2.0000],
        [-0.3406, -0.0917,  0.9999,  3.0000],
        [-0.2875, -0.1417,  0.9997,  4.0000]], device='cuda:0')
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
shape of x before attention torch.Size([1, 6, 512])
shape of x after attn torch.Size([1, 6, 512])
batch norm input torch.Size([1, 6, 512])
shape of x before attention torch.Size([1, 10, 512])
shape of x after attn torch.Size([1, 10, 512])
batch norm input torch.Size([1, 10, 512])
Keypoints:  [[258, 367, 0.9999584, 1], [258, 283, 0.9999597, 2], [211, 218, 0.99988246, 3], [228, 206,

KeyboardInterrupt: 