In [1]:
import os, json, cv2, numpy as np, matplotlib.pyplot as plt
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import torch
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F

In [2]:
!git clone https://github.com/pytorch/vision.git
cwd = os.getcwd()
os.chdir('vision/references/detection')
import transforms, utils, engine, train
from utils import collate_fn
from engine import train_one_epoch, evaluate
os.chdir(cwd)

fatal: destination path 'vision' already exists and is not an empty directory.


In [3]:
import cv2
import os
import numpy as np

def load_images_from_folder(folder):
    images = []
    i=0
    filenames = []
    for filename in os.listdir(folder):
      i+=1
      filenames.append(filename)
      if(i%20==0):
        print('#',end = '')
      img = cv2.imread(os.path.join(folder,filename))
      if img is not None:
          images.append(img)
    print('Done loading images :)')
    return images,filenames

images,filenames = load_images_from_folder('baxter_real_dataset/Dataset/img')
idx = [int(f.split('.')[0]) for f in filenames]
key_points = np.loadtxt('baxter_real_dataset/Dataset/pixels.txt')

key_points_ = np.copy(key_points)
for i in range(len(images)):
  j = idx[i]
  key_points_[3*i:3*i+2,:] = key_points[3*j:3*j+2,:]

key_points = key_points_

################################Done loading images :)


In [4]:
def visualize(cv2_img,kp):
  img = np.copy(cv2_img)
  kp = np.round(kp)
  kp = kp.astype(np.int32)
  for i in range(4):
    x1 = kp[0,i]
    y1 = kp[1,i]
    x2 = kp[0,i+1]
    y2 = kp[1,i+1]
    cv2.line(img,(x1,y1),(x2,y2),(0,255,0),5)
  cv2.imshow('',img)
  cv2.waitKey(0)
  cv2.destroyAllWindows()

n = 345
kp = key_points[3*n:3*n+2,:]
visualize(images[n],kp)

In [5]:
class ClassDataset(Dataset):
    def __init__(self):                
        self.dataset = True
    
    def __getitem__(self, idx):
      #print(idx)
      keypoints_original = key_points[3*idx:3*idx+2,:]
      keypoints_original = keypoints_original.T
      #print("############: "+str(idx))
      min_ = np.min(keypoints_original,0)-5
      max_ = np.max(keypoints_original,0)+5
      bboxes = [[min_[0],min_[1],max_[0],max_[1]]]
      keypoints_original = np.hstack([keypoints_original,np.ones((5,1))])
      keypoints_original = np.array([keypoints_original])

      bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
      target = {}
      target["boxes"] = bboxes
      target["labels"] = torch.as_tensor([1]) # all objects are glue tubes
      target["image_id"] = torch.tensor([i])
      target["area"] = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
      target["iscrowd"] = torch.zeros(len(bboxes), dtype=torch.int64)
      target["keypoints"] = torch.as_tensor(keypoints_original, dtype=torch.float32)     
      img = F.to_tensor(images[idx])
      return img,target

    def __len__(self):
        return 650

In [6]:
def get_model(num_keypoints, weights_path=None):
    
    anchor_generator = AnchorGenerator(sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0))
    model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=False,
                                                                   pretrained_backbone=True,
                                                                   num_keypoints=num_keypoints,
                                                                   num_classes = 2, # Background is the first class, object is the second class
                                                                   rpn_anchor_generator=anchor_generator)

    if weights_path:
        state_dict = torch.load(weights_path)
        model.load_state_dict(state_dict)        
        
    return model

In [7]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = get_model(num_keypoints = 5)
model.to(device)
print(device)
dataset_train = ClassDataset()
data_loader = DataLoader(dataset_train, batch_size=3, shuffle=True, collate_fn=collate_fn)

cuda


In [9]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.3)
num_epochs = 5

loss_list = []
epoch = 0
for i in range(num_epochs):
  train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=20)
  lr_scheduler.step()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [  0/217]  eta: 0:03:12  lr: 0.000006  loss: 9.4957 (9.4957)  loss_classifier: 0.7402 (0.7402)  loss_box_reg: 0.0004 (0.0004)  loss_keypoint: 8.0554 (8.0554)  loss_objectness: 0.6942 (0.6942)  loss_rpn_box_reg: 0.0055 (0.0055)  time: 0.8882  data: 0.1180  max mem: 3750
Epoch: [0]  [ 20/217]  eta: 0:01:44  lr: 0.000098  loss: 9.3491 (9.3336)  loss_classifier: 0.6812 (0.6693)  loss_box_reg: 0.0003 (0.0003)  loss_keypoint: 7.9606 (7.9648)  loss_objectness: 0.6928 (0.6930)  loss_rpn_box_reg: 0.0058 (0.0061)  time: 0.5115  data: 0.0063  max mem: 4079
Epoch: [0]  [ 40/217]  eta: 0:01:32  lr: 0.000191  loss: 8.3288 (8.8331)  loss_classifier: 0.2463 (0.4712)  loss_box_reg: 0.0006 (0.0007)  loss_keypoint: 7.4200 (7.6663)  loss_objectness: 0.6870 (0.6890)  loss_rpn_box_reg: 0.0043 (0.0058)  time: 0.5119  data: 0.0064  max mem: 4079
Epoch: [0]  [ 60/217]  eta: 0:01:21  lr: 0.000283  loss: 6.5625 (8.1281)  loss_classifier: 0.0471 (0.3331)  loss_box_reg: 0.0045 (0.0030)  loss_keypoint: 

Epoch: [0]  [100/217]  eta: 0:01:10  lr: 0.000468  loss: 2.1843 (2.2898)  loss_classifier: 0.0373 (0.0386)  loss_box_reg: 0.0679 (0.0696)  loss_keypoint: 2.0462 (2.1653)  loss_objectness: 0.0107 (0.0117)  loss_rpn_box_reg: 0.0046 (0.0045)  time: 0.6137  data: 0.0072  max mem: 4079
Epoch: [0]  [120/217]  eta: 0:00:58  lr: 0.000561  loss: 2.1516 (2.2774)  loss_classifier: 0.0394 (0.0390)  loss_box_reg: 0.0658 (0.0694)  loss_keypoint: 2.0077 (2.1531)  loss_objectness: 0.0089 (0.0114)  loss_rpn_box_reg: 0.0043 (0.0046)  time: 0.6096  data: 0.0068  max mem: 4079
Epoch: [0]  [140/217]  eta: 0:00:46  lr: 0.000653  loss: 2.2184 (2.2779)  loss_classifier: 0.0361 (0.0388)  loss_box_reg: 0.0694 (0.0689)  loss_keypoint: 2.0806 (2.1543)  loss_objectness: 0.0100 (0.0114)  loss_rpn_box_reg: 0.0039 (0.0046)  time: 0.6189  data: 0.0073  max mem: 4079
Epoch: [0]  [160/217]  eta: 0:00:34  lr: 0.000746  loss: 2.2923 (2.2829)  loss_classifier: 0.0386 (0.0389)  loss_box_reg: 0.0666 (0.0687)  loss_keypoint: 

Epoch: [0]  [200/217]  eta: 0:00:10  lr: 0.000931  loss: 1.9102 (1.7991)  loss_classifier: 0.0288 (0.0294)  loss_box_reg: 0.0563 (0.0547)  loss_keypoint: 1.8128 (1.7054)  loss_objectness: 0.0052 (0.0059)  loss_rpn_box_reg: 0.0027 (0.0036)  time: 0.6183  data: 0.0068  max mem: 4079
Epoch: [0]  [216/217]  eta: 0:00:00  lr: 0.001000  loss: 1.9389 (1.8233)  loss_classifier: 0.0292 (0.0296)  loss_box_reg: 0.0522 (0.0545)  loss_keypoint: 1.8358 (1.7297)  loss_objectness: 0.0043 (0.0059)  loss_rpn_box_reg: 0.0030 (0.0036)  time: 0.5975  data: 0.0064  max mem: 4079
Epoch: [0] Total time: 0:02:13 (0.6131 s / it)


In [10]:
torch.save(model,'real_keypoint.pt')
os.getcwd()

'/home/cs/Downloads/keypoint rcnn'

#**Validation**

In [11]:
model = torch.load('real_keypoint.pt')
model.to(device)

KeypointRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(640, 672, 704, 736, 768, 800), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, 

In [18]:
n=439
img = images[n]
img = F.to_tensor(img).to(device)
model.eval()
output = model([img])

scores = output[0]['scores'].detach().cpu().numpy()
np.where(scores > 0.6)[0].tolist()

kp = output[0]['keypoints'][np.where(scores > 0.01)[0].tolist()].detach().cpu().numpy().astype(np.int32)
kp = kp[0,:,0:2]

img = images[n]
visualize(img,np.array(kp).T)

In [26]:
img = cv2.imread('/home/cs/Downloads/keypoint rcnn/baxter_real_dataset/Dataset/test1.jpg')
img = F.to_tensor(img).to(device)
model.eval()
output = model([img])

scores = output[0]['scores'].detach().cpu().numpy()
np.where(scores > 0.6)[0].tolist()

kp = output[0]['keypoints'][np.where(scores > 0.7)[0].tolist()].detach().cpu().numpy().astype(np.int32)
kp = kp[0,:,0:2]

img = cv2.imread('/home/cs/Downloads/keypoint rcnn/baxter_real_dataset/Dataset/test1.jpg')
visualize(img,np.array(kp).T)

In [27]:
img = cv2.imread('/home/cs/Downloads/keypoint rcnn/baxter_real_dataset/Dataset/test2.png')
img = F.to_tensor(img).to(device)
model.eval()
output = model([img])

scores = output[0]['scores'].detach().cpu().numpy()
np.where(scores > 0.6)[0].tolist()

kp = output[0]['keypoints'][np.where(scores > 0.7)[0].tolist()].detach().cpu().numpy().astype(np.int32)
kp = kp[0,:,0:2]

img = cv2.imread('/home/cs/Downloads/keypoint rcnn/baxter_real_dataset/Dataset/test2.png')
visualize(img,np.array(kp).T)