In [None]:
import zipfile
from google.colab import drive

drive.mount('/content/drive')

# Archives to extract into /content/data. Only the validation archive is
# enabled; un-comment an entry to extract it as well.
# NOTE: the TRAINING cell reads images from /content/data/train/, so the
# training archive has to be enabled before that cell can run on a fresh runtime.
archives = [
    #'/content/drive/Shareddrives/OpenMonkeyChallenge/train.zip',  # training data
    '/content/drive/Shareddrives/OpenMonkeyChallenge/val.zip',  # validation data
    #'/content/drive/MyDrive/Colab/test.zip',  # testing data
]

for archive_path in archives:
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall('/content/data')

In [None]:
import json
from PIL import Image
from torch.utils.data import Dataset
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as f
import matplotlib.pyplot as plt

# Run on the first GPU when one is visible, otherwise fall back to the CPU.
cuda = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Side length of the square network input, and of the belief maps it produces
# (one eighth of the input side).
input_size = 368
output_size = input_size // 8

# Every species name used by this notebook; the position in this list is the
# index used when a numeric label is turned back into a name.
species = [
    'Rhesus_macaque',
    'Olive_baboon',
    'Gibbon',
    'Golden_lion_tamarin',
    'Common_marmoset',
    'Bonobo',
    'Siamang',
    'Crab-eating_macaque',
    'Vervet_monkey',
    'Orangutan',
    'Gorilla',
    'Chacma_baboon',
    'Chimpanzee',
    'Golden_snub-nosed_monkey',
    'Hamadryas_baboon',
    'Cotton-top_tamarin',
    'Proboscis_monkey',
    'Barbary_macaque',
    'Dusky_leaf_monkey',
    'Squirrel_monkey',
    'Emperor_tamarin',
    'Tufted_capuchin',
    'Mandrill',
    'Lion-tailed_macaque',
    'Formosan_rock_macaque',
    'Japanese_macaque',
]
num_species = len(species)

# Three groups that together cover every entry of `species` exactly once.
apes = [
    'Gibbon',
    'Siamang',
    'Bonobo',
    'Orangutan',
    'Gorilla',
    'Chimpanzee',
]
new_world = [
    'Golden_lion_tamarin',
    'Common_marmoset',
    'Emperor_tamarin',
    'Tufted_capuchin',
    'Cotton-top_tamarin',
    'Squirrel_monkey',
]
old_world = [
    'Rhesus_macaque',
    'Olive_baboon',
    'Crab-eating_macaque',
    'Vervet_monkey',
    'Chacma_baboon',
    'Golden_snub-nosed_monkey',
    'Hamadryas_baboon',
    'Proboscis_monkey',
    'Barbary_macaque',
    'Dusky_leaf_monkey',
    'Mandrill',
    'Lion-tailed_macaque',
    'Formosan_rock_macaque',
    'Japanese_macaque',
]

class OpenMonkeyDataset(Dataset):
    """Cropped monkey images paired with ground-truth landmark belief maps.

    Each annotation entry is read through the keys 'file', 'species', 'bbox'
    and 'landmarks'. The bounding box is used as [x, y, width, height] and the
    landmarks as a flat [x0, y0, x1, y1, ...] list of 17 points whose
    coordinates are treated as 1-based (1 is subtracted before indexing).

    Args:
        images_root: Directory prefix that is concatenated with each entry's
            'file' value (so it should end with a path separator).
        annotations_path: Path of the JSON annotation file; its 'data' key
            holds the list of entries.
        input_size: Side length the cropped image is resized to.
        output_size: Side length the cropped belief maps are resized to.
        c: Optional group filter: 'ape', 'new_world' or 'old_world' keeps only
            entries whose species is in the matching module-level list. Any
            other value (including None) keeps every entry.
    """

    def __init__(self, images_root, annotations_path, input_size, output_size, c = None):
        super().__init__()
        self.images_root = images_root
        # `with` guarantees the annotation file is closed even if parsing fails.
        with open(annotations_path) as file:
            self.annotations = json.load(file)

        # Optional taxonomic filter; unknown values of `c` leave the data untouched.
        groups = {'ape': apes, 'new_world': new_world, 'old_world': old_world}
        wanted = groups.get(c)
        if wanted is not None:
            self.annotations['data'] = [entry for entry in self.annotations['data'] if entry['species'] in wanted]

        self.convert = transforms.ToTensor()
        self.normalize = transforms.Normalize((0.4954, 0.4718, 0.4286), (0.2471, 0.2424, 0.2512))
        self.resize_in = transforms.Resize((input_size, input_size))
        self.resize_out = transforms.Resize((output_size, output_size))
        self.gaussian = transforms.GaussianBlur((51, 51), 9.0)

    def __len__(self):
        """Return the number of (possibly filtered) annotation entries."""
        return len(self.annotations['data'])

    def __getitem__(self, idx):
        """Return (image, gt_belief_images) for entry `idx`.

        `image` is the normalised bounding-box crop resized to the input size.
        `gt_belief_images` has 18 channels resized to the output size: one
        blurred blob per landmark plus a final channel holding
        max(1 - sum of the landmark channels, 0). Both tensors live on the GPU
        when one is available.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        cuda = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        entry = self.annotations['data'][idx]

        # Create images
        # Force three channels: grayscale or RGBA files would otherwise not
        # match the three-channel mean/std used by `self.normalize`.
        with Image.open(self.images_root + entry['file']) as picture:
            image = self.convert(picture.convert('RGB')).to(cuda)
        _, h, w = image.size()
        image = self.normalize(image)

        bbox = entry['bbox']
        # A negative origin would be read as "count from the end" by slicing,
        # so the crop window is clamped to start inside the image.
        left = max(bbox[0], 0)
        top = max(bbox[1], 0)
        right = bbox[0] + bbox[2]
        bottom = bbox[1] + bbox[3]
        image = self.resize_in(image[:, top : bottom, left : right])

        # Create ground truth belief images
        gt_belief_images = torch.zeros((18, 1, h, w), device = cuda)
        labels = entry['landmarks']
        for j in range(17):
            # Clamp to the image: a coordinate of 0 would otherwise become
            # index -1 and wrap to the opposite border, and a coordinate past
            # the image size would raise IndexError.
            col = min(max(labels[2 * j] - 1, 0), w - 1)
            row = min(max(labels[2 * j + 1] - 1, 0), h - 1)
            # Single-pixel impulse that the Gaussian blur below spreads into a blob.
            gt_belief_images[j, :, row, col] = 275.0
        gt_belief_images = self.gaussian(gt_belief_images)[:, 0, :, :]
        gt_belief_images = self.resize_out(gt_belief_images[:, top : bottom, left : right])
        # Last channel: whatever the 17 landmark channels leave of 1.0, floored at 0.
        gt_belief_images[17, :, :] = f.threshold(1.0 - torch.sum(gt_belief_images[0:17, :, :], 0), 0.0, 0.0)
        return (image, gt_belief_images)

    def get_original(self, idx):
        """Return (image, landmarks) for entry `idx` without crop or normalisation.

        `image` is the full RGB picture as a tensor (on the GPU when available);
        `landmarks` is the entry's landmark list as a float tensor on the CPU.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        cuda = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        entry = self.annotations['data'][idx]

        # Force three channels so downstream three-channel models always get RGB.
        with Image.open(self.images_root + entry['file']) as picture:
            image = self.convert(picture.convert('RGB')).to(cuda)
        landmarks = torch.tensor(entry['landmarks'], dtype = torch.float)
        return (image, landmarks)

class Stage1Net(nn.Module):
    """First stage of the cascade: maps an RGB image to 18 belief maps.

    Three convolution + 2x2 max-pool steps reduce the spatial size by a factor
    of eight; four further stride-1 convolutions then produce the 18 output
    channels. All convolutions use 'same' padding.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        # Feature extractor (each of these is followed by ReLU and pooling).
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 96, kernel_size = 9, padding = 'same')
        self.conv2 = nn.Conv2d(in_channels = 96, out_channels = 256, kernel_size = 9, padding = 'same')
        self.conv3 = nn.Conv2d(in_channels = 256, out_channels = 384, kernel_size = 9, padding = 'same')
        # Prediction head (ReLU after every layer except the last).
        self.conv4 = nn.Conv2d(in_channels = 384, out_channels = 384, kernel_size = 5, padding = 'same')
        self.conv5 = nn.Conv2d(in_channels = 384, out_channels = 256, kernel_size = 9, padding = 'same')
        self.conv6 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 1, padding = 'same')
        self.conv7 = nn.Conv2d(in_channels = 256, out_channels = 18, kernel_size = 1, padding = 'same')

    def forward(self, x):
        """Return the 18-channel belief maps for image tensor `x`."""
        for pooled_conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(f.relu(pooled_conv(x)))
        for plain_conv in (self.conv4, self.conv5, self.conv6):
            x = f.relu(plain_conv(x))
        # The final layer is left linear (no ReLU).
        return self.conv7(x)

class StageTNet(nn.Module):
    """Refinement stage: re-predicts 18 belief maps from the image and a prior estimate.

    Image features (384 channels, one eighth of the input size) are
    concatenated with the 18 belief maps of the previous stage, giving the
    402 channels consumed by the remaining convolutions.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        # Image feature extractor.
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 96, kernel_size = 9, padding = 'same')
        self.conv2 = nn.Conv2d(in_channels = 96, out_channels = 256, kernel_size = 9, padding = 'same')
        self.conv3 = nn.Conv2d(in_channels = 256, out_channels = 384, kernel_size = 9, padding = 'same')
        self.conv4 = nn.Conv2d(in_channels = 384, out_channels = 384, kernel_size = 5, padding = 'same')
        # Layers applied after the features and prior belief maps are joined (384 + 18 = 402).
        self.conv5 = nn.Conv2d(in_channels = 402, out_channels = 402, kernel_size = 11, padding = 'same')
        self.conv6 = nn.Conv2d(in_channels = 402, out_channels = 256, kernel_size = 11, padding = 'same')
        self.conv7 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 11, padding = 'same')
        self.conv8 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 1, padding = 'same')
        self.conv9 = nn.Conv2d(in_channels = 256, out_channels = 18, kernel_size = 1, padding = 'same')

    def forward(self, x, belief_images):
        """Return refined belief maps for image `x` given the previous stage's `belief_images`.

        `belief_images` is detached, so no gradient flows back into the stage
        that produced it.
        """
        for pooled_conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(f.relu(pooled_conv(x)))
        x = f.relu(self.conv4(x))

        # Channels sit on axis 0 for an unbatched (3-D) tensor and on axis 1 otherwise.
        channel_axis = 0 if belief_images.dim() == 3 else 1
        x = torch.cat((x, belief_images.detach()), channel_axis)

        for plain_conv in (self.conv5, self.conv6, self.conv7, self.conv8):
            x = f.relu(plain_conv(x))
        # The final layer is left linear (no ReLU).
        return self.conv9(x)

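**TRAINING** — continues training the three-stage pose network on the Old World monkey subset, starting from previously saved checkpoints, and reports the validation loss after every epoch.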

In [None]:
# Data: only Old World monkey samples are used for both training and validation.
trainset = OpenMonkeyDataset('/content/data/train/', '/content/drive/Shareddrives/OpenMonkeyChallenge/train_annotation.json', input_size, output_size, 'old_world')
trainloader = DataLoader(trainset, batch_size = 16, shuffle = True, num_workers = 0)
valset = OpenMonkeyDataset('/content/data/val/', '/content/drive/Shareddrives/OpenMonkeyChallenge/val_annotation.json', input_size, output_size, 'old_world')
valloader = DataLoader(valset, batch_size = 16, shuffle = True, num_workers = 0)

# Models: one optimizer per stage, all with the same hyper-parameters.
stage1 = Stage1Net().to(cuda)
optimizer1 = torch.optim.SGD(stage1.parameters(), lr = 1e-7, momentum = 0.9)
stage2 = StageTNet().to(cuda)
optimizer2 = torch.optim.SGD(stage2.parameters(), lr = 1e-7, momentum = 0.9)
stage3 = StageTNet().to(cuda)
optimizer3 = torch.optim.SGD(stage3.parameters(), lr = 1e-7, momentum = 0.9)
criterion = nn.MSELoss(reduction = 'sum')

# Resume from saved checkpoints. `map_location` lets weights that were saved
# from a GPU run load on a CPU-only runtime as well.
stage1.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/old_world_stage1.pth', map_location = cuda))
stage2.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/old_world_stage2.pth', map_location = cuda))
stage3.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/old_world_stage3.pth', map_location = cuda))


def cascade_forward(images, gt_belief_images):
    """Run all three stages and return (final belief maps, summed loss).

    The loss is the sum of the per-stage losses, each measured against the
    same ground truth. Modules are called directly (not via `.forward`) so
    that registered hooks run.
    """
    belief_images = stage1(images)
    total = criterion(belief_images, gt_belief_images)
    for stage in (stage2, stage3):
        belief_images = stage(images, belief_images)
        total = total + criterion(belief_images, gt_belief_images)
    return belief_images, total


for epoch in range(100):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        images, gt_belief_images = data

        # All stages forward
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        optimizer3.zero_grad()
        belief_images, losses = cascade_forward(images, gt_belief_images)

        # All stages backward + optimize
        losses.backward()
        running_loss += losses.item()
        optimizer1.step()
        optimizer2.step()
        optimizer3.step()

        # Print statistics (mean loss of the last 100 batches) and show the
        # first and last belief channel of the first sample next to the target.
        if i % 100 == 99:
            print("%d, %d, %f" %(epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
            plt.imshow(belief_images[0][0].detach().cpu(), vmin = 0.0, vmax = 1.0)
            plt.show()
            plt.imshow(gt_belief_images[0][0].cpu())
            plt.show()
            plt.imshow(belief_images[0][17].detach().cpu(), vmin = 0.0, vmax = 1.0)
            plt.show()
            plt.imshow(gt_belief_images[0][17].cpu())
            plt.show()

    # Checkpoint every stage once per epoch.
    torch.save(stage1.state_dict(), '/content/drive/Shareddrives/OpenMonkeyChallenge/' + str(epoch) + 'old_world_stage1.pth')
    torch.save(stage2.state_dict(), '/content/drive/Shareddrives/OpenMonkeyChallenge/' + str(epoch) + 'old_world_stage2.pth')
    torch.save(stage3.state_dict(), '/content/drive/Shareddrives/OpenMonkeyChallenge/' + str(epoch) + 'old_world_stage3.pth')

    # Validation
    with torch.no_grad():
        running_loss = 0.0
        for i, data in enumerate(valloader, 0):
            images, gt_belief_images = data
            belief_images, losses = cascade_forward(images, gt_belief_images)
            running_loss += losses.item()
    # Average over the number of batches. Dividing by the last index `i`
    # over-estimates the mean and divides by zero when there is a single batch.
    print("%d, %f" %(epoch + 1, running_loss / max(len(valloader), 1)))

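**TESTING** — evaluates the base three-stage pose network on the full validation set, cropping each image with the bounding box predicted by the detector, and saves the per-landmark errors.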

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

valset = OpenMonkeyDataset('/content/data/val/', '/content/drive/Shareddrives/OpenMonkeyChallenge/val_annotation.json', input_size, output_size)

# Detector: Faster R-CNN whose box-predictor head is replaced before the
# fine-tuned weights are loaded. `map_location` lets GPU-saved weights load on
# a CPU-only runtime.
detector = fasterrcnn_resnet50_fpn().to(cuda)
in_features = detector.roi_heads.box_predictor.cls_score.in_features
detector.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_species).to(cuda)
detector.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/detector.pth', map_location = cuda))

# Pose network (base weights).
stage1 = Stage1Net().to(cuda)
stage2 = StageTNet().to(cuda)
stage3 = StageTNet().to(cuda)
stage1.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/base_stage1.pth', map_location = cuda))
stage2.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/base_stage2.pth', map_location = cuda))
stage3.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/base_stage3.pth', map_location = cuda))

stage1.eval()
stage2.eval()
stage3.eval()
detector.eval()
normalize = transforms.Normalize((0.4954, 0.4718, 0.4286), (0.2471, 0.2424, 0.2512))

# The pose network halves the resolution three times, so a crop needs at least
# 8 pixels per side; smaller detector boxes are treated like a missed detection.
MIN_CROP_SIDE = 8

# One row per validation sample, one column per landmark.
mpjpe = torch.zeros((len(valset), 17))
with torch.no_grad():
    for i in range(len(valset)): # Iterate over samples
        bbox_gt = valset.annotations['data'][i]['bbox']

        image_orig, landmarks = valset.get_original(i)
        results = detector(image_orig[None, :, :, :])

        # Use the first returned box, truncated to whole pixels.
        x0 = y0 = x1 = y1 = 0
        boxes = results[0]['boxes']
        if boxes.size(0) > 0:
            x0, y0, x1, y1 = (int(v) for v in boxes[0].tolist())
        if x1 - x0 < MIN_CROP_SIDE or y1 - y0 < MIN_CROP_SIDE:
            # No usable detection: fall back to the central quarter of the image.
            _, img_h, img_w = image_orig.size()
            x0, y0 = int(3 * img_w / 8), int(3 * img_h / 8)
            x1, y1 = int(5 * img_w / 8), int(5 * img_h / 8)
        h = y1 - y0
        w = x1 - x0

        # NOTE(review): the dataset resizes training crops to
        # input_size x input_size, but this crop is fed at its native size -
        # confirm that the scale difference is intended.
        crop = normalize(image_orig)[:, y0 : y1, x0 : x1]
        belief_images = stage1(crop)
        belief_images = stage2(crop, belief_images)
        belief_images = stage3(crop, belief_images)
        resized = transforms.Resize((h, w))(belief_images)

        # Pixel coordinate grids; identical for every landmark, so built once per sample.
        rows, cols = torch.meshgrid(torch.arange(h, device = cuda), torch.arange(w, device = cuda), indexing = 'ij')
        crop_origin = torch.tensor([x0, y0])
        for j in range(17): # Iterate over landmarks
            # Sharp softmax (x1000) over the map, then the expected (x, y)
            # position truncated to whole pixels.
            softmax = f.softmax(resized[j].reshape(h * w) * 1000, dim = 0).view(h, w)
            bbox_pos = torch.stack([torch.sum(softmax * cols), torch.sum(softmax * rows)]).to(torch.long).cpu()
            pos = bbox_pos + crop_origin
            # Distance to the annotated landmark, normalised by the ground-truth box width.
            mpjpe[i, j] = torch.linalg.norm(pos - landmarks[2 * j : 2 * j + 2]) / bbox_gt[2]
    torch.save(mpjpe, '/content/drive/Shareddrives/OpenMonkeyChallenge/mpjpe.pt')

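**CLASS TESTING** — evaluates on the full validation set with a separate pose network for apes, New World monkeys and Old World monkeys; the network is chosen from the species predicted by the detector.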

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

valset = OpenMonkeyDataset('/content/data/val/', '/content/drive/Shareddrives/OpenMonkeyChallenge/val_annotation.json', input_size, output_size)

# Detector: Faster R-CNN whose box-predictor head is replaced before the
# fine-tuned weights are loaded. `map_location` lets GPU-saved weights load on
# a CPU-only runtime.
detector = fasterrcnn_resnet50_fpn().to(cuda)
in_features = detector.roi_heads.box_predictor.cls_score.in_features
detector.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_species).to(cuda)
detector.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/detector.pth', map_location = cuda))
detector.eval()


def load_cascade(prefix):
    """Return the three pose stages loaded from `<prefix>_stage{1,2,3}.pth`, in eval mode."""
    stages = (Stage1Net().to(cuda), StageTNet().to(cuda), StageTNet().to(cuda))
    for number, stage in enumerate(stages, 1):
        stage.load_state_dict(torch.load('/content/drive/Shareddrives/OpenMonkeyChallenge/' + prefix + '_stage' + str(number) + '.pth', map_location = cuda))
        stage.eval()
    return stages


# One cascade per taxonomic group.
ape1, ape2, ape3 = load_cascade('ape')
new1, new2, new3 = load_cascade('new_world')
old1, old2, old3 = load_cascade('old_world')

normalize = transforms.Normalize((0.4954, 0.4718, 0.4286), (0.2471, 0.2424, 0.2512))

# The pose network halves the resolution three times, so a crop needs at least
# 8 pixels per side; smaller detector boxes are replaced by the fallback box.
MIN_CROP_SIDE = 8
# Set to True to overlay belief maps and predicted/annotated landmarks on every crop.
VISUALIZE = False

# One row per validation sample, one column per landmark.
mpjpe = torch.zeros((len(valset), 17))
with torch.no_grad():
    for i in range(len(valset)): # Iterate over samples
        bbox_gt = valset.annotations['data'][i]['bbox']

        image_orig, landmarks = valset.get_original(i)
        results = detector(image_orig[None, :, :, :])

        x0 = y0 = x1 = y1 = 0
        boxes = results[0]['boxes']
        if boxes.size(0) > 0:
            # Use the first returned detection, box truncated to whole pixels.
            x0, y0, x1, y1 = (int(v) for v in boxes[0].tolist())
            # NOTE(review): torchvision detection heads reserve class 0 for the
            # background - confirm that this label-to-species mapping matches
            # how detector.pth was trained.
            label = species[int(results[0]['labels'][0])]
        else:
            # Random species name. This used to be a tensor, which does not
            # appear to compare equal to any species string, so the fallback
            # always selected the old-world models.
            label = species[int(torch.randint(num_species, (1,)))]
        if x1 - x0 < MIN_CROP_SIDE or y1 - y0 < MIN_CROP_SIDE:
            # No usable box: fall back to the central quarter of the image.
            _, img_h, img_w = image_orig.size()
            x0, y0 = int(3 * img_w / 8), int(3 * img_h / 8)
            x1, y1 = int(5 * img_w / 8), int(5 * img_h / 8)

        # Pick the cascade that matches the (predicted) taxonomic group.
        if label in apes:
            stage1, stage2, stage3 = ape1, ape2, ape3
        elif label in new_world:
            stage1, stage2, stage3 = new1, new2, new3
        else:
            stage1, stage2, stage3 = old1, old2, old3

        h = y1 - y0
        w = x1 - x0
        # NOTE(review): the dataset resizes training crops to
        # input_size x input_size, but this crop is fed at its native size -
        # confirm that the scale difference is intended.
        crop = normalize(image_orig)[:, y0 : y1, x0 : x1]
        belief_images = stage1(crop)
        belief_images = stage2(crop, belief_images)
        belief_images = stage3(crop, belief_images)
        resized = transforms.Resize((h, w))(belief_images)

        # Pixel coordinate grids; identical for every landmark, so built once per sample.
        rows, cols = torch.meshgrid(torch.arange(h, device = cuda), torch.arange(w, device = cuda), indexing = 'ij')
        crop_origin = torch.tensor([x0, y0])
        if VISUALIZE:
            crop_rgb = image_orig[:, y0 : y1, x0 : x1].movedim(0, 2).cpu()
            bbox_positions = torch.zeros((17, 2))
        for j in range(17): # Iterate over landmarks
            # Sharp softmax (x1000) over the map, then the expected (x, y)
            # position truncated to whole pixels.
            softmax = f.softmax(resized[j].reshape(h * w) * 1000, dim = 0).view(h, w)
            bbox_pos = torch.stack([torch.sum(softmax * cols), torch.sum(softmax * rows)]).to(torch.long).cpu()
            pos = bbox_pos + crop_origin
            # Distance to the annotated landmark, normalised by the ground-truth box width.
            mpjpe[i, j] = torch.linalg.norm(pos - landmarks[2 * j : 2 * j + 2]) / bbox_gt[2]

            if VISUALIZE:
                # Belief Image Visualization
                bbox_positions[j, 0] = bbox_pos[0]
                bbox_positions[j, 1] = bbox_pos[1]
                plt.imshow(crop_rgb)
                plt.imshow(resized[j].cpu(), cmap = 'jet', vmin = 0.0, vmax = 1.0, alpha = 0.5)
                plt.show()

        if VISUALIZE:
            # Landmark Visualization: predictions in red, annotations in blue.
            plt.imshow(crop_rgb)
            plt.scatter(x = bbox_positions[:, 0], y = bbox_positions[:, 1], c = 'r', marker = 'o', s = 40)
            plt.scatter(x = landmarks[0::2] - x0, y = landmarks[1::2] - y0, c = 'b', marker = 'x', s = 40)
            plt.show()
    torch.save(mpjpe, '/content/drive/Shareddrives/OpenMonkeyChallenge/our_mpjpe.pt')