## Get the dataset and unzip it

In [21]:
!pip install ijson



In [24]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models , transforms 
import json
import cv2 as cv2
import numpy as np 

In [17]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Creating a torch dataset

In [27]:

class keypointdetect(Dataset):
    """Dataset yielding ``(image, keypoints)`` pairs for keypoint regression.

    The annotation file is a JSON list of records. Each record must provide
    ``id`` (the image is read from ``{img_dir}/{id}.png``) and ``kps``
    (keypoint coordinates, flattened here to ``[x0, y0, x1, y1, ...]``).
    Images are resized to ``IMG_SIZE x IMG_SIZE`` and the keypoints are
    rescaled by the same factors so they stay aligned with the image.
    """

    # Side length (pixels) of the square the images are resized to.
    IMG_SIZE = 224

    def __init__(self , img_dir , file) -> None:
        """Load the annotations and build the image preprocessing pipeline.

        Args:
            img_dir: Directory containing the ``<id>.png`` images.
            file: Path to the JSON annotation file.
        """
        self.img_dir = img_dir
        with open(file , "r") as f:
            self.data = json.load(f)

        self.transform = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize((self.IMG_SIZE, self.IMG_SIZE)),
                transforms.ToTensor(),

                # Commonly used ImageNet channel mean / std values.
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]
        )

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    @classmethod
    def _scale_keypoints(cls, kps, width, height):
        """Flatten ``kps`` and rescale it from a ``width x height`` image to the resized image.

        Args:
            kps: Keypoint coordinates (nested list or array) in original pixels.
            width: Width of the original image.
            height: Height of the original image.

        Returns:
            A 1-D ``float32`` numpy array ``[x0, y0, x1, y1, ...]`` expressed
            in the coordinate frame of the resized image.
        """
        # flatten() returns a copy, so the in-place scaling below never
        # touches the stored annotations.
        keypoints = np.asarray(kps, dtype=np.float32).flatten()
        keypoints[::2] *= cls.IMG_SIZE / width     # even indices hold x
        keypoints[1::2] *= cls.IMG_SIZE / height   # odd indices hold y
        return keypoints

    def __getitem__(self , ind):
        """Return the preprocessed image tensor and its rescaled keypoints.

        Raises:
            FileNotFoundError: If the image for this record cannot be read.
        """
        item = self.data[ind]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        h , w = img.shape[:2]

        img = cv2.cvtColor(img , cv2.COLOR_BGR2RGB) # OpenCV loads BGR; convert to RGB
        img = self.transform(img)

        keypoints = self._scale_keypoints(item['kps'], w, h)
        return img , keypoints


# Alias so the dataset can also be referred to by a conventional CapWords name.
KeypointsDataset = keypointdetect
        
        

In [None]:
# Build the train / validation datasets from the shared image folder and
# their respective annotation files.
train_dtset = keypointdetect("data/images" , "data/data_train.json")
val_dtset = keypointdetect("data/images" , "data/data_val.json")

# Shuffle only the training batches; a fixed validation order keeps
# evaluation runs directly comparable.
train_loader = DataLoader(train_dtset , batch_size= 8 , shuffle= True)
val_loader = DataLoader(val_dtset , batch_size= 8 , shuffle= False)



## Create the model: fine-tune a pretrained ResNet-50


In [None]:
# 14 keypoints, each an (x, y) pair -> 28 regression outputs.
n_outputs = 14 * 2

# Start from a pretrained ResNet-50 and swap its final fully connected layer
# for a fresh linear head that emits the keypoint vector.
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=n_outputs)

# Move all parameters to the selected device.
model = model.to(device)

## Training the model

In [None]:
# Mean squared error between predicted and target keypoint coordinates.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters() , lr = 1e-4)

epochs = 25

# Put the network in training mode explicitly.
model.train()

for epoch in range(epochs):
    for i , (imgs , keypoints) in enumerate(train_loader):
        # Inputs and targets must live on the same device as the model.
        imgs = imgs.to(device)
        keypoints = keypoints.to(device)

        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs , keypoints)

        loss.backward()
        optimizer.step()

        # Report the loss every 10 iterations.
        if i % 10 == 0:
            print(f"Epochs {epoch} , iter {i} , loss : {loss.item()}")



In [None]:
# Persist only the learned parameters; restore them later with
# model.load_state_dict(torch.load("Keypoints_detect.pth")).
torch.save(model.state_dict() , "Keypoints_detect.pth")