In [None]:
## Dataset Link: https://drive.google.com/file/d/1lhAaeQCmk2y440PmagA0KmIVBIysVMwu/view?usp=drive_link

# Unziping the dataset

In [14]:
import zipfile

zip_path = "tennis_court_det_dataset.zip"
extract_to = "./"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print("Extraction complete! ✅")

Extraction complete! ✅


# Importing the libraries

In [6]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import os 
import json
import cv2
import numpy as np

print("GPU:",torch.cuda.is_available())
print("No of GPUs available: ",torch.cuda.device_count())
print("GPU name: ", torch.cuda.get_device_name(0))

GPU: True
No of GPUs available:  1
GPU name:  NVIDIA GeForce RTX 3050 Laptop GPU


In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

# Creating Torch Dataset

In [8]:
class KeyPointDataset(Dataset):
    def __init__(self, img_dir, data_file):
        self.img_dir = img_dir
        with open(data_file, 'r') as f:
            self.data = json.load(f)

        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        item = self.data[index]
        img = cv2.imread(f"{self.img_dir}/{item['id']}.png")
        h, w = img.shape[:2]

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(img)
        kps = np.array(item["kps"]).flatten()   ##Convert 1D to 2D array
        kps = kps.astype(np.float32)

        kps[::2] *= 224.0 / w 
        kps[1::2] *= 224.0 / h

        return img, kps



In [9]:
img_dir = "data\images"
train_data_file = "data\data_train.json"
valid_data_file = "data\data_val.json"

train_dataset = KeyPointDataset(img_dir=img_dir, data_file=train_data_file)
valid_dataset = KeyPointDataset(img_dir=img_dir, data_file=valid_data_file)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=4,  shuffle=True)


  img_dir = "data\images"
  train_data_file = "data\data_train.json"
  valid_data_file = "data\data_val.json"


# Building Model

In [13]:
model = models.resnet50(pretrained=True)

model.fc = torch.nn.Linear(model.fc.in_features, 14*2)  ##Because we have 14 keypoints and each keypoint has two coordinates (x,y)
model = model.to(device)

# Training the Model

In [14]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [17]:
eopchs = 20
for epoch in range(eopchs):
    for i, (image, kps) in enumerate(train_loader):
        image = image.to(device)
        kps = kps.to(device)

        optimizer.zero_grad()

        output = model(image)

        loss = criterion(output, kps)

        loss.backward()

        optimizer.step()

        if i % 50 == 0:
            print(f"Epoch: {epoch+1}/{eopchs}, Batch: {i+1}/{len(train_loader)}, Loss: {loss.item():.4f}")


Epoch: 1/20, Batch: 1/415, Loss: 8016.5312
Epoch: 1/20, Batch: 51/415, Loss: 6877.3252
Epoch: 1/20, Batch: 101/415, Loss: 5318.0972
Epoch: 1/20, Batch: 151/415, Loss: 4748.4619
Epoch: 1/20, Batch: 201/415, Loss: 3237.2556
Epoch: 1/20, Batch: 251/415, Loss: 2406.4805
Epoch: 1/20, Batch: 301/415, Loss: 1940.8003
Epoch: 1/20, Batch: 351/415, Loss: 1764.3033
Epoch: 1/20, Batch: 401/415, Loss: 1061.2107
Epoch: 2/20, Batch: 1/415, Loss: 938.3990
Epoch: 2/20, Batch: 51/415, Loss: 744.2564
Epoch: 2/20, Batch: 101/415, Loss: 545.9596
Epoch: 2/20, Batch: 151/415, Loss: 421.9608
Epoch: 2/20, Batch: 201/415, Loss: 310.3069
Epoch: 2/20, Batch: 251/415, Loss: 251.7309
Epoch: 2/20, Batch: 301/415, Loss: 145.5948
Epoch: 2/20, Batch: 351/415, Loss: 83.3557
Epoch: 2/20, Batch: 401/415, Loss: 78.1134
Epoch: 3/20, Batch: 1/415, Loss: 113.8022
Epoch: 3/20, Batch: 51/415, Loss: 74.0767
Epoch: 3/20, Batch: 101/415, Loss: 47.8127
Epoch: 3/20, Batch: 151/415, Loss: 37.5737
Epoch: 3/20, Batch: 201/415, Loss: 51

# Saving Model

In [19]:
torch.save(model.state_dict(), "keypoints_model.pth")

# Again Building and Training the Model

In [21]:
model2 = models.resnet18(pretrained=True)

model2.fc = torch.nn.Linear(model.fc.in_features, 14*2)  ##Because we have 14 keypoints and each keypoint has two coordinates (x,y)
model2 = model.to(device)



In [22]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)

In [23]:
eopchs = 5
for epoch in range(eopchs):
    for i, (image, kps) in enumerate(train_loader):
        image = image.to(device)
        kps = kps.to(device)

        optimizer.zero_grad()

        output = model2(image)

        loss = criterion(output, kps)

        loss.backward()

        optimizer.step()

        if i % 50 == 0:
            print(f"Epoch: {epoch+1}/{eopchs}, Batch: {i+1}/{len(train_loader)}, Loss: {loss.item():.4f}")


Epoch: 1/5, Batch: 1/415, Loss: 15046.4219
Epoch: 1/5, Batch: 51/415, Loss: 7838.5029
Epoch: 1/5, Batch: 101/415, Loss: 3341.0828
Epoch: 1/5, Batch: 151/415, Loss: 1148.2333
Epoch: 1/5, Batch: 201/415, Loss: 262.3004
Epoch: 1/5, Batch: 251/415, Loss: 63.8837
Epoch: 1/5, Batch: 301/415, Loss: 75.9883
Epoch: 1/5, Batch: 351/415, Loss: 77.3623
Epoch: 1/5, Batch: 401/415, Loss: 37.6867
Epoch: 2/5, Batch: 1/415, Loss: 38.1912
Epoch: 2/5, Batch: 51/415, Loss: 32.7832
Epoch: 2/5, Batch: 101/415, Loss: 23.9866
Epoch: 2/5, Batch: 151/415, Loss: 30.0792
Epoch: 2/5, Batch: 201/415, Loss: 17.8012
Epoch: 2/5, Batch: 251/415, Loss: 23.1415
Epoch: 2/5, Batch: 301/415, Loss: 11.6759
Epoch: 2/5, Batch: 351/415, Loss: 18.9961
Epoch: 2/5, Batch: 401/415, Loss: 12.8060
Epoch: 3/5, Batch: 1/415, Loss: 32.6098
Epoch: 3/5, Batch: 51/415, Loss: 15.2283
Epoch: 3/5, Batch: 101/415, Loss: 12.9378
Epoch: 3/5, Batch: 151/415, Loss: 13.1549
Epoch: 3/5, Batch: 201/415, Loss: 12.1140
Epoch: 3/5, Batch: 251/415, Loss:

In [24]:
torch.save(model2.state_dict(), "keypoints_model_2.pth")