<a href="https://colab.research.google.com/github/Adithya-A-R/AI-Gym-Mate/blob/main/AI_Gymmate_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import json
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt


In [7]:

!ls annotations/

keypoints.json


In [5]:
class CocoPoseDataset(Dataset):
    def __init__(self, img_dir, ann_file, img_size=256):
        self.img_dir = img_dir
        self.img_size = img_size

        with open(ann_file) as f:
            coco = json.load(f)

        self.images = {img["id"]: img for img in coco["images"]}
        self.annotations = coco["annotations"]

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        ann = self.annotations[idx]
        img_info = self.images[ann["image_id"]]

        img_path = os.path.join(self.img_dir, img_info["file_name"])
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (256, 256))
        image = image / 255.0
        image = torch.tensor(image).permute(2, 0, 1).float()

        keypoints = np.array(ann["keypoints"]).reshape(-1, 3)

        # ðŸ”¥ TAKE ONLY FIRST 16 KEYPOINTS â†’ (32 values)
        keypoints = keypoints[:16, :2].flatten()

        keypoints = torch.tensor(keypoints).float()

        return image, keypoints


In [8]:
dataset = CocoPoseDataset(
    "images",
    "annotations/keypoints.json"
)

img, kp = dataset[0]
print("Image shape:", img.shape)
print("Keypoints shape:", kp.shape)


Image shape: torch.Size([3, 256, 256])
Keypoints shape: torch.Size([32])


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PoseNet(nn.Module):
    def __init__(self):
        super(PoseNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(128 * 32 * 32, 256)
        self.fc2 = nn.Linear(256, 32)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 256 â†’ 128
        x = self.pool(F.relu(self.conv2(x)))  # 128 â†’ 64
        x = self.pool(F.relu(self.conv3(x)))  # 64 â†’ 32

        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x


In [11]:
model = PoseNet()
dummy = torch.randn(1, 3, 256, 256)
print(model(dummy).shape)


torch.Size([1, 32])


In [12]:
loader = DataLoader(dataset, batch_size=8, shuffle=True)

model = PoseNet()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

losses = []

for epoch in range(5):
    epoch_loss = 0.0

    for images, keypoints in loader:
        preds = model(images)
        loss = criterion(preds, keypoints)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(loader)
    losses.append(avg_loss)

    print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f}")


Epoch 1 | Loss: 22642.2774
Epoch 2 | Loss: 22215.8289
Epoch 3 | Loss: 22074.5307
Epoch 4 | Loss: 21729.4072
Epoch 5 | Loss: 21991.5430
