<a href="https://colab.research.google.com/github/Adithya-A-R/AI-Gym-Mate/blob/main/pose_model_def.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

In [2]:
class PoseKeypointModel(nn.Module):
    """CNN that regresses 2D keypoint coordinates directly from an RGB image.

    Three Conv -> ReLU -> MaxPool blocks (each pooling step halves height and
    width) feed a three-layer fully connected head that emits
    ``num_keypoints * 2`` values per image, i.e. one (x, y) pair per keypoint.

    Args:
        num_keypoints: Number of keypoints to predict per image.
        input_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. The default of 224
            yields the 128 * 28 * 28 = 100352 input features of the first
            linear layer.

    Raises:
        ValueError: If ``num_keypoints`` is not positive, or if either input
            dimension is smaller than 8 (the three pooling steps would reduce
            it to zero).
    """

    def __init__(self, num_keypoints=16, input_size=224):
        super().__init__()

        # Accept either a scalar (square input) or a (height, width) pair.
        try:
            height, width = input_size
        except TypeError:
            height = width = input_size
        height, width = int(height), int(width)

        if num_keypoints < 1:
            raise ValueError(
                f"num_keypoints must be positive, got {num_keypoints}"
            )
        if height < 8 or width < 8:
            raise ValueError(
                "input_size must be at least 8 in each dimension, "
                f"got {(height, width)}"
            )

        self.num_keypoints = num_keypoints
        self.input_size = (height, width)

        # -------- CNN FEATURE EXTRACTOR --------
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # The padded 3x3 convolutions keep the spatial size; each of the three
        # MaxPool2d(2) layers floor-halves it, so the final map is size // 8.
        flat_features = 128 * (height // 8) * (width // 8)

        # -------- REGRESSION HEAD --------
        self.regressor = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, num_keypoints * 2),  # (x, y) for each keypoint
        )

    def forward(self, x):
        """Return keypoint predictions of shape (batch, num_keypoints * 2).

        ``x`` is a (batch, 3, height, width) tensor whose spatial size must
        match the ``input_size`` the model was constructed with.
        """
        x = self.features(x)
        x = torch.flatten(x, 1)  # keep the batch dim, flatten everything else
        return self.regressor(x)


In [3]:
# Build the network for 16 keypoints (the final layer then has 32 outputs)
# and print its layer-by-layer structure as a quick sanity check.
model = PoseKeypointModel(num_keypoints=16)
print(model)


PoseKeypointModel(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (regressor): Sequential(
    (0): Linear(in_features=100352, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=32, bias=True)
  )
)


In [4]:
# Fake input batch: (batch_size, channels, height, width)
dummy_input = torch.randn(1, 3, 224, 224)

# Forward pass. This is only a shape check, so disable gradient tracking to
# avoid building an autograd graph that would never be used.
with torch.no_grad():
    output = model(dummy_input)

print("Output shape:", output.shape)


Output shape: torch.Size([1, 32])


In [5]:
from torch.utils.data import Dataset
import numpy as np

class PoseKeypointDataset(Dataset):
    """Dataset that pairs each image with its flattened keypoint target.

    Samples are converted to float32 tensors on access, so the arrays passed
    in may use any numeric dtype.
    """

    def __init__(self, images, keypoints):
        """
        images: numpy array of shape (N, 3, 224, 224)
        keypoints: numpy array of shape (N, 32)

        Raises ValueError if the two arrays do not hold the same number of
        samples.
        """
        # Fail fast: a length mismatch would otherwise surface only as an
        # IndexError deep inside a DataLoader, or silently drop targets.
        if len(images) != len(keypoints):
            raise ValueError(
                "images and keypoints must have the same number of samples, "
                f"got {len(images)} and {len(keypoints)}"
            )
        self.images = images
        self.keypoints = keypoints

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the (image, keypoint) pair at ``idx`` as float32 tensors."""
        # torch.tensor copies, so the returned tensors never alias the
        # arrays stored on the dataset.
        image = torch.tensor(self.images[idx], dtype=torch.float32)
        keypoint = torch.tensor(self.keypoints[idx], dtype=torch.float32)
        return image, keypoint


In [6]:
# Synthetic stand-in data: 10 random samples with values drawn uniformly
# from [0, 1), just enough to exercise the data pipeline end to end.
num_samples = 10

# Images first, then targets (32 values per sample, matching the dataset
# docstring's (N, 32) keypoint layout).
dummy_images = np.random.rand(num_samples, 3, 224, 224)
dummy_keypoints = np.random.rand(num_samples, 32)

dataset = PoseKeypointDataset(
    images=dummy_images,
    keypoints=dummy_keypoints,
)


In [7]:
from torch.utils.data import DataLoader

# Serve the dataset in shuffled mini-batches of two samples.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)


In [8]:
# Adam over every model parameter, learning rate 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Mean squared error between predicted and target keypoint coordinates.
criterion = nn.MSELoss()


In [9]:
# Smoke-test training run: one pass over the dummy data to confirm that the
# forward pass, loss, backward pass and optimizer step all work together.
model.train()

for epoch in range(1):  # only 1 epoch for test
    running_loss = 0.0  # sum of per-batch mean losses, weighted by batch size
    samples_seen = 0

    for images, targets in dataloader:
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # The criterion averages over the batch, so weight each batch by its
        # size; a smaller final batch then does not skew the epoch mean.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean over the whole epoch rather than the last batch's loss.
    # The guard keeps an empty loader from raising.
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")


Epoch 1, Loss: 0.1695
