In [None]:
# Data provenance: download the archive from
# https://drive.google.com/file/d/17pLUyKY09aYcbpZNRsZy9qW_iJcXSDle/view
# and store it in Google Drive; the next cell reads it from MyDrive/data/.
from google.colab import drive

# Colab-only: exposes the user's Google Drive under /content/drive.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
! unzip /content/drive/MyDrive/data/public_tests.wCCU_c0U.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 00_test_img_input/train/images/04503.jpg  
  inflating: 00_test_img_input/train/images/03834.jpg  
  inflating: 00_test_img_input/train/images/02981.jpg  
  inflating: 00_test_img_input/train/images/03509.jpg  
  inflating: 00_test_img_input/train/images/05617.jpg  
  inflating: 00_test_img_input/train/images/03788.jpg  
  inflating: 00_test_img_input/train/images/00696.jpg  
  inflating: 00_test_img_input/train/images/00065.jpg  
  inflating: 00_test_img_input/train/images/05270.jpg  
  inflating: 00_test_img_input/train/images/02194.jpg  
  inflating: 00_test_img_input/train/images/04461.jpg  
  inflating: 00_test_img_input/train/images/03729.jpg  
  inflating: 00_test_img_input/train/images/05386.jpg  
  inflating: 00_test_img_input/train/images/04277.jpg  
  inflating: 00_test_img_input/train/images/05063.jpg  
  inflating: 00_test_img_input/train/images/03776.jpg  
  inflating: 00_test_img_input/train/im

In [None]:
import torch
import torchvision
import torchvision.transforms.v2 as T
import matplotlib.pyplot as plt
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
import pathlib
import os
import pandas as pd
from torchvision import transforms
from PIL import Image

In [None]:
class FacialKeypointsDataset(Dataset):
    """Face images paired with (x, y) keypoint annotations read from a CSV file.

    The CSV needs a 'filename' column; column 0 is skipped and every remaining
    column is read as keypoint coordinates laid out as x1, y1, x2, y2, ...
    in pixels of the original image.

    Each item is a tuple ``(image_tensor, keypoints_tensor)``:
      * image_tensor: float32 RGB image resized to ``target_size`` and
        normalized with the mean/std constants below (the commonly used
        ImageNet statistics).
      * keypoints_tensor: flat float32 tensor of the keypoints rescaled to the
        coordinate frame of the resized image.

    Args:
        csv_file: path to the annotation CSV.
        img_dir: directory that contains the files named in 'filename'.
        target_size: (H, W) of the resized image. Drives both the image resize
            and the keypoint rescaling so the two can never disagree.
    """

    def __init__(self, csv_file, img_dir, target_size=(100, 100)):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.target_size = tuple(target_size)  # (H, W)
        # Built once here instead of on every __getitem__ call.
        self.transform = T.Compose(
            [
                T.ToImage(),
                T.ToDtype(torch.float32, scale=True),
                T.Resize(self.target_size),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )

    def __len__(self):
        """Number of annotated images (rows in the CSV)."""
        return len(self.data)

    def _scale_keypoints(self, keypoints, orig_w, orig_h):
        """Map (N, 2) keypoints from original-image pixels to target_size pixels, flattened."""
        scale_x = self.target_size[1] / orig_w
        scale_y = self.target_size[0] / orig_h
        keypoints_scaled = keypoints * [scale_x, scale_y]
        return torch.tensor(keypoints_scaled.flatten(), dtype=torch.float32)

    def __getitem__(self, idx):
        """Load, resize and normalize image ``idx`` and rescale its keypoints."""
        row = self.data.iloc[idx]
        img_path = os.path.join(self.img_dir, row['filename'])
        # convert() returns a fully loaded copy, so the file can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        orig_w, orig_h = image.size  # PIL reports (width, height)

        image_tensor = self.transform(image)

        # Everything after column 0 is a coordinate; group into (x, y) pairs.
        keypoints = row.iloc[1:].astype('float').values.reshape(-1, 2)
        keypoints_tensor = self._scale_keypoints(keypoints, orig_w, orig_h)

        return image_tensor, keypoints_tensor

In [None]:
# Folder created by extracting the data archive.
root_dir = '00_test_img_input'

train_data = FacialKeypointsDataset(
    csv_file=f'{root_dir}/train/gt.csv',
    img_dir=f'{root_dir}/train/images',
)
# Test images sit under root_dir, but their ground-truth CSV is read from
# the separate '00_test_img_gt' folder.
test_data = FacialKeypointsDataset(
    csv_file='00_test_img_gt/gt.csv',
    img_dir=f'{root_dir}/test/images',
)

In [None]:
# Same batch size for both splits; only the training split is reshuffled.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=False,
)

In [None]:
from torch import nn
class MyModel(nn.Module):
  """Three conv stages followed by a two-layer regression head with 28 outputs.

  Every 3x3 convolution uses padding=2, so it grows each spatial side by 2
  before the 2x2 max-pool halves it. For a 100x100 input the flattened
  feature size is 256 * 14 * 14 = 50176, which is what the first Linear
  layer expects.

  The layer order (including the repeated ReLUs) is kept exactly as is so
  that the `f.<index>` keys of the state dict stay stable.
  """

  def __init__(self):
      super().__init__()
      stage_1 = [
          nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=2),
          nn.ReLU(),
          nn.BatchNorm2d(64),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.ReLU(),
      ]
      stage_2 = [
          nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=2),
          nn.ReLU(),
          nn.BatchNorm2d(128),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2),
      ]
      stage_3 = [
          nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=2),
          nn.ReLU(),
          nn.BatchNorm2d(256),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2),
      ]
      head = [
          nn.Flatten(),
          nn.Linear(50176, 64),
          nn.ReLU(),
          nn.Linear(64, 28),
      ]
      self.f = nn.Sequential(*stage_1, *stage_2, *stage_3, *head)

  def forward(self, x):
      """Run the image batch `x` through the whole stack and return the 28 outputs per sample."""
      network = self.f
      return network(x)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyModel()
model.to(device)

# Shape smoke test on random input. Run it in eval mode and without autograd:
# a train-mode forward pass would fold the random batch into the BatchNorm
# running statistics and build a graph that is never used.
model.eval()
with torch.no_grad():
    smoke_output = model(torch.randn(64, 3, 100, 100, device=device))
model.train()  # restore the default mode for the training cell

# Show only the shape instead of dumping the whole tensor.
smoke_output.shape

tensor([[-0.0054, -0.1266,  0.3239,  ...,  0.1248,  0.1260, -0.3159],
        [ 0.0614, -0.4991,  0.2818,  ...,  0.0941, -0.1668,  0.1347],
        [ 0.3168, -0.2565, -0.0274,  ...,  0.0325, -0.1170, -0.2302],
        ...,
        [-0.2182, -0.3329,  0.0155,  ..., -0.0481, -0.1269,  0.3973],
        [-0.3330, -0.3113,  0.1281,  ..., -0.0635, -0.3694,  0.2067],
        [-0.4888, -0.3013,  0.3016,  ...,  0.1464, -0.1356, -0.0097]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [None]:
from torch.optim import SGD
from tqdm import tqdm
import numpy as np
from sklearn.metrics import mean_squared_error
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=0.003, momentum=0.87)
for epoch in range(20):
    # ---- training pass: fit on the TRAIN split only ----
    # (Iterating the test loader here would train on the evaluation data and
    # make the average below, which divides by the train-set size, wrong.)
    model.train()
    running_loss = 0.0
    for x, y in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        # `loss` is a batch mean; weight it by the batch size so that a
        # smaller final batch is averaged correctly.
        running_loss += loss.item() * x.size(0)
    avg_loss = running_loss / len(train_dataloader.dataset)

    # ---- evaluation pass: held-out split, no gradient tracking ----
    model.eval()
    all_predictions = []
    all_targets = []
    with torch.no_grad():
        for x, y in test_dataloader:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)
            all_predictions.extend(y_pred.cpu().numpy())
            all_targets.extend(y.cpu().numpy())
    mse = mean_squared_error(all_targets, all_predictions)
    print(f"Epoch {epoch+1}, Train Loss: {avg_loss:.4f}, Test MSE: {mse:.4f}")
    # NOTE: the author re-ran this cell several times on the same `model`,
    # so logged losses may continue from already-trained weights.

Epoch 1: 100%|██████████| 94/94 [00:38<00:00,  2.41it/s]


Epoch 1, Train Loss: 29.8774


Epoch 2: 100%|██████████| 94/94 [00:39<00:00,  2.38it/s]


Epoch 2, Train Loss: 29.8770


Epoch 3: 100%|██████████| 94/94 [00:38<00:00,  2.42it/s]


Epoch 3, Train Loss: 29.8766


Epoch 4:  26%|██▌       | 24/94 [00:09<00:28,  2.42it/s]


KeyboardInterrupt: 

In [None]:
# Persist only the learned parameters/buffers (the state dict), not the module object.
torch.save(
    obj=model.state_dict(),
    f='facial_keypoints_model.pth',
)