In [None]:
# imports
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader
from tqdm.autonotebook import tqdm

In [None]:
# Load the key-point annotation table (one row per image: the image name followed
# by the key-point coordinate columns).
annotations=pd.read_csv("/kaggle/input/dataset-1-and-dataframe/dataframe/key_points.csv")
# Normalise header names to lower case so the 'img_name' lookup below does not
# depend on the capitalisation used in the CSV.
# (The original assigned to a misspelt attribute, so the rename never happened.)
annotations.columns=[i.lower() for i in annotations.columns]
# Drop every row with a missing value; reassigning (rather than inplace=True)
# keeps the cell idempotent on re-run.
annotations=annotations.dropna(axis=0)
# Names of the images that have a complete annotation row.
selected_images=set(annotations['img_name'].values)

  


In [None]:
# Root directories of the two image collections.
data_path_1="/kaggle/input/dataset-1-and-dataframe/dataset_1"
data_path_2="/kaggle/input/dataset-2/dataset_2"

# Keep only files that have a complete annotation row.
# The result is sorted because iterating a set of strings yields a different order
# on every kernel start (hash randomisation); the train/validation split further
# down slices these lists by position, so an unsorted list would make the split
# non-reproducible.
path_1_images=sorted(set(os.listdir(data_path_1)).intersection(selected_images))
path_2_images=sorted(set(os.listdir(data_path_2)).intersection(selected_images))

class Config:
    """Hyper-parameters and run-time settings shared by the notebook cells."""
    # Number of images used for training.
    num_train_samples=1200
    # Everything that is not used for training is used for validation.
    num_val_samples=len(path_1_images)+len(path_2_images)-num_train_samples #debugged
    train_batch_size=2
    val_batch_size=2
    epochs=5
    # is_available must be *called*: the bare function object is always truthy,
    # which selected 'cuda' even on machines without a GPU.
    device='cuda' if torch.cuda.is_available() else 'cpu'
    # AdamW's default step size. The previous value (1e-9) made every update
    # negligible, so the model could not learn within a few epochs. Tune as needed.
    lr=1e-3


#debugged - the lists are built from path_1_images / path_2_images (annotated files only)
# rather than from the raw directory listings.
# Index inside path_2_images where the validation tail begins.
_val_start=len(path_2_images)-Config.num_val_samples

# Training set: every dataset-1 image plus the head of dataset 2.
train_data_list=(
    [os.path.join(data_path_1,name) for name in path_1_images]
    +[os.path.join(data_path_2,name) for name in path_2_images[:_val_start]]
)
# Validation set: the remaining tail of dataset 2.
val_data_list=[os.path.join(data_path_2,name) for name in path_2_images[_val_start:]]


class FaceData:
    """Map-style dataset pairing face images with their key-point annotations.

    Parameters
    ----------
    data : sequence of str
        Paths of the image files.
    annotations : pandas.DataFrame
        Table with an 'img_name' column holding the file name; every column after
        the first is read as a flat list of coordinates and reshaped to pairs.
    transforms : callable, optional
        Applied to the loaded PIL image. When omitted the PIL image is returned
        unchanged.
    """

    def __init__(self,data,annotations,transforms=None):
        self.transforms=transforms
        self.data=data
        self.annotations=annotations

    def __len__(self):
        """Return the number of image paths."""
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``{'x': image, 'y': labels}`` for the sample at ``idx``.

        ``labels`` is a float32 tensor of shape (n, 2).

        Raises
        ------
        KeyError
            If the annotation table has no row for the image.
        """
        data=self.data[idx]
        # Match on the real file name instead of the last nine characters of the
        # path, so names of any length work.
        file_name=os.path.basename(data)
        # Use the table handed to this instance, not a module-level global.
        rows=self.annotations.loc[self.annotations['img_name']==file_name]
        if len(rows)==0:
            raise KeyError(f"no annotation row for image {file_name!r}")
        labels=torch.tensor(rows.iloc[:,1:].values.reshape(-1,2),dtype=torch.float32)

        # Always load the image (it was previously left undefined when no transform
        # was supplied) and force three channels so grayscale / RGBA files do not
        # break a model that expects RGB input.
        image=Image.open(data).convert('RGB')
        if self.transforms is not None:
            image=self.transforms(image)

        return {"x":image,'y':labels}
            
# Image-only preprocessing / augmentation.
# NOTE: no geometric augmentation here. A random horizontal flip applied to the
# image alone would leave the key-point labels un-mirrored, so roughly half of the
# samples would be trained against wrong targets. Flips must be implemented where
# image and labels can be transformed together.
image_transforms=transforms.Compose([
    transforms.ToTensor(),
    # Photometric jitter does not move key-points, so it is safe to apply.
    transforms.ColorJitter(brightness=0.2,contrast=0.2,saturation=0.2)
])
   
# Validation images are only converted to tensors: random augmentation would make
# the validation loss noisy and not comparable between epochs.
val_transforms=transforms.ToTensor()

train_data=FaceData(train_data_list,annotations,image_transforms)
val_data=FaceData(val_data_list,annotations,val_transforms)

# Shuffle training samples each epoch; the path list is ordered dataset 1 then
# dataset 2, so unshuffled batches would never mix the two sources.
train_loader=DataLoader(train_data,Config.train_batch_size,shuffle=True)
val_loader=DataLoader(val_data,Config.val_batch_size)


In [None]:
# Echo the device string stored on Config (the cell's last expression is displayed).
Config.device

'cuda'

In [None]:
# model

class KeypointModel(nn.Module):
    """Small CNN that regresses 2-D key-point coordinates from an RGB image.

    Three ``conv -> ReLU -> 2x2 max-pool`` stages (32, 64, 128 channels) followed
    by two fully connected layers.

    Parameters
    ----------
    image_size : int or (int, int), default 512
        Input height/width. An int means a square image. The default reproduces
        the original hard-coded ``128*64*64`` flattened feature size.
    num_keypoints : int, default 67
        Number of (x, y) pairs predicted per image.

    Raises
    ------
    ValueError
        If the image is too small to survive three poolings.
    """

    def __init__(self,image_size=512,num_keypoints=67):
        super().__init__()

        if isinstance(image_size,int):
            height,width=image_size,image_size
        else:
            height,width=image_size
        # Each of the three stride-2 poolings halves the spatial size (rounding
        # down); the convolutions are padded and keep it unchanged.
        feat_h,feat_w=height//8,width//8
        if feat_h<1 or feat_w<1:
            raise ValueError(f"image_size {image_size!r} is too small for three 2x2 poolings")

        self.num_keypoints=num_keypoints
        self.flat_features=128*feat_h*feat_w

        self.conv1=nn.Conv2d(3,32,kernel_size=3,stride=1,padding=1,bias=True)
        self.relu1=nn.ReLU(inplace=True)
        self.pool1=nn.MaxPool2d(kernel_size=2,stride=2)

        self.conv2=nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1,bias=True)
        self.relu2=nn.ReLU(inplace=True)
        self.pool2=nn.MaxPool2d(kernel_size=2,stride=2)

        self.conv3=nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1,bias=True)
        self.relu3=nn.ReLU(inplace=True)
        self.pool3=nn.MaxPool2d(kernel_size=2,stride=2)

        self.fc1=nn.Linear(self.flat_features,512,bias=True)
        self.relu4=nn.ReLU(inplace=True)
        self.fc2=nn.Linear(512,num_keypoints*2,bias=True)

    def forward(self,x):
        """Map images ``(N, 3, H, W)`` (or one unbatched ``(3, H, W)`` image) to
        key-points of shape ``(N, num_keypoints, 2)``."""
        # Accept a single unbatched image, as the original reshape-based code did.
        if x.dim()==3:
            x=x.unsqueeze(0)

        x=self.conv1(x)
        x=self.relu1(x)
        x=self.pool1(x)

        x=self.conv2(x)
        x=self.relu2(x)
        x=self.pool2(x)

        x=self.conv3(x)
        x=self.relu3(x)
        x=self.pool3(x)

        # Flatten everything except the batch axis. Unlike view(-1, F), a wrongly
        # sized input now fails in fc1 instead of being folded into the batch axis.
        x=torch.flatten(x,1)
        x=self.fc1(x)
        x=self.relu4(x)
        x=self.fc2(x)
        x=x.view(-1,self.num_keypoints,2)

        return x

In [None]:
model=KeypointModel()
model=model.to(device=Config.device)
criterion=nn.L1Loss()
optimizer=torch.optim.AdamW(model.parameters(),lr=Config.lr)

# training
# Per-epoch mean of the per-batch L1 losses.
avg_train_loss=[]
avg_val_loss=[]

for epoch in tqdm(range(Config.epochs)):
    model.train()
    train_loss=0
    for batch in train_loader:
        X=batch['x'].to(device=Config.device)
        Y=batch['y'].to(device=Config.device)

        # Clear stale gradients BEFORE the backward pass. Zeroing them between
        # backward() and step() (as the code did before) hands the optimizer
        # all-zero gradients, so the weights are never fitted to the data.
        optimizer.zero_grad()
        out=model(X)
        loss=criterion(out,Y)
        loss.backward()
        optimizer.step()
        train_loss+=loss.item()
    # max(..., 1) avoids a ZeroDivisionError if a loader happens to be empty.
    train_loss/=max(len(train_loader),1)

    model.eval()
    with torch.no_grad():
        val_loss=0
        for batch in val_loader:
            x=batch['x'].to(device=Config.device)
            y=batch['y'].to(device=Config.device)
            out=model(x)
            loss=criterion(out,y)
            val_loss+=loss.item()
        val_loss/=max(len(val_loader),1)

    avg_train_loss.append(train_loss)
    avg_val_loss.append(val_loss)

  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch, drawn on an explicit Axes so the figure
# carries its own labels and does not depend on pyplot's implicit current figure.
fig,ax=plt.subplots()
ax.plot(range(len(avg_train_loss)),avg_train_loss,'-r',label='avg_train_loss')
ax.plot(range(len(avg_val_loss)),avg_val_loss,'-b',label='avg_val_loss')
ax.set_xlabel('epoch')
ax.set_ylabel('mean L1 loss')
ax.set_title('Training and validation loss')
ax.legend();

In [None]:
def plot_preds(model,image_path):
    """Run ``model`` on one image and draw its predicted key-points on it.

    Parameters
    ----------
    model : torch.nn.Module
        Network returning key-points of shape ``(1, K, 2)`` for a batch of one image.
    image_path : str
        Path of the image file to load.

    Returns
    -------
    PIL.Image.Image
        The loaded RGB image with a small white dot at every predicted
        (x, y) position, rounded to the nearest pixel.
    """
    from PIL import ImageDraw
    import torchvision.transforms.functional as tf

    # Force RGB so both the 3-channel model input and the RGB fill colour are valid
    # for grayscale / RGBA files as well.
    im=Image.open(image_path).convert('RGB')
    draw=ImageDraw.Draw(im)
    # Add the batch axis explicitly instead of relying on the model's reshape.
    tensor=tf.to_tensor(im).unsqueeze(0)
    # Run on whatever device the model lives on rather than on a global setting.
    device=next(model.parameters()).device
    with torch.no_grad():
        out=model(tensor.to(device=device))
    out = np.round(out.to('cpu').numpy()).astype(int) #changing real number predictions into nearest integers

    for feature in out[0]:
        x,y=int(feature[0]),int(feature[1])
        draw.ellipse((x-2,y-2,x+2,y+2),fill=(255,255,255))
    return im