In [8]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm
import os
from sklearn.model_selection import train_test_split
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Use the first CUDA GPU when PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [9]:
# Load the annotation table indexed by its 'ID' column and discard the
# 'Unnamed: 0' column that the CSV carries alongside it.
df = (
    pd.read_csv('var_3.csv', index_col='ID')
    .drop(columns=['Unnamed: 0'])
)
df.head()

Unnamed: 0_level_0,NAME,r ankle_X,r ankle_Y,r knee_X,r knee_Y,r hip_X,r hip_Y,l hip_X,l hip_Y,l knee_X,...,r shoulder_Y,l shoulder_X,l shoulder_Y,l elbow_X,l elbow_Y,l wrist_X,l wrist_Y,Scale,Activity,Category
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1579,068840502.jpg,301,436,298,358,297,298,324,293,328,...,211,334,216,319,256,285,242,1.968146,"children_s games, adults playing (e.g., hopsco...",sports
1580,037455289.jpg,407,292,401,248,398,197,414,194,414,...,139,413,140,403,177,374,165,1.463016,"children_s games, adults playing (e.g., hopsco...",sports
1581,016686020.jpg,-1,-1,-1,-1,285,465,339,462,-1,...,305,321,291,337,313,334,224,4.194579,"children_s games, adults playing (e.g., hopsco...",sports
1582,043052934.jpg,298,234,295,187,298,148,321,151,325,...,110,354,113,350,142,354,171,0.952773,"children_s games, adults playing (e.g., hopsco...",sports
1583,014259414.jpg,-1,-1,1196,617,1212,487,1147,485,1101,...,360,1143,355,1095,429,1074,492,2.801428,"children_s games, adults playing (e.g., hopsco...",sports


In [10]:
def draw(points, image, activity, category):
    """Display an image with a keypoint skeleton and its labels drawn on top.

    Args:
        points: Flat sequence of coordinates laid out as x0, y0, x1, y1, ...
            expressed in the pixel space of ``image``.
        image: Image to show (PIL image or array). Channel-first arrays of
            shape (C, H, W) are transposed to (H, W, C) because ``imshow``
            only accepts channel-last data.
        activity: Value printed after "Activity: " on the figure.
        category: Value printed after "Category: " on the figure.

    Joints with a negative coordinate are treated as missing (the annotation
    table uses -1 for them) and are not drawn, nor are bones touching them.
    """
    image = np.asarray(image)
    if image.ndim == 3 and image.shape[0] in (1, 3, 4) and image.shape[2] not in (1, 3, 4):
        image = np.transpose(image, (1, 2, 0))

    fig, ax = plt.subplots()

    ax.imshow(image)

    keypoints_x = points[::2]
    keypoints_y = points[1::2]
    # Guard against an odd-length ``points`` sequence.
    num_joints = min(len(keypoints_x), len(keypoints_y))

    def is_drawable(joint):
        """True when the joint index exists and its coordinates are not missing."""
        return (0 <= joint < num_joints
                and keypoints_x[joint] >= 0
                and keypoints_y[joint] >= 0)

    # NOTE(review): this list references joint indices up to 16 (17 joints),
    # while the table loaded in this notebook has 32 coordinate columns
    # (16 joints). Bones with an out-of-range endpoint are skipped below
    # instead of raising IndexError; confirm the intended skeleton topology.
    bones = [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (5, 6), (6, 7), (7, 8),
             (2, 9), (9, 10), (10, 11), (11, 12), (2, 13), (13, 14), (14, 15), (15, 16)]

    for i in range(num_joints):
        if not is_drawable(i):
            continue
        ax.scatter(keypoints_x[i], keypoints_y[i], color='red', marker='o')
        ax.text(keypoints_x[i], keypoints_y[i], str(i), color='white', fontsize=8, ha='center', va='center')

    for start, end in bones:
        if is_drawable(start) and is_drawable(end):
            ax.plot([keypoints_x[start], keypoints_x[end]],
                    [keypoints_y[start], keypoints_y[end]],
                    color='blue')

    ax.text(10, 20, f'Activity: {activity}', color='white', fontsize=10)
    ax.text(10, 40, f'Category: {category}', color='white', fontsize=10)

    ax.axis('off')
    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate memory.
    plt.close(fig)

In [11]:
# Distinct label values per task, plus the number of coordinate columns
# (every column whose name ends in 'X' or 'Y').
activities_count = len(df['Activity'].unique())
categories_count = len(df['Category'].unique())
keypoints_count = sum(1 for col in df.columns if col.endswith(('X', 'Y')))
activities_count, categories_count, keypoints_count

(56, 16, 32)

In [12]:
class KeypointActivityDataset(Dataset):
    """Dataset yielding an image, its rescaled keypoints and two class labels.

    Args:
        csv_file: DataFrame (despite the name) with one row per image. Column 0
            holds the image file name, columns ``1:-3`` hold the coordinates
            (x0, y0, x1, y1, ...), and the columns named 'Activity' and
            'Category' hold the labels.
        root_dir: Directory the image file names are relative to.
        size: Side length the coordinates are rescaled to; x is multiplied by
            ``size / image_width`` and y by ``size / image_height``.
        mode: 'train', 'val' or 'test'. For 'val' and 'test' each item also
            carries the two scale factors.
        transform: Optional callable applied to the loaded RGB image.
        activity_classes: Optional ordered sequence of every activity label.
            When given, a label's id is its position in this sequence, so
            several datasets can share one mapping. When omitted, ids come
            from the sorted unique labels of ``csv_file`` alone.
        category_classes: Same as ``activity_classes`` for the category label.

    Labels are encoded once at construction time and ``csv_file`` is never
    modified. NOTE(review): coordinates equal to -1 (missing joints in the
    annotation table) are rescaled like any other value — confirm intended.
    """

    def __init__(self, csv_file, root_dir, size, mode, transform=None,
                 activity_classes=None, category_classes=None):
        self.keypoints_frame = csv_file
        self.root_dir = root_dir
        self.transform = transform
        self.size = size
        self.mode = mode

        # Encode labels once here rather than re-fitting an encoder (and
        # overwriting the frame's columns) on every __getitem__ call.
        self.activity_classes, self.activity_labels = self._encode_labels(
            csv_file['Activity'].values, activity_classes)
        self.category_classes, self.category_labels = self._encode_labels(
            csv_file['Category'].values, category_classes)

    @staticmethod
    def _encode_labels(values, classes=None):
        """Return ``(classes, ids)`` mapping each value to an integer id."""
        values = np.asarray(values).reshape(-1)
        if classes is None:
            # Sorted unique values, i.e. the same ordering LabelEncoder uses.
            classes, ids = np.unique(values, return_inverse=True)
            return classes, np.asarray(ids, dtype=np.int64).reshape(-1)

        classes = np.asarray(classes)
        lookup = {label: position for position, label in enumerate(classes.tolist())}
        try:
            ids = np.array([lookup[value] for value in values.tolist()], dtype=np.int64)
        except KeyError as exc:
            raise ValueError(f"label {exc.args[0]!r} is not in the provided classes") from exc
        return classes, ids

    def _load_image(self, path):
        """Open ``path`` as an RGB image, closing the file handle afterwards."""
        with Image.open(path) as raw:
            return raw.convert('RGB')

    def __len__(self):
        return len(self.keypoints_frame)

    def __getitem__(self, idx):
        img_name = self.keypoints_frame.iloc[idx, 0]
        image = self._load_image(os.path.join(self.root_dir, img_name))

        # Factors that map original pixel coordinates onto a size x size image.
        scale_x = self.size / image.size[0]
        scale_y = self.size / image.size[1]

        # astype() returns a fresh array, so scaling in place leaves the frame intact.
        coords = self.keypoints_frame.iloc[idx, 1:-3].values.astype('float')
        coords[0::2] *= scale_x
        coords[1::2] *= scale_y
        keypoints = torch.tensor(coords, dtype=torch.float32)

        activity = torch.tensor(int(self.activity_labels[idx]), dtype=torch.long)
        category = torch.tensor(int(self.category_labels[idx]), dtype=torch.long)

        sample = {'image': image, 'keypoints': keypoints, 'activity': activity, 'category': category}

        if self.transform:
            sample['image'] = self.transform(sample['image'])
        if self.mode in ('test', 'val'):
            return sample, scale_x, scale_y
        return sample

In [13]:
# Preprocessing and batching settings.
size = 224
batch_size = 8
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Resize to a square, convert to a tensor, then normalise each channel.
transform = transforms.Compose([
    transforms.Resize((size, size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Shuffle the rows once with a fixed seed and renumber them from zero.
data = df.sample(frac=1, random_state=42).reset_index(drop=True)

train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# First carve off the training rows, then divide the remainder into
# validation and test parts according to their relative ratios.
train_data, temp_data = train_test_split(
    data,
    test_size=(1 - train_ratio),
    random_state=42,
)
val_data, test_data = train_test_split(
    temp_data,
    test_size=test_ratio / (test_ratio + val_ratio),
    random_state=42,
)

# NOTE(review): KeypointActivityDataset derives label ids from the frame it
# is handed, so a split that lacks some label may number classes differently
# from the others — confirm ids are consistent across splits.
train_dataset = KeypointActivityDataset(train_data, 'images/', size, 'train', transform=transform)
val_dataset = KeypointActivityDataset(val_data, 'images/', size, 'val', transform=transform)
test_dataset = KeypointActivityDataset(test_data, 'images/', size, 'test', transform=transform)

# Only the training loader reshuffles between epochs.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
def train(model, train_loader, val_loader, optimizer, criterion_keypoints, criterion_activities, criterion_categories, num_epochs):
    """Train the three-headed model and track the mean loss of every epoch.

    Args:
        model: Module returning (keypoints, activities, categories) outputs.
        train_loader: Iterable of batch dicts with the keys 'image',
            'keypoints', 'activity' and 'category'.
        val_loader: Iterable of (batch dict, scale_x, scale_y) triples; the
            two scale values are ignored here.
        optimizer: Optimiser stepped once per training batch.
        criterion_keypoints: Loss applied to the keypoint output.
        criterion_activities: Loss applied to the activity output.
        criterion_categories: Loss applied to the category output.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        (model, train_losses, val_losses), where each list holds one value per
        epoch: the summed batch losses divided by the number of batches.
    """

    def combined_loss(batch):
        """Forward one batch and return the unweighted sum of the three losses."""
        images = batch['image'].to(device)
        target_keypoints = batch['keypoints'].to(device)
        target_activities = batch['activity'].to(device)
        target_categories = batch['category'].to(device)

        out_keypoints, out_activities, out_categories = model(images)

        return (criterion_keypoints(out_keypoints, target_keypoints)
                + criterion_activities(out_activities, target_activities)
                + criterion_categories(out_categories, target_categories))

    train_losses, val_losses = [], []

    for epoch in range(num_epochs):
        # ---- optimisation pass ----
        model.train()
        train_total = 0.0
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            loss = combined_loss(batch)
            loss.backward()
            optimizer.step()
            train_total += loss.item()

        train_epoch_loss = train_total / len(train_loader)
        train_losses.append(train_epoch_loss)

        # ---- evaluation pass (no gradients) ----
        model.eval()
        val_total = 0.0
        with torch.no_grad():
            for batch, _, _ in tqdm(val_loader):
                val_total += combined_loss(batch).item()

        val_epoch_loss = val_total / len(val_loader)
        val_losses.append(val_epoch_loss)

        print(f"Epoch [{epoch+1}/{num_epochs}], Train loss: {train_epoch_loss:.4f}, Val loss: {val_epoch_loss:.4f}")

    return model, train_losses, val_losses

In [15]:
def predict(model, loader):
    """Run the model over ``loader`` and gather predictions, targets and inputs.

    Args:
        model: Module returning (keypoints, activities, categories) outputs.
        loader: Iterable of (batch dict, scale_x, scale_y) triples, where the
            dict has the keys 'image', 'keypoints', 'activity', 'category'
            and the scales are the per-sample factors that were applied to
            the original pixel coordinates.

    Returns:
        Dict of numpy arrays concatenated over all batches:
            'pred_keypoints', 'true_keypoints': coordinates in the same
                (rescaled) space as the loader's targets and 'inputs', so
                they can be compared with each other and drawn on 'inputs'.
            'pred_keypoints_original': predictions divided by the scale
                factors, i.e. mapped back to original image pixels.
            'pred_activities', 'pred_categories': argmax class ids.
            'true_activities', 'true_categories': target class ids.
            'inputs': the image tensors fed to the model.
        Every array is empty when the loader yields no batches.
    """
    keys = ('pred_keypoints', 'pred_keypoints_original', 'pred_activities', 'pred_categories',
            'true_keypoints', 'true_activities', 'true_categories', 'inputs')
    all_predictions = {key: [] for key in keys}

    def to_original_pixels(coords, scale_x, scale_y):
        """Undo the per-sample x/y scaling on a (batch, 2 * joints) tensor."""
        restored = coords.clone()
        factor_x = torch.as_tensor(scale_x, dtype=restored.dtype).reshape(-1, 1)
        factor_y = torch.as_tensor(scale_y, dtype=restored.dtype).reshape(-1, 1)
        restored[:, 0::2] /= factor_x   # even positions hold x values
        restored[:, 1::2] /= factor_y   # odd positions hold y values
        return restored

    model.eval()
    with torch.no_grad():
        for data, scale_x, scale_y in tqdm(loader):
            inputs = data['image'].to(device)

            keypoints_pred, activities_pred, categories_pred = model(inputs)
            keypoints_pred = keypoints_pred.cpu()

            all_predictions['pred_keypoints'].append(keypoints_pred.numpy())
            all_predictions['pred_keypoints_original'].append(
                to_original_pixels(keypoints_pred, scale_x, scale_y).numpy())
            all_predictions['pred_activities'].append(activities_pred.argmax(dim=1).cpu().numpy())
            all_predictions['pred_categories'].append(categories_pred.argmax(dim=1).cpu().numpy())
            all_predictions['true_keypoints'].append(data['keypoints'].cpu().numpy())
            all_predictions['true_activities'].append(data['activity'].cpu().numpy())
            all_predictions['true_categories'].append(data['category'].cpu().numpy())
            all_predictions['inputs'].append(inputs.cpu().numpy())

    # np.concatenate rejects an empty list, so fall back to an empty array.
    return {key: (np.concatenate(chunks) if chunks else np.empty((0,)))
            for key, chunks in all_predictions.items()}

In [16]:
def calculate_metrics(keypoints_true, keypoints_pred, activities_true, activities_pred, categories_true, categories_pred, loader_type):
    """Print regression and classification metrics for one data split.

    Keypoints are scored with MSE, RMSE and MAE; activities and categories
    with accuracy plus weighted precision, recall and F1. The "MTL Score" is
    the mean of the two accuracies. Nothing is returned.

    Args:
        keypoints_true, keypoints_pred: Arrays of target / predicted coordinates.
        activities_true, activities_pred: Target / predicted activity ids.
        categories_true, categories_pred: Target / predicted category ids.
        loader_type: Label inserted into the printed header line.
    """

    def classification_scores(y_true, y_pred):
        """Return (accuracy, weighted precision, weighted recall, weighted F1)."""
        return (
            accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred, average='weighted'),
            recall_score(y_true, y_pred, average='weighted'),
            f1_score(y_true, y_pred, average='weighted'),
        )

    # Keypoint regression errors.
    mse_keypoints = mean_squared_error(keypoints_true, keypoints_pred)
    rmse_keypoints = np.sqrt(mse_keypoints)
    mae_keypoints = np.mean(np.abs(keypoints_true - keypoints_pred))

    # Classification quality of each head.
    (accuracy_activities, precision_activities,
     recall_activities, f1_activities) = classification_scores(activities_true, activities_pred)
    (accuracy_categories, precision_categories,
     recall_categories, f1_categories) = classification_scores(categories_true, categories_pred)

    mtl_score = (accuracy_activities + accuracy_categories) / 2

    print(f"Metrics for {loader_type}:\nKeypoint MSE: {mse_keypoints},\nKeypoint RMSE: {rmse_keypoints},\nKeypoint MAE: {mae_keypoints},\nActivity Accuracy: {accuracy_activities},\nActivity Precision: {precision_activities},\nActivity Recall: {recall_activities},\nActivity F1-score: {f1_activities},\nCategory Accuracy: {accuracy_categories},\nCategory Precision: {precision_categories},\nCategory Recall: {recall_categories},\nCategory F1-score: {f1_categories},\nMTL Score: {mtl_score}")

In [17]:
def validate(model, loader, images_to_draw, loader_type):
    """Predict on ``loader``, print the metrics and draw the first few samples.

    Args:
        model: Model passed through to ``predict``.
        loader: Loader passed through to ``predict``.
        images_to_draw: Maximum number of samples to visualise; fewer are
            drawn when the loader holds fewer samples.
        loader_type: Label used in the printed metrics header.
    """
    predictions = predict(model, loader)

    calculate_metrics(predictions['true_keypoints'], predictions['pred_keypoints'], predictions['true_activities'],
                      predictions['pred_activities'], predictions['true_categories'], predictions['pred_categories'], loader_type)

    # Undo the per-channel normalisation so the pictures display with their
    # real colours. Assumes the loader used the notebook-level ``mean`` and
    # ``std`` in its transform — TODO confirm for other loaders.
    channel_mean = np.asarray(mean, dtype=np.float32).reshape(1, 1, -1)
    channel_std = np.asarray(std, dtype=np.float32).reshape(1, 1, -1)

    # Never index past the number of samples that were actually predicted.
    drawable = min(images_to_draw, len(predictions['inputs']))
    for i in range(drawable):
        # Model inputs are channel-first; imshow needs (H, W, C) within [0, 1].
        picture = np.transpose(predictions['inputs'][i], (1, 2, 0))
        picture = np.clip(picture * channel_std + channel_mean, 0.0, 1.0)
        draw(predictions['pred_keypoints'][i], picture, predictions['pred_activities'][i], predictions['pred_categories'][i])

In [18]:
class KeypointActivityModel(nn.Module):
    """ResNet-50 backbone feeding three independent two-layer heads.

    Each head is Linear -> ReLU -> Linear applied to the backbone output and
    produces, respectively, the keypoint values, the activity scores and the
    category scores.

    Args:
        num_keypoints: Output width of the keypoint head.
        num_activities: Output width of the activity head.
        num_categories: Output width of the category head.
    """

    def __init__(self, num_keypoints, num_activities, num_categories):
        super().__init__()

        self.backbone = models.resnet50(pretrained=True)

        # Width of the backbone output consumed by the heads, and the width
        # of each head's hidden layer.
        backbone_width, hidden_width = 1000, 256

        self.keypoint_fc1 = nn.Linear(backbone_width, hidden_width)
        self.keypoint_fc2 = nn.Linear(hidden_width, num_keypoints)

        self.activity_fc1 = nn.Linear(backbone_width, hidden_width)
        self.activity_fc2 = nn.Linear(hidden_width, num_activities)

        self.category_fc1 = nn.Linear(backbone_width, hidden_width)
        self.category_fc2 = nn.Linear(hidden_width, num_categories)

    @staticmethod
    def _apply_head(features, first_layer, second_layer):
        """Run one head: first layer, ReLU, second layer."""
        return second_layer(nn.functional.relu(first_layer(features)))

    def forward(self, x):
        """Return the (keypoints, activities, categories) outputs for batch ``x``."""
        features = self.backbone(x)

        keypoints = self._apply_head(features, self.keypoint_fc1, self.keypoint_fc2)
        activities = self._apply_head(features, self.activity_fc1, self.activity_fc2)
        categories = self._apply_head(features, self.category_fc1, self.category_fc2)

        return keypoints, activities, categories

In [23]:
# Build the network with one output per coordinate column / activity /
# category counted earlier, and move it to the selected device.
model = KeypointActivityModel(
    num_keypoints=keypoints_count,
    num_activities=activities_count,
    num_categories=categories_count,
).to(device)
model



KeypointActivityModel(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequent

In [24]:
# Optimisation settings.
num_epochs = 10
learning_rate = 0.001

# One criterion per model output: squared error for the keypoint values,
# cross-entropy for each of the two class predictions.
criterion_keypoints = nn.MSELoss()
criterion_activities = nn.CrossEntropyLoss()
criterion_categories = nn.CrossEntropyLoss()

# A single Adam optimiser over every parameter of the model.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [25]:
# Run the training loop and keep the per-epoch loss histories for plotting.
model, train_losses, val_losses = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion_keypoints=criterion_keypoints,
    criterion_activities=criterion_activities,
    criterion_categories=criterion_categories,
    num_epochs=num_epochs,
)

100%|██████████| 69/69 [07:23<00:00,  6.43s/it]
100%|██████████| 15/15 [00:16<00:00,  1.13s/it]


Epoch [1/10], Train loss: 3677.4348, Val loss: 27249.0321


 26%|██▌       | 18/69 [01:52<05:11,  6.11s/it]

In [None]:
# Two stacked panels: training losses on top, validation losses below.
loss_panels = [
    (1, train_losses, 'Training Losses', 'blue'),
    (2, val_losses, 'Validation Losses', 'orange'),
]
for panel_index, epoch_losses, caption, line_color in loss_panels:
    plt.subplot(2, 1, panel_index)
    plt.plot(epoch_losses, label=caption, color=line_color)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(caption)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Report metrics on the validation split and draw its first ten samples.
validate(model=model, loader=val_loader, images_to_draw=10, loader_type='validation')

In [None]:
# Report metrics on the test split and draw its first ten samples.
validate(model=model, loader=test_loader, images_to_draw=10, loader_type='testing')