# Results Fully Connected

In this notebook, the results on the test split are generated for the tuned ResNet50 model with a fully connected (FC) regression head.

### Imports

In [None]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision.transforms import functional as F
from tqdm import tqdm

from court_dataset import CourtDataset

### Functions

In [2]:
def create_dataloader(dataset_path, batch_size):
    """Build the test-split dataset and a deterministic loader over it.

    Args:
        dataset_path: Location of the dataset, forwarded to ``CourtDataset``.
        batch_size: Number of samples per batch.

    Returns:
        Tuple ``(loader, dataset)`` for the ``"test"`` split.
    """
    # Fixed evaluation settings: 1280x720 source frames, 512x288 model input,
    # no augmentation and no keypoint sub-selection.
    dataset_kwargs = dict(
        path=dataset_path,
        split="test",
        input_height=720,
        input_width=1280,
        model_height=288,
        model_width=512,
        augment=False,
        selected_points=None,
    )
    dataset = CourtDataset(**dataset_kwargs)

    # shuffle=False keeps the sample order stable between runs.
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    return loader, dataset


def create_resnet50(num_coordinates=15):
    """Create a ResNet50 whose classifier is replaced by a keypoint regression head.

    Args:
        num_coordinates: Number of (x, y) keypoints to regress; the head emits
            ``num_coordinates * 2`` values.

    Returns:
        The torchvision ResNet50 (initialised with the ``IMAGENET1K_V2`` weights)
        with ``fc`` swapped for a Linear -> ReLU -> Linear head.
    """
    backbone = models.resnet50(weights='IMAGENET1K_V2')

    # Read the feature width before the original classifier is discarded.
    feature_dim = backbone.fc.in_features
    regression_head = nn.Sequential(
        nn.Linear(feature_dim, 256),
        nn.ReLU(),
        nn.Linear(256, num_coordinates * 2),
    )
    backbone.fc = regression_head
    return backbone



def compute_mse(points, positions, img_size):
    """Mean squared Euclidean distance between reference and predicted keypoints.

    A keypoint pair is ignored when the reference point lies outside the image
    (``x`` not in ``[0, img_size[0])`` or ``y`` not in ``[0, img_size[1])``) or
    when the predicted position is exactly ``(-1, -1)``.

    Args:
        points: Sequence of reference ``(x, y)`` keypoints.
        positions: Sequence of predicted ``(x, y)`` keypoints, index-aligned
            with ``points``.
        img_size: ``(width, height)`` bounds used for the in-frame check.

    Returns:
        The mean of the squared distances over all pairs that were not
        ignored, or NaN when every pair was ignored (or the input is empty).
    """
    squared_errors = []

    for i, point in enumerate(points):
        position = positions[i]

        # Reference point outside the image: nothing to score.
        if point[0] < 0 or point[0] >= img_size[0] or point[1] < 0 or point[1] >= img_size[1]:
            continue

        # A prediction of (-1, -1) is skipped as well (it is NOT penalised).
        if position[0] == -1 and position[1] == -1:
            continue

        distance = np.linalg.norm(np.array(point) - np.array(position))
        squared_errors.append(distance**2)

    # np.mean([]) would emit a RuntimeWarning and return NaN; return NaN
    # explicitly so callers get the same value without the warning noise.
    if not squared_errors:
        return np.nan

    return np.mean(squared_errors)


def compute_counts(points, positions, img_size, threshold=4):
    """Tally keypoint outcomes for one sample.

    Each reference/prediction pair is labelled from two facts: whether the
    prediction is within ``threshold`` of the reference point, and whether the
    reference point lies outside the ``img_size`` frame:

    * within threshold, inside frame   -> TP
    * within threshold, outside frame  -> TN
    * beyond threshold, inside frame   -> FP
    * beyond threshold, outside frame  -> FN

    Args:
        points: Sequence of reference ``(x, y)`` keypoints.
        positions: Sequence of predicted ``(x, y)`` keypoints; must have the
            same length as ``points``.
        img_size: ``(width, height)`` bounds used for the frame check.
        threshold: Maximum Euclidean distance counted as a match (inclusive).

    Returns:
        Tuple ``(tp, fp, tn, fn)``.
    """
    assert len(points) == len(positions)

    def _outside_frame(pt):
        # True when the point falls outside [0, width) x [0, height).
        return pt[0] < 0 or pt[0] >= img_size[0] or pt[1] < 0 or pt[1] >= img_size[1]

    tally = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}

    for reference, predicted in zip(points, positions):
        error = np.linalg.norm(np.array(reference) - np.array(predicted))

        if error <= threshold:
            label = "tn" if _outside_frame(reference) else "tp"
        else:
            label = "fn" if _outside_frame(reference) else "fp"
        tally[label] += 1

    # Order matters: callers unpack as (tp, fp, tn, fn).
    return tally["tp"], tally["fp"], tally["tn"], tally["fn"]


def calculate_metrics(tp, fp, tn, fn):
    """Derive accuracy, precision, recall and F1 from confusion counts.

    Any ratio whose denominator is not positive is reported as ``0.0``
    instead of raising.

    Args:
        tp: True positive count.
        fp: False positive count.
        tn: True negative count.
        fn: False negative count.

    Returns:
        Dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    def _ratio(numerator, denominator):
        # Guarded division: fall back to 0.0 when there is nothing to divide by.
        return numerator / denominator if denominator > 0 else 0.0

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1 = _ratio(2 * (precision * recall), precision + recall)
    accuracy = _ratio(tp + tn, tp + fp + tn + fn)

    return dict(accuracy=accuracy, precision=precision, recall=recall, f1=f1)
    

def validate(model, val_loader, device, dataset, criterion=torch.nn.MSELoss(), threshold=4, img_size=(512, 288)):
    """Evaluate a keypoint regression model over a data loader.

    For every batch the criterion loss is recorded; for every sample the
    per-sample keypoint error (``compute_mse``) and the TP/FP/TN/FN counts
    (``compute_counts``) are accumulated, and a record with the prediction and
    the matching ``dataset.data`` row is collected.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Loader whose batches are indexable as
            ``(images, keypoints, sample indices)``.
        device: Device the inputs and targets are moved to.
        dataset: Object exposing a ``data`` DataFrame that is looked up
            positionally with the batch's sample indices.
        criterion: Loss applied to the raw model output and the targets
            reshaped to the output's shape.
        threshold: Distance threshold forwarded to ``compute_counts``.
        img_size: ``(width, height)`` forwarded to the metric helpers.

    Returns:
        Tuple ``(mean_loss, mean_mse, metrics, predictions_df)``. ``mean_mse``
        averages only the samples with a defined (non-NaN) error; both means
        are NaN when there is nothing to average.
    """
    model.eval()  # Set the model to evaluation mode
    tp, fp, tn, fn = 0, 0, 0, 0  # Running confusion counts
    losses = []
    mse_scores = []
    predictions = []

    # Gradients are never needed here, so disable them once for the whole loop.
    with torch.no_grad(), tqdm(total=len(val_loader), desc="Validation", unit="batch") as pbar:
        for batch in val_loader:
            # Prepare data
            inputs = batch[0].float().to(device)  # Input images
            keypoints_gt = batch[1].float().to(device)  # Ground truth keypoints
            idx = batch[2].cpu().numpy()  # Sample indices

            outputs = model(inputs)  # Predicted keypoints (flat per sample)

            # Targets are reshaped to the output layout before the loss is taken.
            loss = criterion(outputs, keypoints_gt.view(outputs.size()))
            losses.append(loss.item())

            for i in range(outputs.shape[0]):  # Loop over batch samples
                # Predictions reshaped to one (x, y) pair per row
                positions = outputs[i].view(-1, 2).cpu().numpy()
                keypoints = keypoints_gt[i].cpu().numpy()

                # Metadata row of this sample
                entry = dataset.data.iloc[idx[i]].to_dict()

                # Per-sample keypoint error (may be NaN if no keypoint was scorable)
                mse = compute_mse(keypoints, positions, img_size)
                mse_scores.append(mse)

                # Keys coming from `entry` are unpacked last and therefore win
                # over the explicit keys on a name clash.
                predictions.append({
                    "idx": idx[i],
                    "points_transformed": keypoints,
                    "positions": positions,
                    "mse":mse,
                    **entry})

                # Per-sample TP, FP, TN, FN
                item_tp, item_fp, item_tn, item_fn = compute_counts(keypoints, positions, img_size, threshold)
                tp += item_tp
                fp += item_fp
                tn += item_tn
                fn += item_fn

            # Show the running mean loss on the progress bar
            pbar.set_postfix({
                'loss': round(np.mean(losses), 6)
            })
            pbar.update(1)

    # Overall metrics. Guard against empty inputs so np.mean never sees [],
    # and keep a single undefined per-sample error from turning the whole
    # mean into NaN.
    mean_loss = np.mean(losses) if losses else float("nan")
    valid_mse = [score for score in mse_scores if not np.isnan(score)]
    mean_mse = np.mean(valid_mse) if valid_mse else float("nan")
    metrics = calculate_metrics(tp, fp, tn, fn)

    # One row per evaluated sample
    predictions_df = pd.DataFrame(predictions)

    return mean_loss, mean_mse, metrics, predictions_df


def load_model(best_model_path):
    """Rebuild the ResNet50 regression model and load trained weights into it.

    The model is placed on ``cuda:0`` when CUDA is available, otherwise on the CPU.

    Args:
        best_model_path: Path to a checkpoint containing the model's state dict.

    Returns:
        The model with the checkpoint weights loaded, on the selected device.
    """
    gpu_index = 0
    device = torch.device(f'cuda:{gpu_index}' if torch.cuda.is_available() else 'cpu')

    # Recreate the model architecture (15 keypoints -> 30 outputs)
    model = create_resnet50(15)

    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict needs; it avoids executing arbitrary pickled
    # code and silences the torch.load FutureWarning.
    state_dict = torch.load(best_model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model = model.to(device)  # Move the model to the specified device

    print(f"Model loaded from: {best_model_path}")
    return model

## Resnet50

In [None]:
# Checkpoint of the selected hyperparameter-sweep run
best_model_path = "./exps_hyperparameter/serene-sweep-10_resnet50_adam_l1/model_best.pth"

# Rebuild the network and load the checkpoint weights
model = load_model(best_model_path)

# Create the dataloader. Note: despite the `val_` prefix these hold the
# *test* split, since create_dataloader builds the dataset with split="test".
dataset_path = "../../00_Dataset"
batch_size = 32
val_loader, val_dataset = create_dataloader(dataset_path, batch_size)

  model.load_state_dict(torch.load(best_model_path, map_location=device))  # Load weights


Model loaded from: ./exps_hyperparameter/serene-sweep-10_resnet50_adam_l1/model_best.pth
Samples: 199


In [4]:
# Evaluate on the device the model actually lives on. load_model falls back to
# the CPU when CUDA is unavailable, so hard-coding a CUDA device here would
# fail on CPU-only machines.
device = next(model.parameters()).device
criterion = torch.nn.MSELoss()

# validate the model
mean_loss, mean_mse, metrics, predictions_df = validate(model, val_loader, device, val_dataset, criterion=criterion)

Validation: 100%|██████████| 7/7 [00:01<00:00,  4.98batch/s, loss=146]


In [5]:
# Report, in order: mean criterion loss, mean per-sample keypoint error, metrics dict
for summary_value in (mean_loss, mean_mse, metrics):
    print(summary_value)

145.63202503749304
221.2128
{'accuracy': 0.2576214405360134, 'precision': 0.2606924643584521, 'recall': 0.9528535980148883, 'f1': 0.40938166311300633}


In [6]:
# Preview the first rows of the per-sample predictions table
predictions_df.head()

Unnamed: 0,idx,points_transformed,positions,mse,subset,video,clip,frame,points
0,0,"[[179.2, 179.2], [197.6, 178.40001], [300.0, 1...","[[152.11049, 174.79532], [175.00192, 174.58383...",1167.539917,New,Video_1,clip_6,250,"{'top_left_corner': [448, 448], 'top_left_sing..."
1,1,"[[179.2, 179.2], [197.6, 178.40001], [300.0, 1...","[[153.07959, 175.55824], [175.8067, 175.34052]...",1168.081787,New,Video_1,clip_6,300,"{'top_left_corner': [448, 448], 'top_left_sing..."
2,2,"[[179.2, 179.2], [197.6, 178.40001], [300.0, 1...","[[152.45457, 175.63797], [175.21523, 175.41908...",1167.584839,New,Video_1,clip_6,350,"{'top_left_corner': [448, 448], 'top_left_sing..."
3,3,"[[197.6, 81.6], [208.8, 81.6], [273.6, 80.4], ...","[[203.7736, 83.53658], [218.14622, 83.23999], ...",1125.257324,New,Video_2,clip_7,300,"{'top_left_corner': [494, 204], 'top_left_sing..."
4,4,"[[197.6, 81.6], [208.8, 81.6], [273.6, 80.4], ...","[[203.29466, 83.29217], [217.722, 82.995384], ...",1129.026611,New,Video_2,clip_7,350,"{'top_left_corner': [494, 204], 'top_left_sing..."


In [None]:
# Persist the per-sample predictions. to_csv does not create missing
# directories, so make sure the output folder exists first.
results_path = "results/FC_test_results.csv"
os.makedirs(os.path.dirname(results_path), exist_ok=True)
predictions_df.to_csv(results_path)