# Fine-Tuning the model

This notebook fine-tunes the KTP fall-detection model on the newly reconstructed data.

Parts of the code (the model architecture and the optical-flow dataset class) are taken from the KTP project.

In [1]:
# Mount Google Drive; the checkpoint and dataset paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
from datetime import datetime
import cv2
import seaborn as sns
import os
import random
import gc
import pandas as pd


# Set seeds for reproducibility
SEED = 42
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so setting it here only
# affects subprocesses launched from this kernel, not the already-running interpreter.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Seed every random number generator the notebook relies on (Python, NumPy, PyTorch CPU/GPU).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN for deterministic kernels and disable its auto-tuner (repeatability over speed).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Release cached GPU memory and unreachable Python objects before building the model.
torch.cuda.empty_cache()
gc.collect()

class FallDetectionCNN(nn.Module):
    """3D CNN that maps a 2-channel clip to two class logits.

    The network stacks four Conv3d + BatchNorm3d stages with GELU activations
    (max-pooling after the first two stages only), reduces the feature map with
    global average pooling, and classifies it with a three-layer MLP head that
    uses dropout between the linear layers.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 2 -> 64 -> 128 -> 256 -> 256 channels, 3x3x3 kernels,
        # padding 1 so each convolution preserves the spatial/temporal size.
        self.conv1 = nn.Conv3d(2, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm3d(64)
        self.conv2 = nn.Conv3d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm3d(128)
        self.conv3 = nn.Conv3d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm3d(256)
        self.conv4 = nn.Conv3d(256, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm3d(256)

        # Collapses whatever volume is left to a single 256-d vector per sample.
        self.global_avg_pool = nn.AdaptiveAvgPool3d(1)

        # Classifier head: 256 -> 128 -> 64 -> 2 with dropout after each hidden layer.
        self.fc1 = nn.Linear(256, 128)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return raw (un-normalised) logits of shape (batch, 2).

        Args:
            x: Batched 5-D input whose channel dimension (dim 1) has size 2.
        """
        # Stages 1 and 2 halve every non-channel dimension via max-pooling.
        x = F.max_pool3d(F.gelu(self.bn1(self.conv1(x))), 2)
        x = F.max_pool3d(F.gelu(self.bn2(self.conv2(x))), 2)
        # Stages 3 and 4 keep the resolution (no pooling in between).
        x = F.gelu(self.bn3(self.conv3(x)))
        x = F.gelu(self.bn4(self.conv4(x)))

        pooled = self.global_avg_pool(x)
        flat = pooled.view(pooled.size(0), -1)

        hidden = self.dropout1(F.gelu(self.fc1(flat)))
        hidden = self.dropout2(F.gelu(self.fc2(hidden)))
        return self.fc3(hidden)

In [3]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.transforms import Compose, Normalize
from concurrent.futures import ThreadPoolExecutor

class OpticalFlow3DDataset(Dataset):
    """Dataset of pre-processed clips stored as pickled-dict ``.npy`` files.

    Every ``.npy`` file found recursively under ``base_folder`` must contain a
    dict with an ``'array'`` entry (the clip, channels last) and a ``'label'``
    entry. Raw labels 1-5 are mapped to class 1, every other label to class 0.

    Args:
        base_folder: Root directory that is walked for ``.npy`` files.
        max_workers: Number of threads used to pre-load the labels.

    Attributes:
        file_paths: Sorted list of the discovered ``.npy`` paths.
        labels: Raw (un-binarised) label of each file, aligned with ``file_paths``.
    """

    def __init__(self, base_folder, max_workers=12):
        self.base_folder = base_folder

        # os.walk yields entries in an arbitrary, filesystem-dependent order. Sorting
        # makes sample indices stable, so seeded splits are reproducible across runs.
        self.file_paths = sorted(
            os.path.join(root, file_name)
            for root, _, files in os.walk(base_folder)
            for file_name in files
            if file_name.endswith(".npy")
        )

        # Each label lives inside its data file, so read them in parallel up front.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            self.labels = list(executor.map(self._load_label, self.file_paths))

    @staticmethod
    def _load_label(file_path):
        """Return the raw ``'label'`` entry stored in ``file_path``."""
        # NOTE(security): allow_pickle=True can execute arbitrary code while
        # unpickling; only load .npy files that come from a trusted source.
        data = np.load(file_path, allow_pickle=True).item()
        return data['label']

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(clip, label)`` as tensors.

        Returns:
            clip: float32 tensor with the channel axis moved to the front.
            label: int64 scalar tensor, 1 for raw labels 1-5 and 0 otherwise.

        Raises:
            ValueError: If the stored array is zero-dimensional.
        """
        file_path = self.file_paths[idx]
        data = np.load(file_path, allow_pickle=True).item()
        clip = data['array']
        if clip.ndim == 0:
            raise ValueError(f"Zero-dimensional array in file: {file_path}")

        # Keep channel 0 plus (up to) channels 1-2, then go channels-last -> channels-first.
        grayscale_sequence = clip[..., 0:1]
        optical_flow_sequence = clip[..., 1:3]
        combined_sequence = np.concatenate([grayscale_sequence, optical_flow_sequence], axis=-1)
        combined_sequence = np.transpose(combined_sequence, (3, 0, 1, 2))

        label = 1 if int(data['label']) in range(1, 6) else 0

        return torch.tensor(combined_sequence, dtype=torch.float32), torch.tensor(label, dtype=torch.long)



In [4]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = '/content/drive/MyDrive/Herts - BSc /3rd Year/FYP/trained_models/fine-tuning.pth'
model = FallDetectionCNN().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a GPU
# still loads on a CPU-only runtime; it also matches how the same checkpoint is loaded
# for fine-tuning further down.
model.load_state_dict(torch.load(model_path, map_location=device))
# Inference mode: dropout disabled, BatchNorm uses its running statistics.
model.eval()

FallDetectionCNN(
  (conv1): Conv3d(2, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn3): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn4): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (global_avg_pool): AdaptiveAvgPool3d(output_size=1)
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc3): L

In [5]:
import torch.optim as optim
# Adam over the parameters of the evaluation copy `model`.
# NOTE(review): `optimizer` is rebound to `model_finetune` in the "Model Training" cell
# before any optimisation step is taken, so this instance appears unused — confirm and
# consider removing.
learning_rate = 0.000001
optimizer = optim.Adam(model.parameters(), lr = learning_rate, weight_decay = 1e-5)

In [6]:
# Version tag of the VAE run whose reconstructed videos are used for fine-tuning.
version_number = 'v10.10'
# Data root directory. To produce it:
#   1. Run Generate.ipynb to reconstruct the dataset.
#   2. Run Chunk-Prepaireo-for-Fine-Tuning-04.ipynb to rename the directories,
#      process the videos into .npy files and finally label them.
data_root = f'/content/drive/MyDrive/Herts - BSc /3rd Year/FYP/trained_models/vae_model_{version_number}/videos/processed-npy-files'

# Create the dataset (this reads the label of every file once) and report its size.
dataset = OpticalFlow3DDataset(data_root)
print(len(dataset))

621


In [7]:
# Split the dataset
train_val_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)
train_dataset, val_dataset = train_test_split(train_val_dataset, test_size=0.2, random_state=42)

# Create DataLoaders
dataloader_train = DataLoader(train_dataset, batch_size=8, shuffle=True)
dataloader_val = DataLoader(val_dataset, batch_size=8, shuffle=False)
dataloader_test = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [8]:
from collections import Counter
import numpy as np

# Binary target used by the dataset class: raw labels 1-5 map to 1, everything else to 0.
binary_labels = [1 if int(label) in range(1, 6) else 0 for label in dataset.labels]

# Count the occurrences of each binary label over the whole dataset.
label_counts = Counter(binary_labels)
print("Label balance:", label_counts)

# Sanity-check the split sizes and the tensor shape of one training sample.
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")
print(f"Shape of a single training sample: {train_dataset[0][0].shape}")

Label balance: Counter({1: 621})
Number of training samples: 396
Number of validation samples: 100
Number of test samples: 125
Shape of a single training sample: torch.Size([2, 7, 51, 38])


In [9]:
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
from collections import Counter

# Binary target per file: raw labels 1-5 map to 1, everything else to 0.
binary_labels = np.array([1 if int(label) in range(1, 6) else 0 for label in dataset.labels])
# Count plain Python ints so the keys print as 0/1 and match the int labels used below.
label_counts = Counter(binary_labels.tolist())
print("Overall Label balance:", label_counts)

# Inverse-frequency class weights: rarer classes are drawn more often by the sampler.
total_samples = len(binary_labels)
class_weights = {cls: total_samples / count for cls, count in label_counts.items()}
print("Class weights:", class_weights)

# Labels of the *training* samples, in the order the sampler will index them.
if hasattr(train_dataset, 'indices'):
    # Subset-style split: map the subset's indices back into the full label array.
    train_labels = [int(binary_labels[i]) for i in train_dataset.indices]
else:
    # The split holds (features, label) samples without index bookkeeping. Read each
    # sample's own, already binarised label; taking the first len(train_dataset)
    # entries of dataset.labels would be wrong because the split shuffles the samples.
    train_labels = [int(sample_label) for _, sample_label in train_dataset]

sample_weights = [class_weights[label] for label in train_labels]
sampler = WeightedRandomSampler(sample_weights, num_samples=len(train_dataset), replacement=True)

# Replaces the shuffled training loader: `sampler` and `shuffle=True` are mutually exclusive.
dataloader_train = DataLoader(train_dataset, batch_size=8, sampler=sampler)

print(f"Number of training samples (after balancing): {len(train_dataset)}")


Overall Label balance: Counter({np.int64(1): 621})
Class weights: {np.int64(1): 1.0}
Number of training samples (after balancing): 396


In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def evaluate_model(model, dataloader, criterion, device):
    """Evaluate `model` on every batch of `dataloader` without tracking gradients.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Network whose forward pass returns per-class scores.
        dataloader: Iterable of (features, labels) batches.
        criterion: Loss callable taking (outputs, labels).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple (avg_loss, accuracy, precision, recall, specificity, f1), where
        avg_loss is the mean of the per-batch loss values and the remaining
        entries come from compute_metrics.
    """
    model.eval()
    predicted_classes, target_classes = [], []
    loss_sum = 0.0

    with torch.no_grad():
        for features, targets in dataloader:
            features = features.to(device)
            targets = targets.to(device)

            scores = model(features)
            loss_sum += criterion(scores, targets).item()

            # The predicted class is the index of the highest score per sample.
            batch_predictions = torch.max(scores.data, 1).indices
            predicted_classes.extend(batch_predictions.cpu().numpy())
            target_classes.extend(targets.cpu().numpy())

    accuracy, precision, recall, specificity, f1 = compute_metrics(target_classes, predicted_classes)
    mean_batch_loss = loss_sum / len(dataloader)

    return mean_batch_loss, accuracy, precision, recall, specificity, f1

def compute_metrics(true_labels, predictions):
    """Compute binary classification metrics with class 1 as the positive class.

    Args:
        true_labels: Sequence of ground-truth labels (0 or 1).
        predictions: Sequence of predicted labels (0 or 1), same length.

    Returns:
        Tuple (accuracy, precision, recall, specificity, f1).

    Note:
        A metric whose denominator is zero is reported as 1.0 (``zero_division=1``
        for the scikit-learn scores, an explicit guard for specificity). This covers
        evaluation data that contains a single class, e.g. specificity when there are
        no negative samples, so a value of 1.0 there means "undefined", not "perfect".
    """
    # Pinning the label set keeps the confusion matrix 2x2 even when only one class
    # occurs in the inputs, so single-class data needs no separate code path.
    tn, fp, _, _ = confusion_matrix(true_labels, predictions, labels=[0, 1]).ravel()

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, zero_division=1)
    recall = recall_score(true_labels, predictions, zero_division=1)
    # scikit-learn has no specificity helper; derive it from the negative-class row.
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 1.0
    f1 = f1_score(true_labels, predictions, zero_division=1)
    return accuracy, precision, recall, specificity, f1


# Model Evaluation before Fine tuning

In [11]:
# Separate evaluation set, loaded from its own folder of .npy files.
# NOTE: this rebinds `test_dataset`, replacing the 20% hold-out created in the split cell.
test_npy_files = '/content/drive/MyDrive/Herts - BSc /3rd Year/FYP/Workflow/Final-WorkFlow/test_set_npy/'
test_dataset = OpticalFlow3DDataset(test_npy_files)
# Fixed order (no shuffling) so evaluation runs see the batches identically.
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

### Because the training dataset contains only falls (one class), it is hard to get meaningful evaluation results

In [19]:
# Cross-entropy over the model's two output logits; the training loop reuses this criterion.
criterion = nn.CrossEntropyLoss()
# Baseline: metrics of the loaded checkpoint on the external test set, before fine-tuning.
initial_loss, initial_acc, initial_prec, initial_rec, initial_spec, initial_f1 = evaluate_model(model, test_loader, criterion, device)

In [20]:
# Report the baseline metrics, rounded to four decimal places.
print("Initial Evaluation:")
print(f"Loss: {initial_loss:.4f}")
print(f"Accuracy: {initial_acc:.4f}")
print(f"Precision: {initial_prec:.4f}")
print(f"Recall: {initial_rec:.4f}")
print(f"Specificity: {initial_spec:.4f}")
print(f"F1-Score: {initial_f1:.4f}")

Initial Evaluation:
Loss: 6.6087
Accuracy: 0.0400
Precision: 1.0000
Recall: 0.0400
Specificity: 1.0000
F1-Score: 0.0769


# Model Training

In [15]:
# Fine-tuning hyper-parameters.
fine_tune_epochs = 10
fine_tune_lr = 0.000001
fine_tune_weight_decay = 1e-5

# Start from a fresh copy of the checkpoint so the baseline `model` stays untouched.
model_finetune = FallDetectionCNN().to(device)
model_finetune.load_state_dict(torch.load(model_path, map_location=device))

# Every parameter is handed to the optimizer, i.e. no layer is frozen.
optimizer = optim.Adam(model_finetune.parameters(), lr=fine_tune_lr, weight_decay=fine_tune_weight_decay)

# Mean per-batch loss of each epoch.
train_losses = []
val_losses = []

for epoch in range(fine_tune_epochs):
    # Training pass: train mode enables dropout and updates the BatchNorm statistics.
    model_finetune.train()
    running_loss = 0.0
    for inputs, labels in dataloader_train:
        inputs, labels = inputs.to(device), labels.to(device)
        # Clear gradients left over from the previous step before back-propagating.
        optimizer.zero_grad()
        outputs = model_finetune(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_losses.append(running_loss / len(dataloader_train))

    # Validation pass: eval mode, no gradient tracking, no parameter updates.
    model_finetune.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader_val:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model_finetune(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_losses.append(val_loss / len(dataloader_val))

    print(f"Epoch {epoch+1}/{fine_tune_epochs} - Train Loss: {train_losses[-1]:.4f} | Val Loss: {val_losses[-1]:.4f}")


Epoch 1/10 - Train Loss: 1.1418 | Val Loss: 1.3709
Epoch 2/10 - Train Loss: 0.8865 | Val Loss: 0.7930
Epoch 3/10 - Train Loss: 0.6969 | Val Loss: 0.7560
Epoch 4/10 - Train Loss: 0.6415 | Val Loss: 0.5210
Epoch 5/10 - Train Loss: 0.5559 | Val Loss: 0.5477
Epoch 6/10 - Train Loss: 0.3989 | Val Loss: 0.4095
Epoch 7/10 - Train Loss: 0.3834 | Val Loss: 0.4461
Epoch 8/10 - Train Loss: 0.2829 | Val Loss: 0.3359
Epoch 9/10 - Train Loss: 0.2869 | Val Loss: 0.3316
Epoch 10/10 - Train Loss: 0.2286 | Val Loss: 0.2558


# Re-evaluate

In [17]:
# Evaluate the fine-tuned model on the same external test set and with the same
# criterion as the baseline, so the two sets of metrics are directly comparable.
final_loss, final_acc, final_prec, final_rec, final_spec, final_f1 = evaluate_model(model_finetune, test_loader, criterion, device)
print("\nRe-evaluation After Fine-tuning:")
print(f"Loss: {final_loss:.4f}")
print(f"Accuracy: {final_acc:.4f}")
print(f"Precision: {final_prec:.4f}")
print(f"Recall: {final_rec:.4f}")
print(f"Specificity: {final_spec:.4f}")
print(f"F1-Score: {final_f1:.4f}")


Re-evaluation After Fine-tuning:
Loss: 23.4968
Accuracy: 0.3358
Precision: 0.2115
Recall: 0.1833
Specificity: 0.4570
F1-Score: 0.1964
