## Tracking With Trained ReID

In [1]:
import os
import re
import cv2
import torch
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
import os
import re
import cv2
import torch
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
from PIL import Image
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset


def load_lazy_dataset_from_directory(base_dir, transform=None, object_transform=None, seq_length=5):
    """
    Build one lazy tracking dataset per video folder found under ``base_dir``.

    The expected layout is ``<base_dir>/Frames/<video>/`` for the frame images
    and ``<base_dir>/MOT labels/<video>/gt/gt.txt`` for the annotations.
    Video folders without a ``gt.txt`` are reported and skipped.

    Args:
        base_dir: Root directory holding the ``Frames`` and ``MOT labels`` folders.
        transform: Optional callable forwarded to ``LazyTrackingDataset``.
        object_transform: Optional callable forwarded to ``LazyTrackingDataset``.
        seq_length: Sequence length forwarded to ``LazyTrackingDataset``.

    Returns:
        A tuple ``(lazy_datasets, video_paths, annotation_paths)`` of three
        parallel lists, one entry per video that has annotations.

    Raises:
        FileNotFoundError: If either ``Frames`` or ``MOT labels`` is missing.
    """
    video_paths = []
    annotation_paths = []
    lazy_datasets = []

    frames_dir = os.path.join(base_dir, "Frames")
    labels_dir = os.path.join(base_dir, "MOT labels")

    if not os.path.exists(frames_dir) or not os.path.exists(labels_dir):
        raise FileNotFoundError(f"Frames or labels directory does not exist at {base_dir}")

    # Only sub-directories are videos; sorting keeps the returned lists in a
    # deterministic order (os.listdir order is arbitrary).
    video_folders = sorted(
        folder for folder in os.listdir(frames_dir)
        if os.path.isdir(os.path.join(frames_dir, folder))
    )

    for video_folder in video_folders:
        video_frames_path = os.path.join(frames_dir, video_folder)
        gt_file_path = os.path.join(labels_dir, video_folder, "gt", "gt.txt")

        if not os.path.exists(gt_file_path):
            print(f"Warning: No annotations found for {video_folder}, skipping")
            continue

        video_paths.append(video_frames_path)
        annotation_paths.append(gt_file_path)

        frames_loader = LazyFrameLoader(video_frames_path)
        annotations_loader = LazyAnnotationLoader(gt_file_path)

        video_dataset = LazyTrackingDataset(
            frames_loader=frames_loader,
            annotations_loader=annotations_loader,
            transform=transform,
            object_transform=object_transform,
            seq_length=seq_length
        )

        lazy_datasets.append(video_dataset)

    return lazy_datasets, video_paths, annotation_paths

In [2]:
class LazyFrameLoader:
    """
    Index the frame files of one video directory and decode them on demand.

    The frame id of a file is the first run of digits in its name; files
    without digits are ignored. When several files share an id, the one that
    sorts last wins.
    """

    def __init__(self, video_path):
        self.video_path = video_path
        self.frame_files = sorted(os.listdir(video_path))

        # Pair every file name with its (possibly missing) digit match, then
        # keep only the names that actually carry a number.
        name_matches = ((name, re.search(r'\d+', name)) for name in self.frame_files)
        self.frame_id_map = {
            int(found.group()): name
            for name, found in name_matches
            if found
        }

    def __getitem__(self, frame_id):
        """Return the frame with id ``frame_id`` as an RGB array."""
        file_name = self.frame_id_map.get(frame_id)
        if file_name is None:
            raise KeyError(f"Frame ID {frame_id} not found in dataset")

        img_path = os.path.join(self.video_path, file_name)
        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals an unreadable file by returning None.
            raise IOError(f"Failed to load image {img_path}")

        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def keys(self):
        """Return the known frame ids as a list."""
        return list(self.frame_id_map)

In [3]:
class LazyAnnotationLoader:
    """
    Parse a comma-separated ground-truth file on first access.

    Each usable line carries at least nine fields:
    ``frame_id, obj_id, x, y, w, h, conf, class, visibility``.
    The class field may be numeric or one of the names in ``_CLASS_MAPPING``.
    """

    # Textual class names accepted in the class column.
    _CLASS_MAPPING = {
        "drowning": 1,
        "not drowning": 2
    }

    def __init__(self, gt_path):
        self.gt_path = gt_path
        # Filled by the first call to __getitem__ or keys().
        self.annotations = None

    def _load_annotations(self):
        """
        Read ``self.gt_path`` and group the annotations by frame id.

        Blank lines are ignored. Lines with fewer than nine fields, or whose
        numeric fields cannot be parsed (for example a header row), are
        reported and skipped instead of aborting the whole load.

        Returns:
            A ``defaultdict(list)`` mapping frame id to a list of dicts with
            the keys ``obj_id``, ``bbox`` (``[x, y, w, h]``), ``class`` and
            ``visibility``.
        """
        annotations = defaultdict(list)

        with open(self.gt_path, "r") as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Empty / trailing lines are not worth a warning.
                    continue

                parts = stripped.split(",")
                if len(parts) < 9:
                    print(f"Warning: Malformed annotation line: {line}")
                    continue

                frame_id, obj_id, x, y, w, h, conf, cls_raw, vis = parts[:9]

                try:
                    frame_id = int(float(frame_id))
                    obj_id = int(float(obj_id))
                    bbox = [float(x), float(y), float(w), float(h)]
                    float(conf)  # validated only; the value is not stored
                    vis = float(vis)
                except (ValueError, OverflowError):
                    print(f"Warning: Malformed annotation line: {line}")
                    continue

                try:
                    cls = int(float(cls_raw))
                except ValueError:
                    cls_str = cls_raw.strip().lower()
                    if cls_str in self._CLASS_MAPPING:
                        cls = self._CLASS_MAPPING[cls_str]
                    else:
                        print(f"Warning: Unknown class {cls_raw}, using default class 2")
                        cls = 2

                annotations[frame_id].append({
                    "obj_id": obj_id,
                    "bbox": bbox,
                    "class": cls,
                    "visibility": vis
                })

        return annotations

    def __getitem__(self, frame_id):
        """Return the annotations of ``frame_id`` (an empty list if none)."""
        if self.annotations is None:
            self.annotations = self._load_annotations()

        # .get() avoids inserting an empty entry into the defaultdict.
        return self.annotations.get(frame_id, [])

    def keys(self):
        """Return the ids of all frames that have at least one annotation."""
        if self.annotations is None:
            self.annotations = self._load_annotations()

        return list(self.annotations.keys())

In [4]:
import os
import torch
from torch.utils.data import Dataset
import torchvision.transforms as T
from PIL import Image
class LazyReIDPersonDataset(Dataset):
    """
    Dataset of person crops for ReID training, one sample per annotated box.

    Only lightweight metadata is built up front; images are decoded and
    cropped when a sample is requested.

    Args:
        video_paths: Frame directories, one per video.
        annotation_paths: Ground-truth files, parallel to ``video_paths``.
        transform: Optional callable applied to the RGB crop. Defaults to a
            resize to 256x256 followed by tensor conversion and normalization.

    Each item is ``(transformed_crop, obj_id)``.
    """

    def __init__(self, video_paths, annotation_paths, transform=None):
        self.video_paths = video_paths
        self.annotation_paths = annotation_paths
        self.transform = transform or T.Compose([
            T.ToPILImage(),
            T.Resize((256, 256)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
        ])

        # One entry per annotated box: (video_path, frame_id, bbox, obj_id).
        self.index = []
        # video_path -> {frame_id: [file names in sorted order]}; built once so
        # __getitem__ does not have to list and regex-scan the directory for
        # every sample.
        self._frame_files = {}

        for video_path, annotation_path in zip(video_paths, annotation_paths):
            frames_loader = LazyFrameLoader(video_path)
            annotations_loader = LazyAnnotationLoader(annotation_path)

            files_by_frame = {}
            for file in frames_loader.frame_files:
                match = re.search(r'\d+', file)
                if match:
                    files_by_frame.setdefault(int(match.group()), []).append(file)
            self._frame_files[video_path] = files_by_frame

            for frame_id in annotations_loader.keys():
                # Dict membership is O(1); annotations without a frame are dropped.
                if frame_id not in files_by_frame:
                    continue
                for ann in annotations_loader[frame_id]:
                    self.index.append((video_path, frame_id, ann["bbox"], ann["obj_id"]))

    def __len__(self):
        return len(self.index)

    @staticmethod
    def _load_crop(img_path, bbox):
        """Return the RGB crop of ``bbox`` clipped to the image, or None."""
        img = cv2.imread(img_path)
        if img is None:
            return None

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height, img_width = img.shape[:2]

        x, y, w, h = bbox

        # Clip the box to the image so partially visible objects still yield a crop.
        x_clipped = max(0, int(x))
        y_clipped = max(0, int(y))
        x2_clipped = min(img_width, int(x + w))
        y2_clipped = min(img_height, int(y + h))

        if x2_clipped - x_clipped <= 0 or y2_clipped - y_clipped <= 0:
            return None

        cropped = img[y_clipped:y2_clipped, x_clipped:x2_clipped]
        return cropped if cropped.size > 0 else None

    def __getitem__(self, idx):
        """
        Return ``(crop, obj_id)`` for sample ``idx``.

        If no file for the frame can be read, or the box lies outside the
        image, a transformed all-black 128x64 image is returned instead.
        """
        video_path, frame_id, bbox, obj_id = self.index[idx]

        candidates = self._frame_files.get(video_path, {}).get(frame_id, ())
        for file in candidates:
            cropped = self._load_crop(os.path.join(video_path, file), bbox)
            if cropped is not None:
                return self.transform(cropped), obj_id

        print("can't load, so loading a blank")
        blank = np.zeros((128, 64, 3), dtype=np.uint8)
        return self.transform(blank), obj_id

In [6]:
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import matplotlib.pyplot as plt
import numpy as np

class ReIDModel(nn.Module):
    """
    Pretrained CNN trunk followed by an embedding layer and an identity classifier.

    Args:
        num_classes: Number of identities the classifier distinguishes.
        dropout_rate: Dropout probability used before and after the embedding layer.
        feature_dim: Size of the embedding vector.
        backbone: ``'resnet18'`` or ``'convnext_tiny'``.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """

    def __init__(self, num_classes, dropout_rate=0.5, feature_dim=128, backbone='resnet18'):
        super().__init__()

        # Pick the convolutional trunk and remember how many channels it emits.
        if backbone == 'resnet18':
            base = models.resnet18(weights="IMAGENET1K_V1")
            # Drop the final pooling and fully-connected layers.
            trunk = nn.Sequential(*list(base.children())[:-2])
            backbone_dim = 512
        elif backbone == 'convnext_tiny':
            trunk = models.convnext_tiny(weights="IMAGENET1K_V1").features
            backbone_dim = 768
        else:
            raise ValueError(f"Unsupported backbone: {backbone}")

        self.backbone = trunk
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(p=dropout_rate)
        self.embedding = nn.Linear(backbone_dim, feature_dim)
        self.dropout_feat = nn.Dropout(p=dropout_rate)
        self.classifier = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return ``(L2-normalized embedding, class logits)`` for a batch of images."""
        feature_map = self.backbone(x)
        batch_size = feature_map.size(0)
        pooled = self.pool(feature_map).view(batch_size, -1)

        feat = self.dropout_feat(self.embedding(self.dropout(pooled)))
        logits = self.classifier(feat)

        # The logits use the raw embedding; only the returned embedding is normalized.
        return F.normalize(feat, dim=1), logits
# ##with hyperparameter tuning
# class ReIDModel(nn.Module):
#     def __init__(self, num_classes, backbone_name='resnet18', dropout_rate=0.5, feature_dim=128, input_dim=512):
#         super().__init__()
#         self.backbone, _ = get_backbone(backbone_name)  
#         self.pool = nn.AdaptiveAvgPool2d((1, 1))
#         self.dropout = nn.Dropout(p=dropout_rate)
#         self.embedding = nn.Linear(input_dim, feature_dim)  
#         self.dropout_feat = nn.Dropout(p=dropout_rate)
#         self.classifier = nn.Linear(feature_dim, num_classes)
    
#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.pool(x).view(x.size(0), -1)
#         x = self.dropout(x)
#         feat = self.embedding(x)
#         feat = self.dropout_feat(feat)
#         out = self.classifier(feat)
#         return feat, out

In [5]:
from sklearn.model_selection import train_test_split

def split_dataset(dataset, train_ratio=0.7, val_ratio=0.15, random_state=None):
    """
    Randomly split ``dataset`` into train / validation / test subsets.

    Subset sizes are computed as integers up front (the test subset receives
    whatever remains), so floating-point round-off in ``1 - train_ratio``
    cannot shift a sample between subsets, and ratios that leave an empty
    validation or test subset are handled instead of raising.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        train_ratio: Fraction of samples for training.
        val_ratio: Fraction of samples for validation.
        random_state: Optional integer seed for a reproducible split. When
            ``None`` the global NumPy random state is used.

    Returns:
        A tuple ``(train_subset, val_subset, test_subset)`` of
        ``torch.utils.data.Subset`` objects over ``dataset``.

    Raises:
        ValueError: If a ratio is outside ``[0, 1]`` or the two sum to more than 1.
    """
    from torch.utils.data import Subset

    if not (0 <= train_ratio <= 1 and 0 <= val_ratio <= 1):
        raise ValueError("train_ratio and val_ratio must be within [0, 1]")
    if train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError("train_ratio + val_ratio must not exceed 1")

    total = len(dataset)
    n_train = min(total, int(round(total * train_ratio)))
    n_val = min(total - n_train, int(round(total * val_ratio)))

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    shuffled = rng.permutation(total).tolist()

    train_idx = shuffled[:n_train]
    val_idx = shuffled[n_train:n_train + n_val]
    test_idx = shuffled[n_train + n_val:]

    return Subset(dataset, train_idx), Subset(dataset, val_idx), Subset(dataset, test_idx)


In [7]:
def train_reid(model, train_loader, val_loader, epochs=10, lr=1e-3, patience=5, 
               optimizer_type='adam', scheduler_type='plateau'):
    """
    Train ``model`` as an identity classifier with early stopping.

    The model is moved to CUDA when available. After every epoch the
    validation accuracy is computed; the weights of the best epoch are
    snapshotted and restored before returning. Training curves are plotted
    at the end via ``plot_training_metrics``.

    Args:
        model: Module whose forward returns ``(embeddings, logits)``.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Learning rate for the optimizer.
        patience: Number of consecutive epochs without a new best validation
            accuracy after which training stops.
        optimizer_type: ``'adam'``, ``'adamw'`` or ``'sgd'`` (case-insensitive).
        scheduler_type: ``'plateau'``, ``'step'`` or ``'cosine'`` (case-insensitive).

    Returns:
        The same ``model`` object, loaded with the best-performing weights.

    Raises:
        ValueError: If ``optimizer_type`` or ``scheduler_type`` is not supported.
    """
    import copy

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer_name = optimizer_type.lower()
    if optimizer_name == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    elif optimizer_name == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(f"Unsupported optimizer: {optimizer_type}")

    scheduler_name = scheduler_type.lower()
    if scheduler_name == 'plateau':
        # mode='max' because the monitored quantity is validation accuracy.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=2)
    elif scheduler_name == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    elif scheduler_name == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    else:
        raise ValueError(f"Unsupported scheduler: {scheduler_type}")

    criterion = nn.CrossEntropyLoss()

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    best_val_acc = 0.0
    no_improve_epochs = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        total_loss, correct = 0, 0
        batch_count = 0

        for imgs, labels in train_loader:
            batch_count += 1
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            _, logits = model(imgs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (logits.argmax(1) == labels).sum().item()

        # Loss is averaged per batch, accuracy per sample.
        train_loss = total_loss / batch_count
        train_acc = correct / len(train_loader.dataset)

        model.eval()
        val_loss, val_correct = 0, 0
        val_batch_count = 0

        with torch.no_grad():
            for imgs, labels in val_loader:
                val_batch_count += 1
                imgs, labels = imgs.to(device), labels.to(device)
                _, logits = model(imgs)
                loss = criterion(logits, labels)
                val_loss += loss.item()
                val_correct += (logits.argmax(1) == labels).sum().item()

        val_loss = val_loss / val_batch_count
        val_acc = val_correct / len(val_loader.dataset)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1}: Train Loss={train_loss:.3f}, Train Acc={train_acc:.3f}, "
              f"Val Loss={val_loss:.3f}, Val Acc={val_acc:.3f}")

        # ReduceLROnPlateau needs the monitored metric; the others step blindly.
        if scheduler_name == 'plateau':
            scheduler.step(val_acc)
        else:
            scheduler.step()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() returns references to the live parameter tensors, so
            # a shallow .copy() would keep changing as training continues.
            # Deep-copy to freeze this epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())
            no_improve_epochs = 0
            print(f"New best validation accuracy: {best_val_acc:.3f}")
        else:
            no_improve_epochs += 1
            print(f"No improvement for {no_improve_epochs} epochs")

            if no_improve_epochs >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"Restored best model with validation accuracy: {best_val_acc:.3f}")

    plot_training_metrics(train_losses, val_losses, train_accuracies, val_accuracies)

    return model


def plot_training_metrics(train_losses, val_losses, train_accuracies, val_accuracies):
    """
    Plot training and validation metrics

    Draws loss curves and accuracy curves side by side, one point per epoch,
    then prints the final and best accuracies.

    Args:
        train_losses: Per-epoch training loss.
        val_losses: Per-epoch validation loss.
        train_accuracies: Per-epoch training accuracy.
        val_accuracies: Per-epoch validation accuracy.

    Returns:
        None. If no epoch was recorded, nothing is plotted.
    """
    # With zero recorded epochs there is nothing to draw, and the summary
    # below would fail on the empty lists.
    if not train_losses or not val_losses or not train_accuracies or not val_accuracies:
        print("No training metrics to plot.")
        return

    epochs_range = range(1, len(train_losses) + 1)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    ax1.plot(epochs_range, train_losses, 'b-', label='Training Loss', linewidth=2)
    ax1.plot(epochs_range, val_losses, 'r-', label='Validation Loss', linewidth=2)
    ax1.set_title('Training and Validation Loss', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Epochs', fontsize=12)
    ax1.set_ylabel('Loss', fontsize=12)
    ax1.legend(fontsize=10)
    ax1.grid(True, alpha=0.3)

    ax2.plot(epochs_range, train_accuracies, 'b-', label='Training Accuracy', linewidth=2)
    ax2.plot(epochs_range, val_accuracies, 'r-', label='Validation Accuracy', linewidth=2)
    ax2.set_title('Training and Validation Accuracy', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Epochs', fontsize=12)
    ax2.set_ylabel('Accuracy', fontsize=12)
    ax2.legend(fontsize=10)
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"\nFinal Training Accuracy: {train_accuracies[-1]:.3f}")
    print(f"Final Validation Accuracy: {val_accuracies[-1]:.3f}")
    print(f"Best Validation Accuracy: {max(val_accuracies):.3f}")

# ##with hyperparameters tuning
# def train_reid(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=10, patience=5):
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)

#     best_val_acc = 0.0
#     no_improve_epochs = 0
#     best_model_state = None

#     for epoch in range(epochs):
#         model.train()
#         total_loss, correct = 0, 0
#         batch_count = 0
#         for imgs, labels in train_loader:
#             batch_count += 1
#             imgs, labels = imgs.to(device), labels.to(device)
#             optimizer.zero_grad()
#             _, logits = model(imgs)
#             loss = criterion(logits, labels)
#             loss.backward()
#             optimizer.step()
#             total_loss += loss.item()
#             correct += (logits.argmax(1) == labels).sum().item()
            
#         total_loss += loss.item()
#         train_acc = correct / len(train_loader.dataset)

#         model.eval()
#         val_correct, val_loss = 0, 0
#         val_batch_count = 0
#         with torch.no_grad():
#             for imgs, labels in val_loader:
#                 val_batch_count += 1
#                 imgs, labels = imgs.to(device), labels.to(device)
#                 _, logits = model(imgs)
#                 loss = criterion(logits, labels)
#                 val_loss += loss.item()
#                 val_correct += (logits.argmax(1) == labels).sum().item()

#         val_acc = val_correct / len(val_loader.dataset)
#         val_loss = val_loss / val_batch_count
        
#         print(f"Epoch {epoch+1}: Train Loss={total_loss/batch_count:.3f}, Train Acc={train_acc:.3f}, "
#                f"Val Loss={val_loss:.3f}, Val Acc={val_acc:.3f}")
        
#         scheduler.step(val_acc)
        
#         if val_acc > best_val_acc:
#             best_val_acc = val_acc
#             best_model_state = deepcopy(model.state_dict())
#             no_improve_epochs = 0
#             print(f"New best validation accuracy: {best_val_acc:.3f}")
#         else:
#             no_improve_epochs += 1
#             if no_improve_epochs >= patience:
#                 break

#     if best_model_state:
#         model.load_state_dict(best_model_state)
#         print(f"Restored best model with validation accuracy: {best_val_acc:.3f}")
#     return model, best_val_acc



In [8]:
from sklearn.metrics import classification_report

def evaluate_reid(model, test_loader):
    """
    Print a per-class classification report for ``model`` on ``test_loader``.

    Args:
        model: Module whose forward returns ``(embeddings, logits)``.
        test_loader: Loader yielding ``(images, labels)`` batches.

    Returns:
        None. The report is printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Inputs are moved to `device` below, so the model has to live there too;
    # without this a model still on the CPU fails on a CUDA machine.
    model.to(device)
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs = imgs.to(device)
            _, logits = model(imgs)
            y_true.extend(labels.tolist())
            y_pred.extend(logits.argmax(1).cpu().tolist())

    print(classification_report(y_true, y_pred))

In [9]:

class LazyTrackingDataset(Dataset):
    """
    Sliding-window dataset over the frames of one video.

    Sample ``i`` covers ``seq_length`` consecutive frame ids starting at the
    ``i``-th smallest id and returns a dict with:

    * ``"frames"``: the stacked (transformed) frames of the window.
    * ``"objects"``: ``obj_id -> {"class", "bboxes", "appearances", "valid_mask"}``,
      where the three sequences have one slot per frame of the window. Slots
      in which the object is not annotated hold zeros and ``valid_mask`` 0.

    Args:
        frames_loader: Object with ``keys()`` and ``[frame_id]`` returning an
            H x W x 3 array.
        annotations_loader: Object whose ``[frame_id]`` returns a list of
            annotation dicts (``obj_id``, ``bbox``, ``class``).
        transform: Optional callable applied to every frame.
        object_transform: Optional callable applied to every object crop.
        seq_length: Number of frames per sample.

    NOTE(review): appearances are combined with ``torch.stack``, so crops of
    one object must share a shape; ``object_transform`` presumably resizes
    them to a fixed size -- confirm for callers that pass None.
    """

    def __init__(self, frames_loader, annotations_loader, transform=None, object_transform=None, seq_length=5):
        self.frames_loader = frames_loader
        self.annotations_loader = annotations_loader
        self.transform = transform
        self.object_transform = object_transform
        self.seq_length = seq_length

        self.frame_keys = sorted(frames_loader.keys())

    def __len__(self):
        # Number of full windows; zero when the video is shorter than one window.
        return max(0, len(self.frame_keys) - self.seq_length + 1)

    def __getitem__(self, index):
        num_windows = len(self)
        if index < 0:
            index += num_windows
        if not 0 <= index < num_windows:
            # Required for plain `for sample in dataset` iteration to stop, and
            # prevents silently returning a window shorter than seq_length.
            raise IndexError(f"Index {index} out of range for {num_windows} sequences")

        frame_ids = self.frame_keys[index: index + self.seq_length]
        frames = []

        objects_across_sequence = {}

        for frame_idx, frame_id in enumerate(frame_ids):
            frame = self.frames_loader[frame_id]

            if self.transform:
                transformed_frame = self.transform(frame)
            else:
                # HWC uint8 -> CHW float in [0, 1].
                transformed_frame = torch.from_numpy(frame.transpose(2, 0, 1)).float() / 255.0

            frames.append(transformed_frame)

            for ann in self.annotations_loader[frame_id]:
                obj_id = ann["obj_id"]

                if obj_id not in objects_across_sequence:
                    # The class is taken from the object's first annotation in the window.
                    objects_across_sequence[obj_id] = {
                        "class": ann["class"],
                        "bboxes": [None] * self.seq_length,
                        "appearances": [None] * self.seq_length,
                        "valid_mask": [0] * self.seq_length
                    }

                track = objects_across_sequence[obj_id]
                track["bboxes"][frame_idx] = ann["bbox"]
                track["valid_mask"][frame_idx] = 1

                # Only boxes lying fully inside the frame produce a crop.
                x, y, w, h = map(int, ann["bbox"])
                if x >= 0 and y >= 0 and w > 0 and h > 0 and x+w <= frame.shape[1] and y+h <= frame.shape[0]:
                    crop = frame[y:y+h, x:x+w]

                    if self.object_transform and crop.size > 0:
                        crop_tensor = self.object_transform(crop)
                    else:
                        crop_tensor = torch.from_numpy(crop.transpose(2, 0, 1)).float() / 255.0

                    track["appearances"][frame_idx] = crop_tensor

        frame_sequence = torch.stack(frames)

        for track in objects_across_sequence.values():
            track["valid_mask"] = torch.tensor(track["valid_mask"], dtype=torch.int64)

            appearances = track["appearances"]
            valid_appearances = [a for a in appearances if a is not None]

            if valid_appearances:
                # Missing slots are zero-filled with the shape of the first real crop.
                template = valid_appearances[0]
                track["appearances"] = torch.stack([
                    app if app is not None else torch.zeros_like(template)
                    for app in appearances
                ])
            else:
                default_shape = (3, 64, 128)
                track["appearances"] = torch.zeros(
                    (self.seq_length, *default_shape), dtype=torch.float32
                )

            track["bboxes"] = torch.stack([
                torch.tensor(bbox, dtype=torch.float32) if bbox is not None
                else torch.zeros(4, dtype=torch.float32)
                for bbox in track["bboxes"]
            ])

        return {
            "frames": frame_sequence,
            "objects": objects_across_sequence
        }

In [11]:
class RemappedDataset(Dataset):
    """
    Wrap a dataset of ``(image, id)`` pairs and translate each id through a mapping.

    Args:
        dataset: Indexable dataset whose items unpack into ``(image, original_id)``.
        id_mapping: Mapping from original id to the id that should be returned.
    """

    def __init__(self, dataset, id_mapping):
        self.id_mapping = id_mapping
        self.dataset = dataset

    def __getitem__(self, idx):
        """Return ``(image, mapped_id)`` for sample ``idx``."""
        sample = self.dataset[idx]
        image, source_id = sample
        return image, self.id_mapping[source_id]

    def __len__(self):
        # Same length as the wrapped dataset.
        return len(self.dataset)

### Training with grid search

In [12]:
# from itertools import product
# from torchvision import models
# import torch.nn as nn
# import torch.optim as optim

# search_space = {
#     #"dropout_rate": [0.0,0.1,0.2, 0.3, 0.5],
#     #"feature_dim": [128, 256, 512],
#     #"backbone": ["resnet18", "resnet34", "efficientnet_v2_s","mobilenet_v3_large", "convnext_tiny","swin_t", "mobilenet_v3_small","efficientnet_b0", "shufflenet_v2_x0_5"  ],
#     "optimizer": ["adam"],
#     "scheduler": ["step", "plateau"],
#     "lr": [2e-4, 3e-4,4e-4,  5e-4],
# }



In [13]:
# def get_backbone(name):
#     if name == "resnet18":
#         base = models.resnet18(weights="IMAGENET1K_V1")
#         input_dim = 512
#     elif name == "resnet34":
#         base = models.resnet34(weights="IMAGENET1K_V1")
#         input_dim = 512
#     elif name == "efficientnet_v2_s":
#         base = models.efficientnet_v2_s(weights="IMAGENET1K_V1")
#         input_dim = 1280
#     elif name == "mobilenet_v3_large":
#         base = models.mobilenet_v3_large(weights="IMAGENET1K_V1")
#         input_dim = 960
#     elif name == "convnext_tiny":
#         base = models.convnext_tiny(weights="IMAGENET1K_V1")
#         input_dim = 768
#     elif name == "swin_t":
#         base = models.swin_t(weights="IMAGENET1K_V1")
#         input_dim = 768
#     elif name == "mobilenet_v3_small":
#         base = models.mobilenet_v3_small(weights="IMAGENET1K_V1")
#         input_dim = 576
#     elif name == "efficientnet_b0":
#         base = models.efficientnet_b0(weights="IMAGENET1K_V1")
#         input_dim = 1280
#     elif name == "shufflenet_v2_x0_5":
#         base = models.shufflenet_v2_x0_5(weights="IMAGENET1K_V1")
#         input_dim = 1024
#     else:
#         raise ValueError("Unsupported backbone")
    
#     backbone = nn.Sequential(*list(base.children())[:-2])
#     return backbone, input_dim

In [14]:
# def train_with_config(config):
#     backbone, input_dim = get_backbone(config["backbone"])
    
#     model = ReIDModel(
#         num_classes=len(id2label),
#         dropout_rate=config["dropout_rate"],
#         feature_dim=config["feature_dim"],
#         input_dim=input_dim  
#     )
#     model.backbone = backbone
    
#     
#     criterion = nn.CrossEntropyLoss()
    
#     if config["optimizer"] == "adam":
#         optimizer = optim.Adam(model.parameters(), lr=config["lr"])
#     elif config["optimizer"] == "sgd":
#         optimizer = optim.SGD(model.parameters(), lr=config["lr"], momentum=0.9)
    
#     if config["scheduler"] == "plateau":
#         scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)
#     elif config["scheduler"] == "step":
#         scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    
#     model, best_val_acc = train_reid(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=100, patience=10)
#     return model, best_val_acc

In [15]:
# lazy_datasets, video_paths, annotation_paths = load_lazy_dataset_from_directory(
#     "/kaggle/input/aquagaurd-drowning-tracking-dataset/Drowning Tracking Dataset", 
#     seq_length=25
# )

# reid_dataset = LazyReIDPersonDataset(video_paths, annotation_paths)

# unique_ids = set()
# for _, obj_id in reid_dataset:
#     unique_ids.add(obj_id)
# unique_ids = sorted(unique_ids)
# id2label = {orig: idx for idx, orig in enumerate(unique_ids)}

# remapped_dataset = RemappedDataset(reid_dataset, id2label)

# indices = list(range(len(remapped_dataset)))
# train_indices, val_test_indices = train_test_split(indices, test_size=0.4, random_state=42)
# val_indices, test_indices = train_test_split(val_test_indices, test_size=0.5, random_state=42)

# train_data = Subset(remapped_dataset, train_indices)
# val_data = Subset(remapped_dataset, val_indices)
# test_data = Subset(remapped_dataset, test_indices)

# batch_size=32
# train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
# val_loader = DataLoader(val_data, batch_size=batch_size, num_workers=4)
# test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=4)

In [16]:
# from itertools import product
# from copy import deepcopy
# best_acc = 0
# best_config = None
# best_model = None

# for combo in product(*search_space.values()):
#     keys = list(search_space.keys())
#     config = dict(zip(keys, combo))
#     print(f"\n🚀 Running config: {config}")
    
#     model, val_acc = train_with_config(config)
#     print(f"✅ Validation Accuracy: {val_acc:.4f}")
    
#     if val_acc > best_acc:
#         best_acc = val_acc
#         best_config = config
#         best_model = model
#         print(f"🔥 New best config found with acc: {best_acc:.4f}")


## Training without grid search

In [17]:

# Discover the videos and build the per-crop ReID dataset.
lazy_datasets, video_paths, annotation_paths = load_lazy_dataset_from_directory(
    "/kaggle/input/aquagaurd-drowning-tracking-dataset/Drowning Tracking Dataset", 
    seq_length=25
)

reid_dataset = LazyReIDPersonDataset(video_paths, annotation_paths)

# Every index entry is (video_path, frame_id, bbox, obj_id), so the ids can be
# read from the index directly. Iterating the dataset itself would decode,
# crop and transform every image only to discard it.
# NOTE(review): labels are keyed on obj_id alone, so equal ids coming from
# different videos share one class -- confirm ids are unique across videos.
unique_ids = sorted({obj_id for _, _, _, obj_id in reid_dataset.index})
id2label = {orig: idx for idx, orig in enumerate(unique_ids)}

remapped_dataset = RemappedDataset(reid_dataset, id2label)

# 60 / 20 / 20 split over individual crops, with a fixed seed.
indices = list(range(len(remapped_dataset)))
train_indices, val_test_indices = train_test_split(indices, test_size=0.4, random_state=42)
val_indices, test_indices = train_test_split(val_test_indices, test_size=0.5, random_state=42)

train_data = Subset(remapped_dataset, train_indices)
val_data = Subset(remapped_dataset, val_indices)
test_data = Subset(remapped_dataset, test_indices)

batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_data, batch_size=batch_size, num_workers=4)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=4)

In [None]:
# Hyper-parameters for the single training run (no grid search).
config = dict(
    dropout_rate=0.1,
    feature_dim=512,
    backbone='convnext_tiny',
    optimizer='adam',
    scheduler='plateau',
    lr=0.0002,
)
num_epochs = 100

# One output class per distinct object id found in the dataset.
model = ReIDModel(
    num_classes=len(unique_ids),
    **{key: config[key] for key in ('dropout_rate', 'feature_dim', 'backbone')}
)

# Train with early stopping after 10 epochs without improvement.
trained_model = train_reid(
    model,
    train_loader,
    val_loader,
    epochs=num_epochs,
    lr=config['lr'],
    patience=10,
    optimizer_type=config['optimizer'],
    scheduler_type=config['scheduler'],
)

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:00<00:00, 190MB/s]  


Epoch 1: Train Loss=2.764, Train Acc=0.278, Val Loss=2.333, Val Acc=0.341
New best validation accuracy: 0.341
Epoch 2: Train Loss=1.984, Train Acc=0.425, Val Loss=1.761, Val Acc=0.446
New best validation accuracy: 0.446
Epoch 3: Train Loss=1.284, Train Acc=0.613, Val Loss=1.224, Val Acc=0.629
New best validation accuracy: 0.629
Epoch 4: Train Loss=0.820, Train Acc=0.749, Val Loss=0.990, Val Acc=0.709
New best validation accuracy: 0.709
Epoch 5: Train Loss=0.534, Train Acc=0.835, Val Loss=0.948, Val Acc=0.737
New best validation accuracy: 0.737
Epoch 6: Train Loss=0.358, Train Acc=0.888, Val Loss=0.892, Val Acc=0.761
New best validation accuracy: 0.761
Epoch 7: Train Loss=0.258, Train Acc=0.920, Val Loss=0.980, Val Acc=0.754
No improvement for 1 epochs
Epoch 8: Train Loss=0.204, Train Acc=0.939, Val Loss=0.921, Val Acc=0.779
New best validation accuracy: 0.779
