# Group Activity Recognition - Kaggle Training Notebook

This notebook pulls code from the GitHub repo and runs training on Kaggle GPUs.
All the actual logic lives in the repo - this file is just the entry point.

Repo: https://github.com/AbdelRahman-Madboly/Group_Activity_Recognition

In [None]:
# Install dependencies
!pip install -q scikit-learn tqdm seaborn

In [None]:
# Clone the project repo, or update it if an earlier run already cloned it
import os
import subprocess
import sys

REPO_URL = 'https://github.com/AbdelRahman-Madboly/Group_Activity_Recognition.git'
REPO_DIR = '/kaggle/working/Group_Activity_Recognition'


def _run_git(*args):
    """Run `git <args>`, echo its output, and raise if git exits non-zero.

    Raises:
        subprocess.CalledProcessError: if the git command fails.
    """
    # Argument list (no shell) so paths are passed verbatim, without quoting issues.
    result = subprocess.run(['git', *args], capture_output=True, text=True)
    print(result.stdout, end='')
    print(result.stderr, end='', file=sys.stderr)
    # Fail loudly: without this, a failed clone/pull would still be followed
    # by the 'Repo ready' message below.
    result.check_returncode()


if os.path.exists(REPO_DIR):
    # Pull latest changes if already cloned
    _run_git('-C', REPO_DIR, 'pull')
else:
    _run_git('clone', REPO_URL, REPO_DIR)

# Add repo to path so imports work; skip when a re-run of this cell already added it
if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

print('Repo ready:', REPO_DIR)

In [None]:
# Report the PyTorch build and whether a CUDA device is visible to it
import torch

cuda_available = torch.cuda.is_available()

print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {cuda_available}')
if cuda_available:
    # Device index 0 is the first CUDA device
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# Verify dataset paths before training
import os
from config import VIDEOS_ROOT, ANNOT_ROOT

print('Checking dataset paths...')

# Videos: count the sub-directories directly under VIDEOS_ROOT
if os.path.exists(VIDEOS_ROOT):
    videos = [d for d in os.listdir(VIDEOS_ROOT) if os.path.isdir(os.path.join(VIDEOS_ROOT, d))]
    print(f'Videos: {len(videos)} found')
else:
    print(f'Videos root not found: {VIDEOS_ROOT}')
    print('Update DATASET_ROOT in config/config.py')

# Annotations: ANNOT_ROOT was imported but never checked, so a missing
# annotation path would go unreported here.
# NOTE(review): assumes ANNOT_ROOT is a filesystem path (file or directory) - confirm in config.
if os.path.exists(ANNOT_ROOT):
    print(f'Annotations root found: {ANNOT_ROOT}')
else:
    print(f'Annotations root not found: {ANNOT_ROOT}')

In [None]:
# Pre-training sanity checks: run the repo's data and model test functions in order
from tests.test_data import test_annotation_parser, test_frame_dataset_loads, test_person_crop_dataset_loads
from tests.test_models import test_b1_forward, test_person_classifier_forward

sanity_checks = (
    test_annotation_parser,
    test_frame_dataset_loads,
    test_person_crop_dataset_loads,
    test_b1_forward,
    test_person_classifier_forward,
)

# Any exception raised by a check stops the cell before the success message
for check in sanity_checks:
    check()

print('All checks passed. Ready to train.')

## Train Baseline B1

In [None]:
# Baseline B1 training run; hyperparameters are gathered in one dict so they are easy to tweak
from training.train_b1 import train as train_b1

b1_hparams = dict(num_epochs=20, batch_size=16, lr=1e-4)

# train() is unpacked into two values (presumably the trained model and its
# training history - confirm against training/train_b1.py)
model_b1, history_b1 = train_b1(**b1_hparams)

## Train Baseline B3

In [None]:
# Baseline B3 training run, using the defaults defined by train()
from training.train_b3 import train as train_b3

# Keep whatever train() returns instead of leaving the call as the cell's last
# expression: a bare call echoes the return value into the cell output and
# then discards it.
# NOTE(review): return shape is not visible from this notebook - confirm against training/train_b3.py.
result_b3 = train_b3()

## Explore Dataset (optional)

In [None]:
# Dataset exploration helpers (visualize_frame is imported here for the next cell)
from config import TRAIN_VIDEOS
from utils.visualization import print_dataset_statistics, visualize_frame

# Class distributions for the training split
print_dataset_statistics(TRAIN_VIDEOS)

In [None]:
# Draw one example frame together with its player bounding boxes.
# Relies on visualize_frame imported in the previous cell.
sample_frame = {'video_id': 1, 'clip_id': 9575}
visualize_frame(**sample_frame)