# Data Preview - HAR Theft Detection

This notebook helps you visualize the dataset and verify data loading.

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

from src.datasets import VideoClipDataset, create_dummy_dataset
from src.transforms import VideoTransform

%matplotlib inline

## Create Dummy Dataset (for testing)

In [None]:
# Build a small dummy dataset under ../data; the two values returned by
# create_dummy_dataset are kept as train_csv and val_csv for later cells.
dummy_dataset_options = {
    'output_dir': '../data',
    'num_videos_per_class': 5,
    'num_frames': 32,
}
train_csv, val_csv = create_dummy_dataset(**dummy_dataset_options)

## Load Dataset

In [None]:
# Instantiate the training split from the train_csv metadata file
dataset_options = dict(
    metadata_csv=train_csv,
    clips_dir='../data/clips',
    num_frames=32,
    mode='train',
)
dataset = VideoClipDataset(**dataset_options)

# Report how many clips were loaded and which class names are known
dataset_size = len(dataset)
print(f"Dataset size: {dataset_size}")
print(f"Classes: {dataset.labels}")

# Per-label row counts taken from the dataset's metadata table
print(f"\nClass distribution:")
label_counts = dataset.metadata['label'].value_counts()
print(label_counts)

## Visualize Samples

In [None]:
def visualize_clip(clip_tensor, label, num_frames_to_show=8):
    """Plot evenly spaced frames from a video clip in a grid.

    Args:
        clip_tensor: Clip indexed as (T, C, H, W) with C == 3. May be a torch
            tensor (any device, with or without grad) or a NumPy array.
            Frames are assumed to have been normalized with the mean/std
            constants used below (they match the commonly used ImageNet
            statistics) -- TODO confirm against the dataset's transform.
        label: Value shown in the figure title.
        num_frames_to_show: Maximum number of frames to draw. Capped at the
            number of frames in the clip so no frame is drawn twice.

    Raises:
        ValueError: If the clip has no frames or num_frames_to_show < 1.
    """
    T = clip_tensor.shape[0]
    if T == 0:
        raise ValueError("clip_tensor contains no frames")
    if num_frames_to_show < 1:
        raise ValueError("num_frames_to_show must be at least 1")

    # Never request more panels than there are frames; linspace would
    # otherwise repeat indices and show the same frame several times.
    n_show = min(num_frames_to_show, T)
    indices = np.linspace(0, T - 1, n_show, dtype=int)

    # Size the grid from the frame count (at most 4 columns) instead of a
    # fixed 2x4 layout, so any num_frames_to_show works. The default of 8
    # still yields a 2x4 grid with a 16x8 inch figure.
    n_cols = min(4, n_show)
    n_rows = -(-n_show // n_cols)  # ceiling division
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=(4 * n_cols, 4 * n_rows),
        squeeze=False,  # always a 2-D array, even for a single row/column
    )
    axes = axes.flatten()

    # Per-channel statistics used to undo normalization (loop-invariant)
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    for ax, idx in zip(axes, indices):
        frame = clip_tensor[idx]
        if hasattr(frame, 'detach'):
            # torch tensor: drop autograd tracking and move to host memory
            # before converting, otherwise .numpy() raises.
            frame = frame.detach().cpu().numpy()
        # (C, H, W) -> (H, W, C), the layout imshow expects
        frame = np.transpose(np.asarray(frame), (1, 2, 0))
        # Denormalize and clamp to the displayable [0, 1] range
        frame = np.clip(frame * std + mean, 0, 1)

        ax.imshow(frame)
        ax.set_title(f'Frame {idx}')
        ax.axis('off')

    # Hide any grid cells left over when n_show does not fill the last row
    for ax in axes[n_show:]:
        ax.axis('off')

    fig.suptitle(f'Label: {label}', fontsize=16)
    fig.tight_layout()
    plt.show()

# Visualize samples from each class
# Walk the dataset once and keep the first sample seen for every class.
# Indexing the dataset loads a clip, so restarting the scan from index 0
# for each class would reload the same clips once per class.
pending_class_indices = {dataset.label_to_idx[name] for name in dataset.labels}
first_sample_by_class = {}
for i in range(len(dataset)):
    if not pending_class_indices:
        break  # every class already has a sample; stop loading clips
    sample = dataset[i]
    # int() lets the label be a plain int or a one-element tensor
    label_idx = int(sample['label'])
    if label_idx in pending_class_indices:
        first_sample_by_class[label_idx] = sample
        pending_class_indices.discard(label_idx)

# Plot in the order of dataset.labels; classes with no sample are skipped
for class_name in dataset.labels:
    class_idx = dataset.label_to_idx[class_name]
    sample = first_sample_by_class.get(class_idx)
    if sample is not None:
        visualize_clip(sample['video'], class_name)

## Check Data Loader

In [None]:
# DataLoader comes from the notebook's import cell, so a fresh
# Restart & Run All reports a missing torch install up front rather than
# part-way through the notebook.

# Create dataloader
# num_workers=0 loads batches in the main process; shuffle=True means the
# previewed batch differs from run to run.
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0
)

# Get a batch
batch = next(iter(dataloader))

# Each batch is a dict; show the stacked clip shape, labels and clip IDs
print(f"Batch video shape: {batch['video'].shape}")
print(f"Batch labels: {batch['label']}")
print(f"Batch clip IDs: {batch['clip_id']}")

## Class Weights

In [None]:
# Ask the dataset for its per-class weights (used to offset class imbalance)
class_weights = dataset.get_class_weights()

# Pair each class name with its weight, in dataset.labels order
print("Class weights:")
weights_by_class = zip(dataset.labels, class_weights)
for name, value in weights_by_class:
    print("  {}: {:.3f}".format(name, value))