# 01 - Data Exploration

This notebook explores the tennis/pickleball video dataset:
- Video properties (resolution, fps, duration)
- Frame sampling and visualization
- Frame extraction and preprocessing (resizing for TrackNet and YOLO)
- Label distribution analysis
- Data augmentation preview

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

# Add project root to path so project-local packages (e.g. `src`, imported
# further down) can be found. '..' is resolved against the current working
# directory, so this assumes the kernel runs from the notebook's own folder.
sys.path.insert(0, os.path.abspath('..'))

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height in inches) for figures that do not pass
# their own `figsize`.
plt.rcParams['figure.figsize'] = (12, 8)

## 1. Video Properties

In [None]:
# Update this path to your video file.
# It is a relative path, so it is resolved against the kernel's current
# working directory. Every cell below that touches the video reads it.
VIDEO_PATH = '../data/raw/sample_match.mp4'

def analyze_video(video_path):
    """Read basic properties of a video file and print them.

    Args:
        video_path: Path of the video to open with ``cv2.VideoCapture``.

    Returns:
        ``None`` when the video cannot be opened (a message is printed).
        Otherwise a dict with the keys:

        - ``width`` / ``height``: frame size as ints.
        - ``fps``: frame rate exactly as reported by the backend.
        - ``total_frames``: reported frame count as an int.
        - ``codec``: the FOURCC code as an int.
        - ``codec_fourcc``: the same code decoded into its four characters.
        - ``duration_sec``: ``total_frames / fps``, or ``None`` when the
          reported fps is not a positive number.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f'Cannot open: {video_path}')
            return None

        fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
        props = {
            'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            'fps': cap.get(cv2.CAP_PROP_FPS),
            'total_frames': int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
            'codec': fourcc,
            # FOURCC packs four 8-bit characters, lowest byte first.
            'codec_fourcc': ''.join(
                chr((fourcc >> (8 * i)) & 0xFF) for i in range(4)
            ),
        }
    finally:
        # Release the handle on every path, including "cannot open" and errors.
        cap.release()

    # cap.get() may report 0 for a property the backend cannot provide; the
    # comparison is also False for NaN. Either way the duration is unknown,
    # so report None instead of raising ZeroDivisionError.
    fps = props['fps']
    props['duration_sec'] = props['total_frames'] / fps if fps > 0 else None

    for k, v in props.items():
        print(f'{k}: {v}')
    return props

# Guard first: without the file there is nothing to analyze, so explain how
# to provide one; otherwise keep the analysis result in `props`.
if not os.path.exists(VIDEO_PATH):
    print(f'Video not found at {VIDEO_PATH}')
    print('Please place a tennis/pickleball video in data/raw/')
else:
    props = analyze_video(VIDEO_PATH)

## 2. Sample Frames Visualization

In [None]:
def show_sample_frames(video_path, num_samples=8):
    """Display evenly-spaced sample frames from video.

    Frames are laid out on a grid with two rows and as many columns as are
    needed to hold ``num_samples`` panels; panels without a frame stay blank.

    Args:
        video_path: Path of the video to open with ``cv2.VideoCapture``.
        num_samples: Number of frames to sample (must be at least 1). When
            the video reports fewer frames than this, each frame is shown
            only once.

    Returns:
        None. A message is printed (and no figure is drawn) when the video
        cannot be opened or reports no frames.

    Raises:
        ValueError: If ``num_samples`` is less than 1.
    """
    if num_samples < 1:
        raise ValueError(f'num_samples must be >= 1, got {num_samples}')

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f'Cannot open: {video_path}')
            return

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total <= 0:
            # linspace(0, total - 1, ...) would otherwise yield negative indices.
            print(f'No frames reported for: {video_path}')
            return

        # Integer truncation repeats indices when total < num_samples;
        # np.unique drops the repeats (and returns them sorted).
        indices = np.unique(np.linspace(0, total - 1, num_samples, dtype=int))

        # Round the column count up so an odd num_samples still fits on 2 rows.
        n_cols = (num_samples + 1) // 2
        fig, axes = plt.subplots(2, n_cols, figsize=(20, 8))
        axes = axes.flatten()

        # Hide ticks on every panel up front, including ones left without a frame.
        for ax in axes:
            ax.axis('off')

        for ax, idx in zip(axes, indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                ax.set_title(f'Frame {idx} (unreadable)')
                continue
            # OpenCV delivers BGR; matplotlib expects RGB.
            ax.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            ax.set_title(f'Frame {idx}')
    finally:
        # Release the handle on every path, including early returns and errors.
        cap.release()

    plt.tight_layout()
    plt.show()

# Preview frames only when the configured video exists; otherwise say why the
# cell produced no figure (same message as the video-properties cell).
if os.path.exists(VIDEO_PATH):
    show_sample_frames(VIDEO_PATH)
else:
    print(f'Video not found at {VIDEO_PATH}')

## 3. Frame Extraction & Preprocessing

In [None]:
# Project-local helper. This import relies on the project root having been
# put on sys.path by the setup cell at the top of the notebook.
from src.pipeline import DataPreprocessor

# NOTE(review): built with default arguments; what the constructor configures
# is defined in src/pipeline and cannot be seen from this notebook.
preprocessor = DataPreprocessor()

# The calls below are left commented out on purpose, so running the notebook
# top-to-bottom only prints the hints at the end of this cell. They presumably
# write files under ../data/frames/ -- confirm against DataPreprocessor before
# uncommenting.

# Extract frames (every frame)
# preprocessor.extract_frames(VIDEO_PATH, '../data/frames/', frame_interval=1)

# Resize for TrackNet (640x360)
# NOTE(review): the size tuple is presumably (width, height) -- TODO confirm.
# preprocessor.resize_frames('../data/frames/', '../data/frames/tracknet/', (640, 360))

# Resize for YOLO (640x640)
# preprocessor.resize_frames('../data/frames/', '../data/frames/yolo/', (640, 640))

print('Data preprocessing utilities ready.')
print('Uncomment the lines above to extract and resize frames.')

## 4. Data Augmentation Preview

In [None]:
# Only the import is guarded: an ImportError raised later (e.g. while
# plotting) must not be misreported as "albumentations is missing".
try:
    import albumentations as A
except ImportError:
    print('Install albumentations: pip install albumentations')
else:
    # Define augmentation pipeline. Each transform fires independently with
    # its own probability `p`, so some previews may look identical to the
    # original image.
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.HueSaturationValue(p=0.3),
        A.GaussNoise(p=0.2),
        A.Blur(blur_limit=3, p=0.2),
    ])

    # Synthetic stand-in image: 360 rows x 640 columns x 3 channels of uniform
    # noise. randint's upper bound is exclusive, so 256 is needed to cover the
    # full uint8 range 0..255.
    sample_img = np.random.randint(0, 256, (360, 640, 3), dtype=np.uint8)

    # 2x4 grid: the original in the top-left panel, seven augmented variants after it.
    fig, axes = plt.subplots(2, 4, figsize=(16, 6))
    axes[0, 0].imshow(sample_img)
    axes[0, 0].set_title('Original')
    axes[0, 0].axis('off')  # match the augmented panels, which hide their ticks

    for i in range(1, 8):
        augmented = transform(image=sample_img)['image']
        ax = axes[i // 4, i % 4]  # row-major position of panel i
        ax.imshow(augmented)
        ax.set_title(f'Aug {i}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()
    print('Albumentations augmentation pipeline ready.')