In [1]:
import sys
import os

# Import necessary components
from train import BoxingDataset, LABEL2ID, ID2LABEL
from transformers import AutoImageProcessor

# Smoke test, steps 1-2: load the image processor for `model_name` and build
# the train split of BoxingDataset. Later cells reuse DATASET_DIR,
# image_processor and train_dataset, so those names are part of this cell's
# contract.
DATASET_DIR = "Olympic Boxing Punch Classification Video Dataset"
model_name = "MCG-NJU/videomae-base"

# Header banner: rule / title / rule, one item per line.
rule = "=" * 60
print(rule, "Testing BoxingDataset with Olympic Boxing dataset...", rule, sep="\n")

print("\n1. Loading image processor...")
image_processor = AutoImageProcessor.from_pretrained(model_name)
print("✓ Image processor loaded successfully")

print("\n2. Creating train dataset...")
train_dataset = BoxingDataset(dataset_dir=DATASET_DIR, split="train", image_processor=image_processor)
train_size = len(train_dataset)
print(f"✓ Train dataset created with {train_size} samples")

 

  from .autonotebook import tqdm as notebook_tqdm


Testing BoxingDataset with Olympic Boxing dataset...

1. Loading image processor...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


✓ Image processor loaded successfully

2. Creating train dataset...
Loaded 3223 samples for train split
Label distribution for train: {'RHMP': 322, 'RHHP': 583, 'RHBP': 121, 'RHBlP': 112, 'LHHP': 1155, 'LHBlP': 273, 'LHMP': 528, 'LHBP': 129}
✓ Train dataset created with 3223 samples


In [None]:
# Steps 3-4: build the val and test splits from the shared DATASET_DIR and
# image_processor. Only the `split` argument differs between the two calls, so
# the common keyword arguments are collected once.
shared_kwargs = {"dataset_dir": DATASET_DIR, "image_processor": image_processor}

print("\n3. Creating val dataset...")
val_dataset = BoxingDataset(split="val", **shared_kwargs)
val_size = len(val_dataset)
print(f"✓ Val dataset created with {val_size} samples")

print("\n4. Creating test dataset...")
test_dataset = BoxingDataset(split="test", **shared_kwargs)
test_size = len(test_dataset)
print(f"✓ Test dataset created with {test_size} samples")



3. Creating val dataset...
Loaded 690 samples for val split
Label distribution for val: {'LHMP': 100, 'RHHP': 135, 'LHHP': 259, 'RHBP': 22, 'RHBlP': 31, 'LHBlP': 49, 'RHMP': 61, 'LHBP': 33}
✓ Val dataset created with 690 samples

4. Creating test dataset...
Loaded 692 samples for test split
Label distribution for test: {'LHHP': 266, 'LHMP': 106, 'RHBlP': 35, 'LHBP': 31, 'RHMP': 65, 'RHBP': 36, 'RHHP': 115, 'LHBlP': 38}
✓ Test dataset created with 692 samples

5. Testing data loading (first sample from train)...


In [None]:

# Step 5: fetch the first sample of the train split and report its contents.
print("\n5. Testing data loading (first sample from train)...")
sample = train_dataset[0]
print("✓ Sample loaded successfully")
print(f"  - pixel_values shape: {sample['pixel_values'].shape}")
print(f"  - label ID: {sample['labels']}")

# ID2LABEL is indexed by the label ID. If the dataset returns the label wrapped
# in a tensor / array scalar (not visible from this notebook -- TODO confirm in
# train.py), the raw object cannot be used as a dict key reliably: tensors hash
# by identity, so the lookup would raise KeyError. Unwrap to a plain Python
# value when an `.item()` method is available; plain ints pass through as-is.
label_id = sample['labels']
if hasattr(label_id, "item"):
    label_id = label_id.item()
print(f"  - label name: {ID2LABEL[label_id]}")

print("\n" + "=" * 60)
print("ALL TESTS PASSED! ✓")
print("=" * 60)

