In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
os.chdir('/content/drive/MyDrive/CS_6998')

In [21]:
from functools import total_ordering
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import AutoModelForVideoClassification
from FitnessData import FitnessData
from tqdm.notebook import tqdm
# Load a pre-trained TimeSformer model from Hugging Face.
# Note: "facebook/timesformer-base-finetuned-k400" is pre-trained on Kinetics-400 (400 classes).
# model = TimeSformerForVideoClassification.from_pretrained("facebook/timesformer-base-finetuned-k400").to(device)
model = AutoModelForVideoClassification.from_pretrained("facebook/timesformer-base-finetuned-k400")
# Modify the classification head to output 22 classes (matching your dataset)
model.config.num_labels = 22
# Replace the classifier with a new linear layer.
model.classifier = nn.Linear(model.config.hidden_size, 22)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

def train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # 使用 tqdm 包裹 train_loader，并添加描述信息
        loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")
        for videos, labels in train_loader:
            # videos (B, C, T, H, W) = (B, 3, 16, 224, 224)
            # need to become (B, T, C, H, W) = (B, 16, 3, 224, 224)
            videos = videos.permute(0, 2, 1, 3, 4)
            videos, labels = videos.to(device), labels.to(device)
            optimizer.zero_grad()
            # TimeSformer expects inputs in shape (B, C, T, H, W)
            outputs = model(videos).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * labels.size(0)
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {loss.item():.4f} Acc: {100. * correct / total:.2f}%")
            # 更新 tqdm 的显示信息
            loop.set_postfix(loss=loss.item(), acc=100. * correct / total)
        train_acc = 100. * correct / total
        val_acc = validate(model, val_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss/total:.4f} Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "timesformer_best.pth")
            print(f"New Best Model Saved with Accuracy: {best_acc:.2f}%")
    print(f"Finished Training. Best Validation Accuracy: {best_acc:.2f}%")

def validate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos = videos.permute(0, 2, 1, 3, 4)
            videos, labels = videos.to(device), labels.to(device)
            outputs = model(videos).logits
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    return 100. * correct / total

def evaluate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos = videos.permute(0, 2, 1, 3, 4)
            videos, labels = videos.to(device), labels.to(device)
            outputs = model(videos).logits
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    val_acc = 100. * correct / total
    print(f"Val Acc: {val_acc:.2f}%")

In [22]:
# Create dataset and DataLoaders using the provided FitnessData class.
train_dataset = FitnessData(root_dir="/content/drive/MyDrive/CS_6998/workoutfitness-video", train=True, transform=None, frames_per_clip=16)
val_dataset = FitnessData(root_dir="/content/drive/MyDrive/CS_6998/workoutfitness-video", train=False, transform=None, frames_per_clip=16)

batch_size = 2
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Train and validate the model.
train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)

# Load the best saved model for final testing.
model.load_state_dict(torch.load("timesformer_best.pth"))
print("Loaded Best Model for Final Testing")
final_acc = validate(model, val_loader)
print(f"Final Model Accuracy: {final_acc:.2f}%")

Epoch [1/10]:   0%|          | 0/218 [00:00<?, ?it/s]

Epoch [1/10] Loss: 3.3916 Acc: 0.00%
Epoch [1/10] Loss: 3.1693 Acc: 0.00%
Epoch [1/10] Loss: 3.6560 Acc: 0.00%
Epoch [1/10] Loss: 2.7209 Acc: 0.00%
Epoch [1/10] Loss: 3.8801 Acc: 0.00%
Epoch [1/10] Loss: 1.4196 Acc: 16.67%
Epoch [1/10] Loss: 3.3361 Acc: 21.43%
Epoch [1/10] Loss: 3.3071 Acc: 18.75%
Epoch [1/10] Loss: 3.1172 Acc: 16.67%
Epoch [1/10] Loss: 1.9021 Acc: 20.00%
Epoch [1/10] Loss: 3.1826 Acc: 18.18%
Epoch [1/10] Loss: 1.5950 Acc: 20.83%
Epoch [1/10] Loss: 4.6918 Acc: 19.23%
Epoch [1/10] Loss: 3.3071 Acc: 17.86%
Epoch [1/10] Loss: 2.0148 Acc: 16.67%
Epoch [1/10] Loss: 3.8444 Acc: 15.62%
Epoch [1/10] Loss: 3.1831 Acc: 14.71%
Epoch [1/10] Loss: 1.1819 Acc: 16.67%
Epoch [1/10] Loss: 1.9648 Acc: 18.42%
Epoch [1/10] Loss: 1.3095 Acc: 22.50%
Epoch [1/10] Loss: 3.0473 Acc: 21.43%
Epoch [1/10] Loss: 1.4718 Acc: 22.73%
Epoch [1/10] Loss: 2.2984 Acc: 21.74%
Epoch [1/10] Loss: 3.4071 Acc: 20.83%
Epoch [1/10] Loss: 2.4207 Acc: 20.00%
Epoch [1/10] Loss: 1.2731 Acc: 23.08%
Epoch [1/10] Loss

Epoch [2/10]:   0%|          | 0/218 [00:00<?, ?it/s]

Epoch [2/10] Loss: 0.3148 Acc: 100.00%
Epoch [2/10] Loss: 0.0791 Acc: 100.00%
Epoch [2/10] Loss: 0.3364 Acc: 100.00%
Epoch [2/10] Loss: 0.0158 Acc: 100.00%
Epoch [2/10] Loss: 0.2613 Acc: 100.00%
Epoch [2/10] Loss: 0.0214 Acc: 100.00%
Epoch [2/10] Loss: 1.4611 Acc: 92.86%
Epoch [2/10] Loss: 0.5906 Acc: 87.50%
Epoch [2/10] Loss: 0.9940 Acc: 83.33%
Epoch [2/10] Loss: 0.0910 Acc: 85.00%
Epoch [2/10] Loss: 0.0527 Acc: 86.36%
Epoch [2/10] Loss: 0.3774 Acc: 87.50%
Epoch [2/10] Loss: 0.0288 Acc: 88.46%
Epoch [2/10] Loss: 0.4397 Acc: 89.29%
Epoch [2/10] Loss: 0.1607 Acc: 90.00%
Epoch [2/10] Loss: 0.0775 Acc: 90.62%
Epoch [2/10] Loss: 0.3057 Acc: 91.18%
Epoch [2/10] Loss: 0.1655 Acc: 91.67%
Epoch [2/10] Loss: 0.0797 Acc: 92.11%
Epoch [2/10] Loss: 0.2798 Acc: 92.50%
Epoch [2/10] Loss: 0.2019 Acc: 92.86%
Epoch [2/10] Loss: 0.7116 Acc: 90.91%
Epoch [2/10] Loss: 0.0733 Acc: 91.30%
Epoch [2/10] Loss: 0.0147 Acc: 91.67%
Epoch [2/10] Loss: 0.2878 Acc: 92.00%
Epoch [2/10] Loss: 0.0569 Acc: 92.31%
Epoch 

Epoch [3/10]:   0%|          | 0/218 [00:00<?, ?it/s]

Epoch [3/10] Loss: 0.0249 Acc: 100.00%
Epoch [3/10] Loss: 0.0867 Acc: 100.00%
Epoch [3/10] Loss: 0.0491 Acc: 100.00%
Epoch [3/10] Loss: 0.0224 Acc: 100.00%
Epoch [3/10] Loss: 0.1863 Acc: 100.00%
Epoch [3/10] Loss: 0.0076 Acc: 100.00%
Epoch [3/10] Loss: 0.0975 Acc: 100.00%
Epoch [3/10] Loss: 0.0317 Acc: 100.00%
Epoch [3/10] Loss: 0.0166 Acc: 100.00%
Epoch [3/10] Loss: 0.0401 Acc: 100.00%
Epoch [3/10] Loss: 0.0229 Acc: 100.00%
Epoch [3/10] Loss: 0.0117 Acc: 100.00%
Epoch [3/10] Loss: 0.0161 Acc: 100.00%
Epoch [3/10] Loss: 0.0079 Acc: 100.00%
Epoch [3/10] Loss: 0.0114 Acc: 100.00%
Epoch [3/10] Loss: 0.0126 Acc: 100.00%
Epoch [3/10] Loss: 0.0172 Acc: 100.00%
Epoch [3/10] Loss: 0.0133 Acc: 100.00%
Epoch [3/10] Loss: 0.0137 Acc: 100.00%
Epoch [3/10] Loss: 0.0127 Acc: 100.00%
Epoch [3/10] Loss: 0.0103 Acc: 100.00%
Epoch [3/10] Loss: 0.0132 Acc: 100.00%
Epoch [3/10] Loss: 0.0122 Acc: 100.00%
Epoch [3/10] Loss: 0.0124 Acc: 100.00%
Epoch [3/10] Loss: 0.0822 Acc: 100.00%
Epoch [3/10] Loss: 0.0493

Epoch [4/10]:   0%|          | 0/218 [00:00<?, ?it/s]

Epoch [4/10] Loss: 0.0056 Acc: 100.00%
Epoch [4/10] Loss: 0.0040 Acc: 100.00%
Epoch [4/10] Loss: 0.0122 Acc: 100.00%
Epoch [4/10] Loss: 0.0051 Acc: 100.00%
Epoch [4/10] Loss: 0.0068 Acc: 100.00%
Epoch [4/10] Loss: 0.0079 Acc: 100.00%
Epoch [4/10] Loss: 0.0304 Acc: 100.00%
Epoch [4/10] Loss: 0.0132 Acc: 100.00%
Epoch [4/10] Loss: 0.0100 Acc: 100.00%
Epoch [4/10] Loss: 0.0020 Acc: 100.00%
Epoch [4/10] Loss: 0.0088 Acc: 100.00%
Epoch [4/10] Loss: 0.0095 Acc: 100.00%
Epoch [4/10] Loss: 0.0037 Acc: 100.00%
Epoch [4/10] Loss: 0.0014 Acc: 100.00%
Epoch [4/10] Loss: 0.0058 Acc: 100.00%


KeyboardInterrupt: 

In [None]:
import os
import shutil
import random
from pathlib import Path

def split_dataset(source_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    # Ensure ratios sum to 1
    assert train_ratio + val_ratio + test_ratio == 1, "Ratios must sum to 1"

    # Create train, val, test directories
    base_path = Path(source_dir).parent
    for split in ['train', 'val', 'test']:
        split_path = base_path / f'{source_dir}-{split}'
        if split_path.exists():
            shutil.rmtree(split_path)
        split_path.mkdir(exist_ok=True)

    # Get all class directories
    class_dirs = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d)) and not d.startswith('.')]

    for class_name in class_dirs:
        # Create class subdirectories in each split
        for split in ['train', 'val', 'test']:
            os.makedirs(os.path.join(f'{source_dir}-{split}', class_name), exist_ok=True)

        # Get all videos for this class
        videos = [v for v in os.listdir(os.path.join(source_dir, class_name))
                 if v.endswith(('.mp4', '.avi', '.mov'))]
        random.shuffle(videos)
        print(f"Class '{class_name}' has {len(videos)} videos.")
        # Ensure at least 1 sample per class in val and test sets
        n_videos = len(videos)
        if n_videos < 3:
            raise ValueError(f"Class '{class_name}' has less than 3 videos. At least 3 are needed for splitting!")

        # Assign at least one sample to val and test sets
        n_val = max(1, int(val_ratio * n_videos))
        n_test = max(1, int(test_ratio * n_videos))
        n_train = n_videos - n_val - n_test  # Remaining goes to train

        # Split videos
        train_videos = videos[:n_train]
        val_videos = videos[n_train:n_train + n_val]
        test_videos = videos[n_train + n_val:]

        # Copy videos to respective directories
        for video, split_dir in [
            (train_videos, f'{source_dir}-train'),
            (val_videos, f'{source_dir}-val'),
            (test_videos, f'{source_dir}-test')
        ]:
            for v in video:
                src = os.path.join(source_dir, class_name, v)
                dst = os.path.join(split_dir, class_name, v)
                shutil.copy2(src, dst)

        print(f"Class '{class_name}' split complete:")
        print(f"Train: {len(train_videos)}, Val: {len(val_videos)}, Test: {len(test_videos)}")

if __name__ == "__main__":
    split_dataset('/content/drive/MyDrive/CS_6998/workoutfitness-video')
    print("Dataset splitting complete!")

Class 'incline bench press' has 33 videos.
Class 'incline bench press' split complete:
Train: 25, Val: 4, Test: 4
Class 'leg raises' has 15 videos.
Class 'leg raises' split complete:
Train: 11, Val: 2, Test: 2
Class 'pull Up' has 26 videos.
Class 'pull Up' split complete:
Train: 20, Val: 3, Test: 3
Class 'romanian deadlift' has 12 videos.
Class 'romanian deadlift' split complete:
Train: 10, Val: 1, Test: 1
Class 'hip thrust' has 14 videos.
Class 'hip thrust' split complete:
Train: 10, Val: 2, Test: 2
Class 'plank' has 6 videos.
Class 'plank' split complete:
Train: 4, Val: 1, Test: 1
Class 'tricep dips' has 16 videos.
Class 'tricep dips' split complete:
Train: 12, Val: 2, Test: 2
Class 'chest fly machine' has 28 videos.
Class 'chest fly machine' split complete:
Train: 20, Val: 4, Test: 4
Class 'russian twist' has 12 videos.
Class 'russian twist' split complete:
Train: 10, Val: 1, Test: 1
Class 'hammer curl' has 12 videos.
Class 'hammer curl' split complete:
Train: 10, Val: 1, Test: 1
Cl

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("hasyimabdillah/workoutfitness-video")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/hasyimabdillah/workoutfitness-video?dataset_version_number=5...


100%|██████████| 4.32G/4.32G [00:59<00:00, 77.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/hasyimabdillah/workoutfitness-video/versions/5


In [None]:
import shutil
source_path = "/root/.cache/kagglehub/datasets/hasyimabdillah/workoutfitness-video/versions/5"
destination_path = "/content/drive/MyDrive/datasets/workoutfitness-video"
shutil.copytree(source_path, destination_path)
print("Copied dataset to:", destination_path)


Copied dataset to: /content/drive/MyDrive/datasets/workoutfitness-video
