In [None]:
from google.colab import files
files.upload()

!pip install kaggle

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download krishnwin/fitness-xclip


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/krishnwin/fitness-xclip
License(s): unknown
Downloading fitness-xclip.zip to /content
  0% 0.00/70.5M [00:00<?, ?B/s]
100% 70.5M/70.5M [00:00<00:00, 1.41GB/s]


In [None]:
!unzip fitness-xclip.zip

Archive:  fitness-xclip.zip
  inflating: data/videos/bench_press_1.mp4  
  inflating: data/videos/bench_press_10.mp4  
  inflating: data/videos/bench_press_11.mp4  
  inflating: data/videos/bench_press_12.mp4  
  inflating: data/videos/bench_press_13.mp4  
  inflating: data/videos/bench_press_14.mp4  
  inflating: data/videos/bench_press_17.mp4  
  inflating: data/videos/bench_press_18.mp4  
  inflating: data/videos/bench_press_2.mp4  
  inflating: data/videos/bench_press_3.mp4  
  inflating: data/videos/bench_press_5.mp4  
  inflating: data/videos/bench_press_7.mp4  
  inflating: data/videos/bench_press_8.mp4  
  inflating: data/videos/chest_fly_machine_10.mp4  
  inflating: data/videos/chest_fly_machine_11.mp4  
  inflating: data/videos/chest_fly_machine_12.mp4  
  inflating: data/videos/chest_fly_machine_13.mp4  
  inflating: data/videos/chest_fly_machine_2.mp4  
  inflating: data/videos/chest_fly_machine_20.mp4  
  inflating: data/videos/chest_fly_machine_21.mp4  
  inflating: data

In [None]:

import os
import pandas as pd
import numpy as np

# Build a cleaned label table with one row per class and a "filename" column
# holding a comma-separated list of that class's video files.
labels_path = "/content/mylabels.csv"
videos_dir = "/content/data/videos"

df = pd.read_csv(labels_path)

if "filename" not in df.columns:
    # Infer the files of each class from the video directory. Sorting makes
    # the result independent of the arbitrary order returned by os.listdir.
    all_files = sorted(os.listdir(videos_dir))
    filenames = []
    for class_name in df["name"]:
        # Files are named "<class>_<n>.mp4"; including the underscore keeps one
        # class name from matching another that merely starts with it.
        prefix = f"{class_name}_"
        matches = [f for f in all_files if f.startswith(prefix) and f.endswith('.mp4')]
        if matches:
            # Keep every matching video (not only the first) in the same
            # comma-separated layout that the expansion step splits on.
            filenames.append(",".join(matches))
        else:
            print(f"No file found for class {class_name}")
            filenames.append(None)
    df["filename"] = filenames

# Drop classes without any video. The copy keeps the assignment below from
# writing into a view of the original frame (SettingWithCopyWarning).
df = df[df["filename"].notna()].copy()

df["filename"] = df["filename"].astype(str)

df.to_csv("/content/labels_fixed_new.csv", index=False)

# Rows are classes, so count the individual files to report the video total.
total_videos = sum(
    len([f for f in entry.split(",") if f.strip()]) for entry in df["filename"]
)
print(df.head(), "\nTotal videos:", total_videos)



   id               name                                           filename
0   0        bench_press  bench_press_1.mp4,bench_press_2.mp4,bench_pres...
1   1  chest_fly_machine  chest_fly_machine_2.mp4,chest_fly_machine_3.mp...
2   2           deadlift  deadlift_1.mp4,deadlift_2.mp4,deadlift_3.mp4,d...
3   3      lateral_raise  lateral_raise_7.mp4,lateral_raise_8.mp4,latera...
4   4            push-up  push-up_1.mp4,push-up_2.mp4,push-up_3.mp4,push... 
Total videos: 5


In [None]:
import pandas as pd

import os

# Prefer the cleaned label file written by the fix-up step (rows without
# videos removed, "filename" column guaranteed); fall back to the raw CSV
# when that file has not been produced.
fixed_labels_path = "/content/labels_fixed_new.csv"
source_labels_path = fixed_labels_path if os.path.exists(fixed_labels_path) else "mylabels.csv"
df = pd.read_csv(source_labels_path)

# Expand each class row into one row per video file.
rows = []
for _, row in df.iterrows():
    cell = row['filename']
    if not isinstance(cell, str):
        # A missing value is read back as NaN (a float) and has no .split().
        continue
    filenames = [f.strip() for f in cell.split(',') if f.strip()]
    for f in filenames:
        rows.append({'id': row['id'], 'name': row['name'], 'filename': f})

# Explicit columns keep the schema stable even when no rows were produced.
df_expanded = pd.DataFrame(rows, columns=['id', 'name', 'filename'])

# Write to the exact path the training cell reads (LABELS_FILE).
df_expanded.to_csv("/content/labels_expanded.csv", index=False)
print(df_expanded.head(), len(df_expanded))


   id         name           filename
0   0  bench_press  bench_press_1.mp4
1   0  bench_press  bench_press_2.mp4
2   0  bench_press  bench_press_3.mp4
3   0  bench_press  bench_press_5.mp4
4   0  bench_press  bench_press_7.mp4 84


In [None]:
!pip install torch torchvision pytorchvideo

import os
import random
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.models.video import r3d_18
from torchvision.io import read_video
from torchvision.transforms import Compose, Resize, RandomCrop, RandomHorizontalFlip, ColorJitter, Lambda


# ---- Configuration -------------------------------------------------------
VIDEO_DIR = "/content/data/videos"
LABELS_FILE = "/content/labels_expanded.csv"
CLIP_LEN = 16      # frames per clip fed to the network
BATCH_SIZE = 4
EPOCHS = 10
LR = 1e-4
SEED = 42

# Seed the generators used for clip sampling, augmentation and shuffling so
# that runs are repeatable.
random.seed(SEED)
torch.manual_seed(SEED)

# ---- Train / validation split --------------------------------------------
df = pd.read_csv(LABELS_FILE)

# Split on unique videos so a file can never land in both splits.
video_table = df.drop_duplicates(subset="filename")
unique_videos = video_table["filename"].to_numpy()

try:
    # Stratify by class so both splits contain every class in similar
    # proportions. Each dataset derives its label indices from the classes it
    # sees, so a class missing from one split would shift those indices.
    train_videos, val_videos = train_test_split(
        unique_videos,
        test_size=0.2,
        random_state=SEED,
        stratify=video_table["name"].to_numpy(),
    )
except ValueError as err:
    # Raised e.g. when a class has a single video; keep the notebook usable.
    print(f"Stratified split not possible ({err}); falling back to a random split.")
    train_videos, val_videos = train_test_split(
        unique_videos, test_size=0.2, random_state=SEED
    )

train_df = df[df["filename"].isin(train_videos)].reset_index(drop=True)
val_df = df[df["filename"].isin(val_videos)].reset_index(drop=True)

classes = sorted(df["name"].unique())
num_classes = len(classes)
print("Detected classes:", classes)


class RandomTemporalClip:
    """Crop a video to a randomly positioned window of consecutive frames.

    Frames are indexed along dimension 1 of the input tensor. Videos that
    have ``clip_len`` frames or fewer are returned unchanged.
    """

    def __init__(self, clip_len):
        # Number of consecutive frames to keep.
        self.clip_len = clip_len

    def __call__(self, video):
        # How many frames are left over once one clip has been taken.
        spare_frames = video.shape[1] - self.clip_len
        if spare_frames > 0:
            # Both bounds are inclusive, so the window can end on the last frame.
            first = random.randint(0, spare_frames)
            return video[:, first:first + self.clip_len, :, :]
        return video

class VideoTransform:
    """Apply an image transform to a whole video clip in one call.

    The clip is expected as a ``(C, T, H, W)`` tensor. It is rearranged to
    ``(T, C, H, W)`` so that the frames form a batch of images, passed through
    ``transform`` once, and rearranged back to ``(C, T, H', W')``.

    Calling the transform once per clip, instead of once per frame, means a
    random transform draws its parameters (crop window, flip decision, colour
    jitter factors) a single time and uses them for every frame. Sampling them
    per frame makes consecutive frames jump around spatially and
    photometrically.

    Args:
        transform: callable that accepts a batched image tensor of shape
            ``(..., C, H, W)``, as torchvision's tensor transforms do.
    """

    def __init__(self, transform):
        self.transform = transform

    def __call__(self, video):
        # (C, T, H, W) -> (T, C, H, W): frames become the batch dimension.
        clip = video.permute(1, 0, 2, 3)
        clip = self.transform(clip)
        # Back to (C, T, H', W'); contiguous so downstream ops get a dense tensor.
        return clip.permute(1, 0, 2, 3).contiguous()

# Spatial augmentation for training: scale pixel values to [0, 1], resize,
# then random crop / horizontal flip / colour jitter.
# NOTE(review): no mean/std normalisation is applied although the model is
# initialised from pretrained weights - confirm whether the pretrained
# preprocessing should be mirrored here and in the inference transform.
transform_train_frames = Compose([
    Lambda(lambda x: x / 255.0),
    Resize((128, 171)),
    RandomCrop((112, 112)),
    RandomHorizontalFlip(),
    ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3)
])

# Validation uses a plain resize to the network input size, with no randomness.
transform_val_frames = Compose([
    Lambda(lambda x: x / 255.0),
    Resize((112, 112))
])


def _center_temporal_clip(video, clip_len=CLIP_LEN):
    """Return the middle ``clip_len`` frames of a (C, T, H, W) video.

    Videos with ``clip_len`` frames or fewer are returned unchanged.
    """
    total_frames = video.shape[1]
    if total_frames <= clip_len:
        return video
    start = (total_frames - clip_len) // 2
    return video[:, start:start + clip_len, :, :]


# Training samples a random temporal window on every access.
transform_train = Compose([
    RandomTemporalClip(CLIP_LEN),
    VideoTransform(transform_train_frames)
])
# Validation takes a fixed (centre) window so that validation loss/accuracy
# are deterministic and comparable from one epoch to the next.
transform_val = Compose([
    Lambda(_center_temporal_clip),
    VideoTransform(transform_val_frames)
])


class VideoDataset(Dataset):
    """Dataset yielding ``(clip, label)`` pairs decoded from video files.

    Args:
        df: DataFrame with a ``"filename"`` column (file name inside
            ``video_dir``) and a ``"name"`` column (class name).
        video_dir: directory containing the video files.
        transform: optional callable applied to the ``(C, T, H, W)`` video
            tensor, e.g. temporal cropping followed by spatial transforms.
        clip_len: minimum number of frames; shorter videos are looped until
            they have at least this many.
        classes: optional explicit list of class names defining the label
            indices. Pass the same list to every split so that indices agree
            even when a split does not contain every class. Defaults to the
            sorted class names found in ``df``.

    Unreadable or empty videos are skipped: the next row (wrapping around) is
    returned instead, after a message is printed.
    """

    def __init__(self, df, video_dir, transform=None, clip_len=16, classes=None):
        self.df = df
        self.video_dir = video_dir
        self.transform = transform
        self.clip_len = clip_len
        self.classes = sorted(df["name"].unique()) if classes is None else list(classes)
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.df)

    def _load_sample(self, idx):
        """Decode row ``idx``; return ``(video, label)`` or ``None`` if unusable."""
        # An out-of-range idx raises IndexError here, outside the try block,
        # so it reaches the caller.
        row = self.df.iloc[idx]
        video_path = os.path.join(self.video_dir, row["filename"])
        label = self.class_to_idx[row["name"]]

        try:
            video, _, _ = read_video(video_path, pts_unit='sec')
        except Exception as e:
            print(f"Error reading {video_path}: {e}")
            return None

        if video.shape[0] == 0:
            print(f" Empty video {video_path}, skipping...")
            return None

        # Loop short videos so there are at least clip_len frames to sample.
        if video.shape[0] < self.clip_len:
            reps = (self.clip_len // video.shape[0]) + 1
            video = video.repeat((reps, 1, 1, 1))

        # Move the last dimension to the front, giving the channels-first
        # (C, T, H, W) layout that the transforms index along dimension 1.
        video = video.permute(3, 0, 1, 2)

        if self.transform:
            video = self.transform(video)

        return video.float(), label

    def __getitem__(self, idx):
        """Return the sample at ``idx``, skipping forward past unusable videos.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no video in the dataset could be read. The skip
                is a bounded loop, so this cannot recurse without end.
        """
        num_rows = len(self.df)
        candidate = idx
        for _ in range(max(num_rows, 1)):
            sample = self._load_sample(candidate)
            if sample is not None:
                return sample
            candidate = (candidate + 1) % num_rows
        raise RuntimeError(
            f"None of the {num_rows} videos in {self.video_dir} could be read"
        )


train_dataset = VideoDataset(train_df, VIDEO_DIR, clip_len=CLIP_LEN, transform=transform_train)
val_dataset = VideoDataset(val_df, VIDEO_DIR, clip_len=CLIP_LEN, transform=transform_val)

# Each dataset derives its label indices from the classes present in its own
# split. If a split is missing a class, the same index would mean different
# classes in training, validation and prediction - stop instead of training
# on silently inconsistent labels.
for split_name, split_dataset in (("train", train_dataset), ("val", val_dataset)):
    if list(split_dataset.classes) != list(classes):
        raise ValueError(
            f"The {split_name} split does not contain every class "
            f"({list(split_dataset.classes)} vs {list(classes)}); "
            "re-split so that each class appears in both splits."
        )

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `weights=` replaces the deprecated `pretrained=True`; "DEFAULT" selects the
# default pretrained weights for this architecture.
model = r3d_18(weights="DEFAULT")
# Replace the classification head with one sized for this dataset's classes.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

CHECKPOINT_PATH = "exercise_r3d18.pth"

# Training
for epoch in range(EPOCHS):
    model.train()
    train_loss, correct, total = 0, 0, 0

    for videos, labels in train_loader:
        videos, labels = videos.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()
        total += labels.size(0)

    # max(..., 1) avoids a ZeroDivisionError when a split is empty.
    train_acc = 100.0 * correct / max(total, 1)

    # Validation
    model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos, labels = videos.to(device), labels.to(device)
            outputs = model(videos)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, preds = outputs.max(1)
            val_correct += preds.eq(labels).sum().item()
            val_total += labels.size(0)

    val_acc = 100.0 * val_correct / max(val_total, 1)

    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss/max(len(train_loader), 1):.4f} |Train Acc: {train_acc:.2f}% "
          f"Val Loss: {val_loss/max(len(val_loader), 1):.4f} | Val Acc: {val_acc:.2f}%")

    # Checkpoint after every epoch so an interrupted run (e.g. a stopped cell
    # or a disconnected runtime) still leaves the latest completed epoch on disk.
    torch.save(model.state_dict(), CHECKPOINT_PATH)

# saving model
torch.save(model.state_dict(), CHECKPOINT_PATH)
print("Model saved as exercise_r3d18.pth")


Detected classes: ['bench_press', 'chest_fly_machine', 'deadlift', 'lateral_raise', 'push-up']




KeyboardInterrupt: 

In [None]:
# Persist the current model weights (state dict only, not the full module).
checkpoint_file = "exercise_r3d18.pth"
weights = model.state_dict()
torch.save(weights, checkpoint_file)
print(f"Model saved as {checkpoint_file}")

Model saved as exercise_r3d18.pth


In [None]:
# Read the saved weights onto the active device, load them into the model,
# then switch the model to evaluation mode for inference.
saved_weights = torch.load("exercise_r3d18.pth", map_location=device)
model.load_state_dict(saved_weights)
model.eval()

VideoResNet(
  (stem): BasicStem(
    (0): Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
    (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (conv2): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1):

In [None]:
import torch
from torchvision.io import read_video


# Inference preprocessing: scale pixel values to [0, 1] and resize to the
# 112x112 input size used for validation.
transform = Compose([
    Lambda(lambda x: x / 255.0),
    Resize((112, 112))
])


def predict_video(video_path, clip_len=16):
    """Predict the exercise class shown in a video file.

    The first ``clip_len`` frames are classified; shorter videos are looped
    until they have enough frames. Relies on the notebook-level ``model``,
    ``device``, ``classes`` and ``transform`` objects.

    Args:
        video_path: path to the video file.
        clip_len: number of frames fed to the model (default 16).

    Returns:
        The predicted class name, taken from ``classes``.

    Raises:
        ValueError: if no frames could be decoded from ``video_path``.
    """
    video, _, _ = read_video(video_path, pts_unit='sec')

    num_frames = video.shape[0]
    if num_frames == 0:
        # Without this guard the repeat computation below divides by zero.
        raise ValueError(f"No frames could be decoded from {video_path}")

    if num_frames < clip_len:
        reps = (clip_len // num_frames) + 1
        video = video.repeat((reps, 1, 1, 1))
    video = video[:clip_len]

    # Move the last dimension to the front (channels-first clip), then add a
    # batch dimension after preprocessing.
    video = video.permute(3, 0, 1, 2)
    video = transform(video)
    video = video.unsqueeze(0).to(device).float()

    # Do not depend on another cell having switched the model to eval mode;
    # batch-norm must use its running statistics for single-clip inference.
    model.eval()
    with torch.no_grad():
        outputs = model(video)
        _, predicted = outputs.max(1)
    return classes[predicted.item()]





In [None]:
# Classify a single uploaded clip and show the predicted class name.
video_path = "/content/4110677-sd_426_240_25fps.mp4"
predicted_class = predict_video(video_path)
print("Prediction:", predicted_class)

Prediction: chest_fly_machine
