In [None]:
%pip install opencv-python yt-dlp

In [None]:
%pip install matplotlib notebook


In [None]:
%pip install yt-dlp[web]

In [3]:
import cv2
import os
import glob

# Set output folder
output_dir = "dataset/forehand"
os.makedirs(output_dir, exist_ok=True)

# Get the highest existing index
existing_files = glob.glob(os.path.join(output_dir, "frame_*.jpg"))
existing_indices = [
    int(os.path.basename(f).split("_")[1].split(".")[0]) for f in existing_files
]
start_index = max(existing_indices) + 1 if existing_indices else 0

# Load video
cap = cv2.VideoCapture("murray_forehand.mp4")
frame_rate = 5
count = 0
saved = start_index

while True:
    ret, frame = cap.read()
    if not ret:
        break
    if count % frame_rate == 0:
        resized = cv2.resize(frame, (224, 224))
        filename = os.path.join(output_dir, f"frame_{saved:04d}.jpg")
        cv2.imwrite(filename, resized)
        saved += 1
    count += 1

cap.release()


In [None]:
%pip install torch torchvision

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix

# -----------------------------
# Parameters
# -----------------------------
BATCH_SIZE = 16
NUM_EPOCHS = 15
DATA_DIR = "dataset"

# -----------------------------
# Transforms
# -----------------------------
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.25, scale=(0.02, 0.2), ratio=(0.3, 3.3), value='random')
])



val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# -----------------------------
# Dataset + Split
# -----------------------------
dataset = datasets.ImageFolder(DATA_DIR, transform=train_transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
val_dataset.dataset.transform = val_transform

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# -----------------------------
# Model
# -----------------------------
model = models.mobilenet_v2(pretrained=True)
for param in model.features.parameters():
    param.requires_grad = False  # freeze early layers
model.classifier[1] = nn.Linear(model.last_channel, 2)  # 2 classes: forehand, backhand
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# -----------------------------
# Training Loop
# -----------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.3)


for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()

    train_acc = correct / len(train_dataset)
    print(f"Epoch {epoch+1} - Loss: {running_loss:.4f}, Train Accuracy: {train_acc:.2f}")
    scheduler.step()


# -----------------------------
# Evaluation
# -----------------------------
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        outputs = model(images)
        preds = outputs.argmax(1).cpu()
        all_preds.extend(preds)
        all_labels.extend(labels)

print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=dataset.classes))


Epoch 1 - Loss: 155.4069, Train Accuracy: 0.91
Epoch 2 - Loss: 66.3244, Train Accuracy: 0.97
Epoch 3 - Loss: 44.6731, Train Accuracy: 0.98
Epoch 4 - Loss: 41.9368, Train Accuracy: 0.98
Epoch 5 - Loss: 33.0983, Train Accuracy: 0.98
Epoch 6 - Loss: 30.0996, Train Accuracy: 0.98
Epoch 7 - Loss: 28.2520, Train Accuracy: 0.99
Epoch 8 - Loss: 28.9821, Train Accuracy: 0.98
Epoch 9 - Loss: 27.8807, Train Accuracy: 0.98
Epoch 10 - Loss: 26.3793, Train Accuracy: 0.99
Epoch 11 - Loss: 25.8007, Train Accuracy: 0.98
Epoch 12 - Loss: 27.1603, Train Accuracy: 0.98
Epoch 13 - Loss: 26.4708, Train Accuracy: 0.98
Epoch 14 - Loss: 26.7911, Train Accuracy: 0.98
Epoch 15 - Loss: 25.5768, Train Accuracy: 0.98
Confusion Matrix:
[[1017   21]
 [   0  947]]

Classification Report:
              precision    recall  f1-score   support

    backhand       1.00      0.98      0.99      1038
    forehand       0.98      1.00      0.99       947

    accuracy                           0.99      1985
   macro avg    

In [6]:
torch.save(model.state_dict(), "forehand_backhand_model3.pt")

In [10]:
import torch
from PIL import Image
from torchvision import transforms, models

# Load your trained model
model = models.mobilenet_v2(weights=None)
model.classifier[1] = torch.nn.Linear(model.last_channel, 2)
model.load_state_dict(torch.load("forehand_backhand_model3.pt"))
model.eval()

# Set to CPU or CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define the same normalization as during training
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Load your test image
image_path = "murray_backhand.png"  # replace with your image path
image = Image.open(image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)  # shape: [1, 3, 224, 224]

# Inference
with torch.no_grad():
    output = model(input_tensor)
    pred = output.argmax(1).item()

# Map label
class_names = ["backhand", "forehand"]  # assumes alphabetical folder order
print(f"Prediction: {class_names[pred]}")


Prediction: backhand
