In [25]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader
import cv2
from PIL import Image

# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_classes = 1  # Binary classification: the model emits a single logit
num_epochs = 10
batch_size = 8
learning_rate = 0.0001

# Root folder of the dataset; it must contain 'train', 'valid' and 'test' sub-folders
data_root = 'D:/yoga'

# Resize/normalization shared by the training and evaluation pipelines
image_size = (224, 224)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training transformations with random augmentation.
# The name `transform` is kept unchanged because later cells reference it.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    normalize,
])

# Evaluation transformations: identical resize/normalization but NO random
# augmentation, so validation/test accuracy is deterministic and is measured
# on unperturbed images.
eval_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    normalize,
])

# Load datasets (ImageFolder assigns one integer label per class sub-folder)
train_dataset = datasets.ImageFolder(root=f'{data_root}/train', transform=transform)
val_dataset = datasets.ImageFolder(root=f'{data_root}/valid', transform=eval_transform)
test_dataset = datasets.ImageFolder(root=f'{data_root}/test', transform=eval_transform)

# BCEWithLogitsLoss only makes sense for labels in {0, 1}; with more class
# folders the extra labels would be fed to the loss without any error.
if len(train_dataset.classes) != 2:
    raise ValueError(
        f"Expected exactly 2 class folders for binary classification, "
        f"found {len(train_dataset.classes)}: {train_dataset.classes}"
    )

# Only the training data is shuffled; evaluation order does not matter
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Load pre-trained model and replace its final layer for binary classification
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)  # Output a single raw logit
model = model.to(device)

# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()  # Applies the sigmoid internally, so the model outputs raw logits
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: every epoch performs one full pass over train_loader and
# reports the mean per-batch loss.
batches_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    model.train()  # put the network in training mode
    running_loss = 0.0

    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device).float()  # the loss is computed against float targets

        # Standard optimization step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(images).view(-1)  # flatten to one logit per sample
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / batches_per_epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

def evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and run without gradient
    tracking. Each raw output is passed through a sigmoid and thresholded at
    0.5; the resulting 0/1 prediction is compared with the label.

    Args:
        model: network producing one logit per sample.
        loader: iterable yielding ``(inputs, labels)`` batches
            (labels are assumed to be 0/1 class indices).
        device: device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a float in ``[0, 1]``.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device).float()  # Convert labels to float
            outputs = model(inputs)
            predicted = (torch.sigmoid(outputs) > 0.5).float()  # Apply sigmoid and threshold
            total += labels.size(0)
            correct += (predicted.view(-1) == labels).sum().item()

    if total == 0:
        raise ValueError("Cannot compute accuracy: the data loader yielded no samples.")
    return correct / total


# Validation loop
val_accuracy = evaluate_accuracy(model, val_loader, device)
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

# %%
# Testing loop
test_accuracy = evaluate_accuracy(model, test_loader, device)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Save the model (weights only, as a state dict)
torch.save(model.state_dict(), 'yoga_pose_detect_model.pth')



Epoch [1/10], Loss: 0.7116
Epoch [2/10], Loss: 0.6523
Epoch [3/10], Loss: 0.6085
Epoch [4/10], Loss: 0.5493
Epoch [5/10], Loss: 0.5429
Epoch [6/10], Loss: 0.4806
Epoch [7/10], Loss: 0.4596
Epoch [8/10], Loss: 0.4210
Epoch [9/10], Loss: 0.3816
Epoch [10/10], Loss: 0.3596
Validation Accuracy: 100.00%
Test Accuracy: 100.00%


In [27]:
# Real-time pose detection using webcam

# Load the model for inference BEFORE opening the camera, so that a missing or
# incompatible checkpoint cannot leave the webcam handle open.
# map_location=device lets a checkpoint saved on a GPU machine be loaded on a
# CPU-only machine (and vice versa).
model.load_state_dict(torch.load('yoga_pose_detect_model.pth', map_location=device))
model.eval()

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a Jupyter kernel exit() shuts the
    # kernel down (losing the trained model) rather than just stopping this cell.
    raise RuntimeError("Error: Could not open webcam.")
# Define the feedback function
def provide_feedback(prediction):
    """Map a predicted class to the message shown to the user.

    Args:
        prediction: predicted class value; a value equal to 1 is treated as a
            correct pose, anything else as an incorrect one.

    Returns:
        str: a congratulation when ``prediction == 1``, otherwise a hint to
        adjust the alignment.
    """
    pose_is_correct = prediction == 1
    return (
        "Great job! Your pose is correct."
        if pose_is_correct
        else "Try to adjust your alignment."
    )

# Deterministic preprocessing for inference: the same resize and normalization
# as the training pipeline, but without the random flip / rotation / color
# jitter. Feeding frames through the augmenting `transform` would randomly
# perturb every frame and make the predictions unstable.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Failed to grab frame')
            break

        # Flip the frame horizontally to correct mirroring
        frame = cv2.flip(frame, 1)

        # Preprocess the frame: OpenCV delivers BGR, the model pipeline expects an RGB PIL image
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(frame_rgb)
        input_tensor = inference_transform(pil_image)
        input_batch = input_tensor.unsqueeze(0).to(device)  # Create a mini-batch as expected by the model

        with torch.no_grad():
            output = model(input_batch)
            probability = torch.sigmoid(output)  # Apply sigmoid to get probability
            predicted = (probability > 0.5).float()  # Apply threshold to get predicted class

        predicted_class = predicted.item()
        cv2.putText(frame, f'Predicted Pose: {"Correct" if predicted_class == 1 else "Incorrect"}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        print(f'Raw output: {output.item()}, Probability: {probability.item()}, Predicted: {predicted_class}')

        # Provide feedback based on prediction
        feedback = provide_feedback(predicted_class)

        # Display the feedback on the frame
        cv2.putText(frame, feedback, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the frame with feedback
        cv2.imshow('Yoga Pose Detection', frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close windows even if the loop raised or the
    # kernel was interrupted, so the camera is not left locked.
    cap.release()
    cv2.destroyAllWindows()

Raw output: -0.022719833999872208, Probability: 0.4943203330039978, Predicted: 0.0
Raw output: 0.10267220437526703, Probability: 0.5256455540657043, Predicted: 1.0
Raw output: 0.005377314984798431, Probability: 0.5013443231582642, Predicted: 1.0
Raw output: -0.020284246653318405, Probability: 0.49492910504341125, Predicted: 0.0
Raw output: -0.11352646350860596, Probability: 0.47164881229400635, Predicted: 0.0
Raw output: -0.3651168644428253, Probability: 0.4097214639186859, Predicted: 0.0
Raw output: 0.12194888293743134, Probability: 0.5304495096206665, Predicted: 1.0
Raw output: -0.2177482396364212, Probability: 0.445777028799057, Predicted: 0.0
Raw output: 0.1750456690788269, Probability: 0.5436500310897827, Predicted: 1.0
Raw output: 0.22284038364887238, Probability: 0.5554807186126709, Predicted: 1.0
Raw output: 0.03410080447793007, Probability: 0.5085243582725525, Predicted: 1.0
Raw output: 0.07056418061256409, Probability: 0.5176337361335754, Predicted: 1.0
Raw output: 0.23698674