## Install & Import

In [None]:
!pip install mediapipe opencv-python torch torchvision matplotlib scikit-learn

In [None]:
import cv2
import mediapipe as mp
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Mount Google Drive
# The dataset and model paths used in this notebook all live under
# /content/gdrive, so this must run before any of them are accessed.
drive.mount('/content/gdrive')

## Definitions

In [None]:
# Define the dataset path
# data_dir: root folder of the training frames. ShootingFormDataset reads the
# "Error Detected" and "No Error Detected" subfolders inside it.
data_dir = "/content/gdrive/MyDrive/E-RAU(DB)/MA680/data/Shooting/Extra Data/op_Processed_Frames"
# model_save_path: file the trained model's state_dict is saved to and later reloaded from.
model_save_path = "/content/gdrive/MyDrive/E-RAU(DB)/MA680/models/shooting_form_classifier_SFC_1.pth"

In [None]:
class ShootingFormDataset(Dataset):
    """Image dataset of shooting-form frames read from two labelled folders.

    ``data_dir`` must contain the subfolders ``"Error Detected"`` (label 0)
    and ``"No Error Detected"`` (label 1). Every ``.jpg``/``.png`` file found
    directly inside them becomes one sample.

    Args:
        data_dir: Root directory holding the two class folders.
        transform: Optional callable applied to the RGB image array before it
            is returned from ``__getitem__``.

    ``images`` (file paths) and ``labels`` (ints) are plain, index-aligned
    lists, so callers may extend them after construction.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []

        # Folder position in this list is the class label: 0 = error, 1 = no error.
        for label, folder in enumerate(["Error Detected", "No Error Detected"]):
            folder_path = os.path.join(data_dir, folder)
            # os.listdir order is arbitrary; sort so sample order is reproducible.
            for file in sorted(os.listdir(folder_path)):
                if file.endswith(('.jpg', '.png')):
                    self.images.append(os.path.join(folder_path, file))
                    self.labels.append(label)

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is read with OpenCV, converted from BGR to RGB and passed
        through ``transform`` when one was given. The label is a float32
        scalar tensor.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
        """
        image_path = self.images[idx]
        label = self.labels[idx]

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.float32)

## Data & Preprocessing

In [None]:
# Define augmentation for the minority class
# Random flip / rotation / colour jitter, then the same resize + normalisation
# ([-1, 1] per channel) used by the plain evaluation transform.
augmentation = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Upsample the minority class
minority_class_path = os.path.join(data_dir, "No Error Detected")
minority_images = [f for f in os.listdir(minority_class_path) if f.endswith(('.jpg', '.png'))]

# Apply augmentation to create more samples
upsampled_images = []
upsampled_labels = []
for image_file in minority_images:
    image_path = os.path.join(minority_class_path, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure; report the path instead of
        # letting ToPILImage fail on a None input.
        raise OSError(f"Could not read image: {image_path}")
    # OpenCV loads BGR; convert to RGB so these tensors use the same channel
    # order as the samples produced by ShootingFormDataset.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    for _ in range(6):  # Upsample by a factor of 6 (360 / 60)
        augmented_image = augmentation(image)
        upsampled_images.append(augmented_image)
        upsampled_labels.append(1)  # "No Error Detected" is label 1 in ShootingFormDataset

In [None]:
class AdvancedShootingFormClassifier(nn.Module):
    """Small three-layer CNN for binary shooting-form classification.

    ``forward`` returns one raw logit per image (shape ``[batch, 1]``), not a
    probability. This matches how the rest of the file uses the model: it is
    trained with ``nn.BCEWithLogitsLoss`` and callers apply ``torch.sigmoid``
    to the output themselves. Applying a sigmoid here as well would squash the
    output twice and pin every thresholded prediction to class 1.

    The output of the last convolution and its gradient are kept on the
    instance so Grad-CAM can read them after a backward pass.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        # Adaptive pooling fixes the feature map at 8x8 regardless of input size.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))
        self.fc1 = nn.Linear(128 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 1)
        self.relu = nn.ReLU()
        # Kept as an attribute for code that references it; it has no
        # parameters and is intentionally not applied in forward().
        self.sigmoid = nn.Sigmoid()

        # Filled in by forward() / the backward hook for Grad-CAM.
        self.gradients = None
        self.activations = None

    def activations_hook(self, grad):
        """Backward hook: remember the gradient w.r.t. the last conv output."""
        self.gradients = grad

    def forward(self, x):
        """Return raw logits of shape ``[batch, 1]`` for an image batch ``x``."""
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))

        # Hooks can only be attached to tensors that take part in autograd,
        # so skip registration under torch.no_grad().
        if x.requires_grad:
            x.register_hook(self.activations_hook)
        self.activations = x

        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

    def get_activations_gradient(self):
        """Return the gradient captured by the most recent backward pass."""
        return self.gradients

    def get_activations(self):
        """Return the last-conv activations from the most recent forward pass."""
        return self.activations

In [None]:

# Add upsampled images to the dataset
dataset = ShootingFormDataset(data_dir, transform=augmentation)
# Repeat every minority-class file 6 times. The dataset applies the random
# augmentation each time a sample is loaded, so each repeat yields a different
# variant. Entries must be full paths (minority_images holds bare file names)
# and must be added one per sample so images and labels stay aligned.
extra_paths = [
    os.path.join(minority_class_path, image_file)
    for image_file in minority_images
    for _ in range(6)
]
dataset.images.extend(extra_paths)
dataset.labels.extend([1] * len(extra_paths))  # "No Error Detected" is label 1

# Transforms
# Deterministic preprocessing (no augmentation): resize to 224x224 and scale
# each channel to [-1, 1].
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Un-augmented dataset, served in shuffled mini-batches of 32
dataset = ShootingFormDataset(data_dir, transform=transform)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# Network, weighted binary loss and optimizer
model = AdvancedShootingFormClassifier()
model = model.to(device)
# pos_weight scales the loss of label-1 samples by 6 to offset class imbalance
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([6.0], device=device))
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [None]:
def grad_cam(model, image, target_class=0, threshold=0.5):
    """
    Generate a Grad-CAM heatmap for the given image and target class.

    The model must expose ``get_activations()`` and
    ``get_activations_gradient()`` returning the feature map of interest and
    its gradient from the latest forward/backward pass.

    Args:
        model: The trained model.
        image: Input image tensor of shape (C, H, W); a batch axis is added here.
        target_class: The class index for which to generate the heatmap. For a
            model with a single output column, class 1 is scored by that
            output and class 0 by its negation.
        threshold: Heatmap values below this (after scaling to [0, 1]) are
            set to 0 (default: 0.5).

    Returns:
        A 2-D float numpy array with the spatial size of the model's feature
        map and values in [0, 1].

    Raises:
        ValueError: If ``target_class`` is not 0 or 1 for a single-output model.
        RuntimeError: If the model captured no gradient during backward.
    """
    # Run on whichever device the model's weights live on.
    model_device = next(model.parameters()).device
    image = image.detach().unsqueeze(0).to(model_device)
    image.requires_grad_(True)

    # Forward pass; clear stale parameter gradients first so repeated calls
    # do not accumulate them.
    model.zero_grad()
    output = model(image)

    if output.dim() == 2 and output.shape[1] == 1:
        # Binary head with one column: indexing column 1 would be out of range.
        if target_class not in (0, 1):
            raise ValueError("target_class must be 0 or 1 for a single-output model")
        score = output[:, 0] if target_class == 1 else -output[:, 0]
    else:
        score = output[:, target_class]
    score.sum().backward()

    gradients = model.get_activations_gradient()
    if gradients is None:
        raise RuntimeError("Model did not capture activation gradients")
    # Detach so the weighting below never touches the tensor stored on the model.
    activations = model.get_activations().detach()

    # One weight per channel: the gradient averaged over batch and space.
    pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
    weighted = activations * pooled_gradients.view(1, -1, 1, 1)

    # Channel average of the first (only) sample, keeping positive evidence only.
    heatmap = torch.relu(weighted.mean(dim=1)[0]).cpu()

    # Scale to [0, 1]; an all-zero map stays zero instead of becoming NaN.
    heatmap = heatmap - heatmap.min()
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak

    # Suppress low-intensity regions.
    heatmap[heatmap < threshold] = 0

    return heatmap.numpy()

In [None]:
def visualize_grad_cam(model, dataloader, num_images=5, threshold=0.5):
    """
    Visualize Grad-CAM heatmaps for a few images.

    Only the first image of each batch is shown, so ``num_images`` is the
    number of batches drawn from ``dataloader``. Each figure shows the
    de-normalised input next to the input overlaid with its heatmap, computed
    for the sample's true label.

    Args:
        model: The trained model.
        dataloader: DataLoader for the dataset.
        num_images: Number of images to visualize.
        threshold: Minimum intensity for the heatmap (default: 0.5).
    """
    model.eval()
    for i, (images, labels) in enumerate(dataloader):
        if i >= num_images:  # Limit the number of images to visualize
            break

        images, labels = images.to(device), labels.to(device)
        true_label = int(labels[0].item())

        # Generate Grad-CAM heatmap
        heatmap = grad_cam(model, images[0], target_class=true_label, threshold=threshold)
        # Guard against NaNs (e.g. from normalising an all-zero map) before
        # the uint8 conversion below.
        heatmap = np.nan_to_num(heatmap)

        # Convert image to numpy for visualization
        image = images[0].detach().cpu().numpy()
        image = np.transpose(image, (1, 2, 0))  # (C, H, W) -> (H, W, C)
        # Undo Normalize(mean=0.5, std=0.5); clip so imshow gets values in [0, 1].
        image = np.clip((image * 0.5) + 0.5, 0.0, 1.0)

        # Resize heatmap to match the image size
        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))

        # Colourise with JET. applyColorMap returns BGR, while the image and
        # matplotlib are RGB, so convert before blending.
        heatmap = np.uint8(255 * heatmap)
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

        # Blend: 60% heatmap, 40% image
        superimposed_img = heatmap * 0.6 + image * 255 * 0.4
        superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

        # Display the results
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.title(f"True: {'No Error Detected' if true_label == 1 else 'Error Detected'}")
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(superimposed_img)
        plt.title("Grad-CAM Heatmap")
        plt.axis('off')

        plt.show()

## Training

In [None]:
# Training Loop
# Each epoch makes one pass over `dataloader`, then reports the mean batch
# loss and the accuracy of the thresholded predictions.
epochs = 3
for epoch in range(epochs):
    model.train()
    running_loss, correct_predictions, total_predictions = 0.0, 0, 0

    for images, labels in dataloader:
        images = images.to(device)
        # Labels arrive as [batch]; reshape to [batch, 1] to match the model output
        labels = labels.to(device).float().unsqueeze(1)

        # Clear old gradients, then forward / backward / update
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate statistics for this batch
        running_loss += loss.item()
        predictions = torch.sigmoid(outputs).gt(0.5).float()
        correct_predictions += predictions.eq(labels).sum().item()
        total_predictions += labels.shape[0]

    # Per-epoch summary
    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = (correct_predictions / total_predictions) * 100
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")


In [None]:
# Persist the trained weights (state_dict only), creating the target folder if needed
save_dir = os.path.dirname(model_save_path)
os.makedirs(save_dir, exist_ok=True)
state = model.state_dict()
torch.save(state, model_save_path)
print(f"Model saved to {model_save_path}")

In [None]:
# Rebuild the network and restore the saved weights in inference mode
saved_state = torch.load(model_save_path, map_location=device)
model = AdvancedShootingFormClassifier().to(device)
model.load_state_dict(saved_state)
model.eval()

# One shuffled sample per batch, so each step yields a single image to plot
visualization_dataloader = DataLoader(dataset, shuffle=True, batch_size=1)

# Show Grad-CAM overlays for five samples, zeroing heatmap values below 0.5
visualize_grad_cam(
    model,
    visualization_dataloader,
    num_images=5,
    threshold=0.5,
)

## Application

In [None]:
import os
import cv2
import torch
from torchvision import transforms
import matplotlib.pyplot as plt

# Input frames, destination for annotated frames, and the trained weights
data_dir = "/content/gdrive/MyDrive/E-RAU(DB)/MA680/data/Shooting/op_Processed_Frames"
output_dir = "/content/gdrive/MyDrive/E-RAU(DB)/MA680/data/Shooting/Evaluated_Frames"
model_save_path = "/content/gdrive/MyDrive/E-RAU(DB)/MA680/models/shooting_form_classifier_SFC_1.pth"

# Deterministic preprocessing: 224x224, tensor, each channel scaled to [-1, 1]
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# The annotated frames are written here, so make sure the folder exists
os.makedirs(output_dir, exist_ok=True)

# Restore the trained network in inference mode
saved_state = torch.load(model_save_path, map_location=device)
model = AdvancedShootingFormClassifier().to(device)
model.load_state_dict(saved_state)
model.eval()

# Function to evaluate frames
def evaluate_frames(model, data_dir, output_dir, transform):
    """
    Evaluate each frame in the directory and save the results.

    Every ``.jpg``/``.png`` in the "Error Detected" and "No Error Detected"
    subfolders of ``data_dir`` is classified, stamped with the predicted
    label, and written to ``output_dir`` under its original file name.
    Frames that cannot be read are reported and skipped.

    Args:
        model: The trained model.
        data_dir: Directory containing the frames.
        output_dir: Directory to save the evaluated frames.
        transform: Transforms to apply to each frame.
    """
    # Class indices follow ShootingFormDataset: 0 = "Error Detected",
    # 1 = "No Error Detected". Colours are BGR: red for an error, green otherwise.
    class_names = ("Error Detected", "No Error Detected")
    class_colors = ((0, 0, 255), (0, 255, 0))
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Send inputs to whichever device the model's weights live on.
    model_device = next(model.parameters()).device

    for folder in class_names:
        folder_path = os.path.join(data_dir, folder)
        for file_name in os.listdir(folder_path):
            if not file_name.endswith(('.jpg', '.png')):
                continue

            # Load the image
            image_path = os.path.join(folder_path, file_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None on failure; skip rather than crash
                # in cvtColor and abandon the rest of the batch.
                print(f"Skipped unreadable image: {image_path}")
                continue
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Preprocess the image
            image_tensor = transform(image_rgb).unsqueeze(0).to(model_device)

            # Get the model's prediction (class index 0 or 1)
            with torch.no_grad():
                output = model(image_tensor)
                prediction = int((torch.sigmoid(output) > 0.5).item())

            # Add the prediction label to the (BGR) image
            label = class_names[prediction]
            cv2.putText(image, label, (10, 30), font, 1, class_colors[prediction], 2)

            # Save the evaluated image
            # NOTE: both folders write into the same output_dir, so frames
            # sharing a file name across folders overwrite each other.
            output_path = os.path.join(output_dir, file_name)
            cv2.imwrite(output_path, image)
            print(f"Processed and saved: {output_path}")

# Evaluate the frames
# Classifies every frame under data_dir and writes the annotated copies to output_dir.
evaluate_frames(model, data_dir, output_dir, transform)

## Evaluation

In [None]:
# Rebuild the network and restore the saved weights in inference mode
saved_state = torch.load(model_save_path, map_location=device)
model = AdvancedShootingFormClassifier().to(device)
model.load_state_dict(saved_state)
model.eval()

# One shuffled sample per batch, so each step yields a single image to plot
visualization_dataloader = DataLoader(dataset, shuffle=True, batch_size=1)

# Show Grad-CAM overlays for five samples using the default threshold
visualize_grad_cam(
    model,
    visualization_dataloader,
    num_images=5,
)