In [None]:
from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/drive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


# Load Libraries

In [None]:
import torch
import gradio as gr
from torchvision import transforms
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

# Load Dataset (Train, Validation, Test)

In [None]:


data_dir = '/content/drive/MyDrive/Deep Learning with pytorch/rps_dataset/Rock-Paper-Scissors'

# Every image is resized to 150x150 and then converted to a tensor.
transform = transforms.Compose(
    [transforms.Resize((150, 150)), transforms.ToTensor()]
)


def _image_folder(split):
    """Return the ImageFolder dataset for one split sub-directory of data_dir."""
    return datasets.ImageFolder(os.path.join(data_dir, split), transform=transform)


# One dataset per split directory.
train_data = _image_folder('train')
val_data = _image_folder('validation')
test_data = _image_folder('test')

# Batches of 32; only the training split is shuffled.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)


This section loads the Rock-Paper-Scissors image dataset from Google Drive and prepares it for training, validation, and testing. It resizes all images to 150x150 pixels and converts them to tensors using `transforms`. The `ImageFolder` dataset class reads each split directory (train, validation, test) and labels every image by the sub-folder it is stored in, and `DataLoader` is used to load the data in batches of 32. Shuffling is applied only to the training data, so that each epoch sees the training images in a different order.


# Define the CNN Model

In [None]:


class RPS_CNN(nn.Module):
    """Small CNN that classifies 150x150 RGB images.

    Two (3x3 convolution -> ReLU -> 2x2 max-pool) stages are followed by two
    fully connected layers. The first linear layer is sized for 150x150
    inputs (150 -> 75 -> 37 after the two pooling steps).

    Args:
        num_classes: Number of output logits. Defaults to 3
            (rock, paper, scissors).
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.fc1 = nn.Linear(64 * 37 * 37, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class scores, one row per input image.

        Args:
            x: Batch of images shaped (batch, 3, 150, 150).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        x = self.pool(F.relu(self.conv1(x)))  # 150x150 -> 75x75
        x = self.pool(F.relu(self.conv2(x)))  # 75x75 -> 37x37
        # Flatten each sample separately. Unlike view(-1, 64 * 37 * 37), this
        # keeps the batch dimension fixed, so a wrongly sized input fails in
        # fc1 instead of being silently regrouped across samples.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


This code defines a Convolutional Neural Network (CNN) named `RPS_CNN` for classifying images into rock, paper, or scissors. It has two convolutional layers with ReLU activation and max pooling to reduce image size. After feature extraction, the image is flattened and passed through two fully connected layers. The final layer outputs predictions for the three classes.


# Training the Model

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = RPS_CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: summed batch loss, correct predictions, samples seen.
    epoch_loss, n_correct, n_seen = 0.0, 0, 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()
        predictions = logits.max(dim=1).indices
        n_correct += (predictions == batch_labels).sum().item()
        n_seen += batch_labels.size(0)

    train_acc = 100 * n_correct / n_seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {train_acc:.2f}%")


Epoch [1/10], Loss: 43.3669, Accuracy: 80.16%
Epoch [2/10], Loss: 2.4939, Accuracy: 99.29%
Epoch [3/10], Loss: 0.4586, Accuracy: 100.00%
Epoch [4/10], Loss: 0.1593, Accuracy: 100.00%
Epoch [5/10], Loss: 0.0749, Accuracy: 100.00%
Epoch [6/10], Loss: 0.0418, Accuracy: 100.00%
Epoch [7/10], Loss: 0.0282, Accuracy: 100.00%
Epoch [8/10], Loss: 0.0217, Accuracy: 100.00%
Epoch [9/10], Loss: 0.0172, Accuracy: 100.00%
Epoch [10/10], Loss: 0.0142, Accuracy: 100.00%


This code trains the CNN model on the Rock-Paper-Scissors dataset. It uses the Adam optimizer and CrossEntropyLoss. The model runs for 10 epochs, and in each epoch, it processes all training images in batches, calculates loss and gradients, updates model weights, and tracks the accuracy. After each epoch, it prints the training loss (the sum of the per-batch losses over the epoch, not an average) and the training accuracy. The model is trained on GPU if available.


# Evaluate on Validation Set

In [None]:
# Inference only: evaluation mode, and no gradient tracking inside the loop.
model.eval()
val_correct, val_total = 0, 0

with torch.no_grad():
    for batch_images, batch_labels in val_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # The predicted class is the index of the largest score in each row.
        predictions = model(batch_images).max(dim=1).indices
        val_correct += (predictions == batch_labels).sum().item()
        val_total += batch_labels.size(0)

val_acc = 100 * val_correct / val_total
print(f"Validation Accuracy: {val_acc:.2f}%")


Validation Accuracy: 81.82%


This code evaluates the trained model on the validation set. It sets the model to evaluation mode (model.eval()), disables gradient calculations (with torch.no_grad()), and then predicts labels for the validation images. It compares the predictions with actual labels to compute the total correct predictions and calculates the validation accuracy as a percentage.

# Test Set Evaluation

In [None]:
# Set evaluation mode here too, so this cell does not depend on the
# validation cell having been run first (notebook cells can run in any order).
model.eval()
test_correct = 0
test_total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest score in each row.
        _, predicted = outputs.max(1)
        test_correct += predicted.eq(labels).sum().item()
        test_total += labels.size(0)

test_acc = 100 * test_correct / test_total
print(f"Test Accuracy: {test_acc:.2f}%")


Test Accuracy: 75.81%


This code evaluates the model on the test dataset. It disables gradient calculation for efficiency (`with torch.no_grad()`), feeds the test images to the model, and compares predicted labels with the true labels. The total correct predictions are tracked and used to calculate the test accuracy as a percentage, which is then printed.


# Hyperparameter tuning

In [None]:
def train_model(model, train_loader, optimizer, criterion, num_epochs, device=None):
    """Train ``model`` in place and print loss and accuracy after every epoch.

    Args:
        model: Network to train; its weights are updated in place.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer built over ``model.parameters()``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none), so the
            function does not rely on a global ``device`` variable.

    Returns:
        A list with one ``(loss_sum, accuracy_percent)`` tuple per epoch,
        where ``loss_sum`` is the sum of the per-batch losses.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    history = []
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # The predicted class is the index of the largest score in each row.
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

        # An empty loader yields no samples; report 0% rather than dividing by zero.
        acc = 100 * correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {acc:.2f}%")
        history.append((running_loss, acc))

    return history


In [None]:
# Compare learning rates by training a fresh network for each one.
# Sweep-local names are used on purpose: rebinding the global `model` and
# `optimizer` here would replace the network trained in the training cell,
# and later cells that use `model` would then run on the last sweep network.
for lr in [0.001, 0.0005, 0.0001]:
    print(f"\nTraining with learning rate = {lr}")
    sweep_model = RPS_CNN().to(device)
    sweep_optimizer = torch.optim.Adam(sweep_model.parameters(), lr=lr)
    train_model(sweep_model, train_loader, sweep_optimizer, criterion, num_epochs=5)



Training with learning rate = 0.001
Epoch [1/5], Loss: 54.0314, Accuracy: 76.15%
Epoch [2/5], Loss: 3.0983, Accuracy: 99.21%
Epoch [3/5], Loss: 0.5237, Accuracy: 99.96%
Epoch [4/5], Loss: 0.1770, Accuracy: 100.00%
Epoch [5/5], Loss: 0.0714, Accuracy: 100.00%

Training with learning rate = 0.0005
Epoch [1/5], Loss: 77.1582, Accuracy: 58.29%
Epoch [2/5], Loss: 18.7126, Accuracy: 94.25%
Epoch [3/5], Loss: 2.2562, Accuracy: 99.60%
Epoch [4/5], Loss: 0.5203, Accuracy: 99.92%
Epoch [5/5], Loss: 0.2205, Accuracy: 100.00%

Training with learning rate = 0.0001
Epoch [1/5], Loss: 65.9425, Accuracy: 66.59%
Epoch [2/5], Loss: 23.9922, Accuracy: 92.82%
Epoch [3/5], Loss: 7.3174, Accuracy: 99.05%
Epoch [4/5], Loss: 3.1539, Accuracy: 99.48%
Epoch [5/5], Loss: 1.6272, Accuracy: 99.88%


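This section tests different learning rates (0.001, 0.0005, 0.0001) to improve model accuracy. A `train_model()` function was created to train the CNN using the specified optimizer and learning rate. For each value, a new model is trained for 5 epochs, and the training loss and training accuracy are printed after every epoch. This helps identify which learning rate leads to faster convergence on the training data. Among the tested values, **0.001 performed best** by this measure, reaching 100% training accuracy in the fewest epochs. Because the comparison uses training accuracy only, and validation accuracy was much lower than training accuracy earlier (81.82% vs. 100%), the learning rates should also be compared on the validation set before drawing a final conclusion.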


# Gradio

In [None]:
# Inference only from here on: put the current `model` in evaluation mode.
model.eval()

# Uploaded images get the same preprocessing as the datasets:
# resize to 150x150, then convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize((150, 150)), transforms.ToTensor()]
)

# Label for each output index; the order must match the class indices
# ImageFolder assigned during training (sub-folder names in sorted order).
class_names = ['paper', 'rock', 'scissors']


# Prediction function
def predict(image):
    """Classify one image as paper, rock, or scissors.

    Args:
        image: PIL image from the Gradio image input, or ``None`` when the
            form is submitted without an image.

    Returns:
        The predicted class name, or a short prompt when no image was given.
    """
    if image is None:
        # Submitting without an image passes None; answer with a message
        # instead of failing inside the transform.
        return "Please upload an image."

    # Force three channels so the tensor always matches conv1's 3 input
    # channels, whatever mode the image arrives in (RGBA, grayscale, ...).
    image = transform(image.convert("RGB")).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(image)
        # The predicted class is the index of the largest score.
        _, predicted = torch.max(output, 1)
    return class_names[predicted.item()]


This code sets up a simple Gradio web app to predict whether an uploaded image is showing rock, paper, or scissors using the trained CNN model. The input image is resized and transformed into a tensor, then passed to the model for prediction. The predicted class (e.g., paper, rock, or scissors) is returned and displayed to the user.

In [None]:
# Web UI around predict(): a PIL image goes in, the predicted label comes out as text.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Rock Paper Scissors Classifier",
)
# debug=True keeps the cell running so errors and logs stay visible.
demo.launch(debug=True)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://239e77bcaefd380bc3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://239e77bcaefd380bc3.gradio.live




This line creates and launches the Gradio interface. It takes an image input (in PIL format), passes it to the predict function, and returns the predicted label as text. The web app is titled "Rock Paper Scissors Classifier" and will open in a new tab when launched.