In [15]:
import gradio as gr
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from PIL import Image

import gradio as gr

In [16]:
class CNN(nn.Module):
    """Small convolutional classifier that returns log-probabilities over 10 classes.

    The network is three 3x3 convolutions (1 -> 32 -> 64 -> 128 channels), a
    2x2 max-pool, and two fully connected layers. ``fc1`` is sized for
    single-channel 28x28 inputs; any other spatial size raises a shape error
    in ``fc1`` instead of being silently reinterpreted as extra batch entries.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        # Fully connected layers
        # 21632 = 128 channels * 13 * 13: for a 28x28 input, conv1/conv2 keep
        # 28x28 (padding=1), conv3 shrinks to 26x26 (no padding), and the 2x2
        # max-pool halves that to 13x13.
        self.fc1 = nn.Linear(21632, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Run the network on ``x`` and return per-class log-probabilities.

        Args:
            x: Tensor of shape (N, 1, 28, 28), or a single unbatched image of
                shape (1, 28, 28), which is treated as a batch of one.

        Returns:
            Tensor of shape (N, 10) holding ``log_softmax`` scores.

        Raises:
            RuntimeError: If the flattened feature size does not match ``fc1``
                (i.e. the input is not 28x28).
        """
        # Promote an unbatched (C, H, W) image to a batch of one so the
        # per-sample flatten below keeps returning a (1, 10) result for it.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        x = F.max_pool2d(x, 2)
        # Flatten per sample (keep dim 0). A view(-1, 21632) would instead fold
        # any surplus features of a wrong-sized input into the batch dimension.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [17]:
# Use the GPU when one is visible, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network and restore its saved weights, remapping the stored
# tensors onto `device` while loading.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model = CNN()
model.load_state_dict(torch.load('modelCNN.pt', map_location=device))

# Place the parameters on the chosen device
model.to(device)

# Switch to evaluation mode; as the cell's last expression the module is displayed
model.eval()

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=21632, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [18]:
# Define preprocessing steps
def preprocess(image):
    """Turn an input image into a normalized single-image batch on `device`.

    The image is reduced to one grayscale channel, resized to 28x28, converted
    to a tensor, and normalized with mean 0.1307 and std 0.3081. A leading
    batch dimension is then added and the result is moved to the notebook's
    global `device`.

    Args:
        image: Image accepted by the torchvision transforms used here
            (presumably a PIL image, as passed by `classify` -- confirm for
            other callers).

    Returns:
        The preprocessed tensor with a leading batch dimension, on `device`.
    """
    pipeline = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    # unsqueeze(0) prepends the batch dimension the model expects
    batch = pipeline(image).unsqueeze(0)
    return batch.to(device)


In [32]:
# Define prediction function
def classify(image_dict):
    """Predict the digit contained in an image-editor payload.

    Args:
        image_dict: Dict produced by the editor input component. The drawing
            is taken from ``image_dict['layers'][0]``; when there is no layer
            the ``'composite'`` entry is used instead. ``None`` or an empty
            dict is accepted.

    Returns:
        The predicted class index as a Python int, or ``None`` when the
        payload holds no image to classify (e.g. an untouched canvas).
    """
    # An empty submission previously crashed with TypeError/IndexError;
    # report "no prediction" instead.
    if not image_dict:
        return None

    layers = image_dict.get('layers') or []
    image = layers[0] if layers else image_dict.get('composite')
    if image is None:
        return None

    # Collapse any other mode (e.g. RGBA) to single-channel grayscale.
    # NOTE(review): this conversion appears to discard the alpha channel, so
    # dark strokes on a transparent layer may come out all black -- confirm
    # against the brush colour configured in the UI.
    if image.mode != 'L':
        image = image.convert('L')

    # Preprocess the image into a normalized batch of one
    image = preprocess(image)
    # Forward pass without tracking gradients (inference only)
    with torch.no_grad():
        output = model(image)
    # Index of the highest log-probability is the predicted digit
    return output.argmax(dim=1).item()


In [34]:

# Square-cropped drawing canvas; its value reaches `classify` as a dict of PIL images.
im = gr.ImageEditor(type="pil", crop_size="1:1", layers=False)

# Alternative input widget, kept for reference:
# im = gr.Sketchpad(type="pil", crop_size="1:1", layers=False)

# Wire the canvas to the classifier and show the prediction as a number.
interface = gr.Interface(
    fn=classify,
    inputs=[im],
    outputs=[gr.Number(label="Digit Prediction")],
    title='Digit Classifier',
)

# debug=True keeps the cell running and streams server output until interrupted.
interface.launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


<PIL.Image.Image image mode=RGBA size=600x600 at 0x1239B6130>
<class 'dict'>
Keyboard interruption in main thread... closing server.


