In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# ---------------- Reproducibility ----------------
# Seed PyTorch's global RNG before any model or DataLoader is created so that
# weight initialisation and the shuffled batch order repeat on every
# "Restart Kernel -> Run All". (GPU kernels may still introduce small
# run-to-run differences.)
SEED = 42
torch.manual_seed(SEED)

# ---------------- Device setup ----------------
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# ---------------- MNIST dataset ----------------
# ToTensor yields values in [0, 1]; Normalize with mean 0.5 / std 0.5 then
# computes (x - 0.5) / 0.5, i.e. rescales them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# The training split is downloaded into ./data on first use; the test split
# is read from the same root directory.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform)

# Shuffle only the training batches; evaluation order is kept fixed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# ---------------- Define Neural Network ----------------
class NeuralNet(nn.Module):
    """Small fully connected classifier for 28x28 inputs.

    Pipeline: flatten to 784 features -> Linear(784, 128) -> ReLU ->
    Linear(128, 10). The forward pass returns the 10 raw scores produced by
    the last linear layer; no softmax is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        # Attribute names (fc1 / relu / fc2) define the state-dict keys.
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return a (N, 10) tensor of class scores for the input batch."""
        # Collapse every sample into a row of 784 values.
        flat = x.view(-1, 28 * 28)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# ---------------- Model, loss and optimizer ----------------
# The network is moved to the selected device before the optimizer captures
# its parameters.
model = NeuralNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ---------------- Training loop ----------------
n_epochs = 5
print("Training started...")
for epoch in range(n_epochs):
    # Running sum of the per-batch losses for this epoch.
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Drop gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagate and update the weights.
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean of the per-batch losses.
    print(f"Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}")

# ---------------- Evaluation ----------------
# Switch to inference mode and count correct predictions on the test set
# without tracking gradients.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the highest score in each row.
        _, predicted = outputs.max(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

# ---------------- Save the model ----------------
# Only the parameters (state dict) are written, so loading requires the same
# NeuralNet class definition.
torch.save(model.state_dict(), "mnist_model.pth")
print("Model saved as mnist_model.pth")


Using device: cpu
Training started...
Epoch [1/5], Loss: 0.3844
Epoch [2/5], Loss: 0.1974
Epoch [3/5], Loss: 0.1389
Epoch [4/5], Loss: 0.1109
Epoch [5/5], Loss: 0.0929
Test Accuracy: 96.94%
Model saved as mnist_model.pth


In [8]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np
from PIL import Image

# -------- Neural Net (Same as training model) --------
class NeuralNet(nn.Module):
    """Fully connected digit classifier used for inference.

    Layout: 784 inputs -> 128 hidden units with ReLU -> 10 output scores.
    The layer attribute names (fc1, fc2) are the keys looked up when a saved
    state dict is loaded into this module.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 scores per row."""
        flattened = x.view(-1, 28 * 28)
        activated = self.relu(self.fc1(flattened))
        scores = self.fc2(activated)
        return scores

# -------- Load Model --------
# Pick the GPU when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network on that device, then restore the saved parameters.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on another device type can still be loaded.
model = NeuralNet().to(device)
state_dict = torch.load("mnist_model.pth", map_location=device)
model.load_state_dict(state_dict)

# Inference only: put the module in evaluation mode.
model.eval()

# -------- Image Preprocessing & Prediction --------
def preprocess_image(image_path):
    """Load an image containing one digit and turn it into a model-ready tensor.

    Steps: read as grayscale, invert, binarise, crop to the bounding box of
    the largest external contour, scale the crop so its longer side is 20 px
    (aspect ratio preserved), centre it on a 28x28 black canvas, then apply
    ToTensor + Normalize(0.5, 0.5).

    The crop is scaled uniformly rather than stretched to a 20x20 square:
    stretching distorts narrow digits such as "1" into a filled block.

    Args:
        image_path: Path of the image file to read. The inversion step means
            the digit is expected to be darker than its background.

    Returns:
        A float tensor of shape (1, 1, 28, 28) on ``device``.

    Raises:
        ValueError: If the file cannot be read as an image, or if no contour
            is found after thresholding.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Image not found or invalid format.")

    # Invert so the digit is bright on a dark background, then binarise.
    img = cv2.bitwise_not(img)
    _, img = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        raise ValueError("No digit found in image.")

    # Assume the largest contour is the digit and crop to its bounding box.
    cnt = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)
    digit = img[y:y+h, x:x+w]

    # Scale so the longer side becomes 20 px while keeping the aspect ratio;
    # the shorter side is clamped to at least 1 px.
    box_size = 20
    scale = box_size / max(w, h)
    new_w = max(1, round(w * scale))
    new_h = max(1, round(h * scale))
    # INTER_AREA is the recommended filter for shrinking; use bilinear when enlarging.
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
    digit = cv2.resize(digit, (new_w, new_h), interpolation=interpolation)

    # Centre the scaled digit on a 28x28 canvas filled with zeros (black).
    canvas_size = 28
    pad_top = (canvas_size - new_h) // 2
    pad_bottom = canvas_size - new_h - pad_top
    pad_left = (canvas_size - new_w) // 2
    pad_right = canvas_size - new_w - pad_left
    padded = np.pad(
        digit,
        ((pad_top, pad_bottom), (pad_left, pad_right)),
        mode='constant',
        constant_values=0,
    )

    # Convert to PIL and Tensor, using the same normalisation as this file's
    # training transform.
    pil_img = Image.fromarray(padded)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    # unsqueeze(0) adds the batch dimension expected by the model.
    tensor = transform(pil_img).unsqueeze(0).to(device)
    return tensor

def predict_digit(image_path):
    """Return the class index the model predicts for the image at ``image_path``.

    The image is preprocessed with ``preprocess_image`` and passed through the
    module-level ``model`` with gradient tracking disabled. The result is the
    index of the highest output score, as a Python int.
    """
    input_tensor = preprocess_image(image_path)
    with torch.no_grad():
        scores = model(input_tensor)
        best_class = scores.max(dim=1).indices
    return best_class.item()

# -------- Run Prediction --------
if __name__ == "__main__":
    image_path = "digit.png"  # Replace with your image path
    # Best-effort run: any failure is reported as a message instead of a
    # traceback.
    try:
        digit = predict_digit(image_path)
    except Exception as e:
        print("Error:", e)
    else:
        print(f"Predicted Digit: {digit}")

Predicted Digit: 6
