In [12]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


In [None]:
# download the data
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Download and load the training data
trainset = datasets.MNIST('/Users/log/Github/VisionML/data', download=True, train=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.MNIST('/Users/log/Github/VisionML/data', download=True, train=False, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=True)

In [13]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.fc1 = nn.Linear(7*7*64, 1024)
        self.fc2 = nn.Linear(1024, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 7*7*64)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = CNN()


In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [23]:
epochs = 1
for epoch in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")


# Save the model weights
model_path = 'mnist_cnn_model.pth'
torch.save(model.state_dict(), model_path)


Epoch 1, Loss: 0.010158389571474828


In [21]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the test set: {100 * correct / total}%')


Accuracy on the test set: 98.95%


In [22]:
import os
import struct
import numpy as np
from PIL import Image

def read_mnist_images(filename, num_images):
    with open(filename, 'rb') as f:
        # Skip the magic number and dimensions information
        f.read(16)
        # Read the image data
        image_data = f.read(num_images * 28 * 28)
        # Convert to numpy array and reshape
        images = np.frombuffer(image_data, dtype=np.uint8).reshape(num_images, 28, 28)
        return images

def save_images(images, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for i, image in enumerate(images):
        img = Image.fromarray(image, mode='L')
        img.save(os.path.join(output_dir, f'image_{i+1}.png'))

# Paths to your MNIST binary files
test_images_file = '/Users/log/Github/VisionML/data/MNIST/raw/t10k-images-idx3-ubyte'

# Read and save 10 test images
test_images = read_mnist_images(test_images_file, 10)
save_images(test_images, 'mnist_png_test_images')

In [36]:
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import os

# Define a transform to convert the images to the format the model expects
transform = transforms.Compose([
    transforms.Grayscale(),            # Convert to grayscale if your images are RGB
    transforms.Resize((28, 28)),       # Resize to the same size as MNIST images
    transforms.ToTensor(),             # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))  # Apply the same normalization as training
])

# Assume you've arranged the 10 images in a directory with a single class subfolder
dataset = ImageFolder(root='mnist_png_test_images', transform=transform)
loader = DataLoader(dataset, batch_size=1, shuffle=False)

# Load your trained model
model = CNN()
model.load_state_dict(torch.load('mnist_cnn_model.pth'))
model.eval()

# Ensure model is in evaluation mode
model.eval()

# Disable gradient calculation for efficiency and to avoid unwanted side effects during inference
# with torch.no_grad():
#     for i, (images, _) in enumerate(loader):
#         outputs = model(images)
#         _, predicted = torch.max(outputs, 1)
#         print(f'Image {i+1}: Predicted Digit - {predicted.item()}')

# If you don't have an ImageFolder structure and just have a list of images
# Here is how you could load and preprocess each image individually
for i in range(11):
    # print(i)
    img_path = f'mnist_png_test_images/0/image_{i+1}.png'  # Update this format based on your saved files
    img = Image.open(img_path)
    img = transform(img)
    img = img.unsqueeze(0)  # Add batch dimension

    # Make prediction
    output = model(img)
    _, predicted = torch.max(output, 1)
    print(f'Image {i+1}: Predicted Digit - {predicted.item()}')
# /Users/log/Github/VisionML/mnist_png_test_images/0/image_1.png


FileNotFoundError: [Errno 2] No such file or directory: 'mnist_png_test_images/image_1.png'