<a href="https://colab.research.google.com/github/RaincallerMei/PyTorch-Deep-Learning/blob/main/Claude%20MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define the CNN architecture
class MNISTClassifier(nn.Module):
    """Two-stage convolutional classifier producing 10 logits per image.

    Each convolutional stage is a 5x5 convolution with padding 2 (spatial
    size preserved), a ReLU, and a 2x2 max-pool (spatial size halved). The
    classifier head expects ``32 * 7 * 7`` flattened features, which is what
    a single-channel 28x28 input yields after the two stages.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return one conv -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 16 -> 32 channels.
        self.conv1 = self._conv_stage(1, 16)
        self.conv2 = self._conv_stage(16, 32)
        # Classifier head: hidden layer with dropout, then one logit per class.
        flat_features = 32 * 7 * 7
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of logits."""
        features = self.conv2(self.conv1(x))
        # Collapse everything except the batch dimension before the head.
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

# Training function
def train_model(model, train_loader, test_loader, num_epochs=5, device="cuda"):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    Args:
        model: Module mapping a batch of inputs to per-class logits.
        train_loader: Iterable of ``(images, labels)`` batches used for the
            optimisation steps. ``len(train_loader)`` is read for the
            progress message printed every 100 steps.
        test_loader: Iterable of ``(images, labels)`` batches scored after
            every epoch. May be empty, in which case no accuracy is computed.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the model and every batch are moved to. The default
            of ``"cuda"`` requires a CUDA-capable machine; pass ``"cpu"``
            (or a ``torch.device``) otherwise.

    Returns:
        None. Progress is reported through ``print``.
    """
    criterion = nn.CrossEntropyLoss()

    # Move the model before building the optimizer so the optimizer is
    # constructed over the parameters as they live on the target device.
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Zero the gradient buffers
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (i + 1) % 100 == 0:
                # Loss is the mean over the last 100 steps; accuracy is
                # cumulative since the start of the current epoch.
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], '
                      f'Loss: {running_loss/100:.4f}, '
                      f'Accuracy: {100 * correct/total:.2f}%')
                running_loss = 0.0

        # Validation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # An empty test_loader would otherwise divide by zero and abort
        # training after the first epoch.
        if total:
            print(f'Validation Accuracy: {100 * correct/total:.2f}%')
        else:
            print('Validation Accuracy: n/a (test_loader yielded no samples)')

# Main execution
def main():
    """Download MNIST, train an ``MNISTClassifier``, and save its weights.

    Uses CUDA when PyTorch reports it as available and the CPU otherwise.
    The dataset is stored under ``./data`` and the trained ``state_dict``
    is written to ``mnist_cnn.pth`` in the working directory.
    """
    # Pick the compute device.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Convert images to tensors, then standardise with the MNIST mean and std.
    to_tensor = transforms.ToTensor()
    standardize = transforms.Normalize((0.1307,), (0.3081,))
    preprocess = transforms.Compose([to_tensor, standardize])

    def load_split(is_train):
        """Fetch (downloading if needed) the train or test split of MNIST."""
        return torchvision.datasets.MNIST(
            root='./data',
            train=is_train,
            transform=preprocess,
            download=True,
        )

    train_set = load_split(True)
    test_set = load_split(False)

    # Shuffle only the training batches; evaluation order is kept fixed.
    train_batches = DataLoader(train_set, batch_size=64, shuffle=True)
    test_batches = DataLoader(test_set, batch_size=64, shuffle=False)

    classifier = MNISTClassifier()
    train_model(classifier, train_batches, test_batches, num_epochs=5, device=device)

    # Persist only the learned parameters, not the whole module object.
    torch.save(classifier.state_dict(), 'mnist_cnn.pth')

# Run the full download/train/save pipeline only when this code is executed
# directly (as a script or a notebook cell), not when it is imported.
if __name__ == '__main__':
    main()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 53.7MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.78MB/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 13.8MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 7.54MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Epoch [1/5], Step [100/938], Loss: 0.8465, Accuracy: 72.47%
Epoch [1/5], Step [200/938], Loss: 0.2944, Accuracy: 81.96%
Epoch [1/5], Step [300/938], Loss: 0.2077, Accuracy: 85.92%
Epoch [1/5], Step [400/938], Loss: 0.1518, Accuracy: 88.33%
Epoch [1/5], Step [500/938], Loss: 0.1399, Accuracy: 89.83%
Epoch [1/5], Step [600/938], Loss: 0.1401, Accuracy: 90.89%
Epoch [1/5], Step [700/938], Loss: 0.1157, Accuracy: 91.70%
Epoch [1/5], Step [800/938], Loss: 0.1171, Accuracy: 92.29%
Epoch [1/5], Step [900/938], Loss: 0.1087, Accuracy: 92.80%
Validation Accuracy: 98.49%
Epoch [2/5], Step [100/938], Loss: 0.0920, Accuracy: 97.41%
Epoch [2/5], Step [200/938], Loss: 0.0798, Accuracy: 97.48%
Epoch [2/5], Step [300/938], Loss: 0.0888, Accuracy: 97.38%
Epoch [2/5], Step [400/938], Loss: 0.0923, Accuracy: 97.38%
Epoch [2/5], Step [500/938], Loss: 0.0834, Accuracy: 97.40%
Epoch [2/5], Step [600/938], Loss: 0.0839, Accuracy: 97.41%
Epoch [2/5], Step [700/938], Loss: 0.0810, Accuracy: 97.45%
Epoch [2/5],