Refer https://www.bilibili.com/video/BV1D1421r7sj?spm_id_from=333.788.videopod.sections&vd_source=b47e02f9c38ec61f19d536cee73b2322

In [19]:
import torch
print(torch.cuda.is_available()) 
print(torch.cuda.device_count()) 
print(torch.cuda.get_device_name(0))

True
1
NVIDIA GeForce RTX 2060


In [4]:
!pip install torchvision




[notice] A new release of pip is available: 24.1.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

![This is an image](image/svhn.jpg)


In [11]:
transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = datasets.SVHN(root='./svhn', split='train', download=True, transform=transform)
loader = DataLoader(train_dataset, batch_size=500, shuffle=False)

mean = torch.zeros(3)
std = torch.zeros(3)

for images, _ in loader:
    for c in range(3):
        mean[c] += images[:, c, :, :].mean()
        std[c] += images[:, c, :, :].std()

mean /= len(loader)
std /= len(loader)

print(f'Mean: {mean}')
print(f'Std: {std}')

Using downloaded and verified file: ./svhn\train_32x32.mat
Mean: tensor([0.4378, 0.4438, 0.4729])
Std: tensor([0.1977, 0.2007, 0.1967])


In [12]:
transform = transforms.Compose([
    transforms.ToTensor(), #Converts images from PIL or NumPy arrays to PyTorch tensors and scales pixel values from [0,255] to [0,1]
    transforms.Normalize(mean=[0.4378, 0.4438, 0.4729],
                         std=[0.1977, 0.2007, 0.1967])  # Standardizes data using the provided mean and standard deviation
])

#  download and load the SVHN dataset
train_dataset = datasets.SVHN(root='./svhn', split='train', download=True, transform=transform)
test_dataset = datasets.SVHN(root='./svhn', split='test', download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


Using downloaded and verified file: ./svhn\train_32x32.mat
Using downloaded and verified file: ./svhn\test_32x32.mat


In [13]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}]\tLoss: {loss.item():.4f}')

In [14]:
def test(model, device, test_loader):
    model.eval()
    correct = 0
    test_loss = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)')


<!-- ![Image from URL](image/alexnet.jpg) -->
<img src="image/alexnet.jpg" alt="vgg" width="700"/>

output_size = (input_size + 2 × padding - kernel_size) / stride + 1

The size of SVHN's image is 3 channels and 32 x 32 

In [15]:
class ModifiedAlexNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5, padding=2),  
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Flatten(),

            nn.Linear(128 * 4 * 4, 256), 
            nn.ReLU(),

            nn.Linear(256, 128),
            nn.ReLU(),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 10)
        )

    def forward(self, x):
        return self.model(x)


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

model = ModifiedAlexNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Using device: cuda
Train Epoch: 1 [0/73257]	Loss: 2.3166
Train Epoch: 1 [12800/73257]	Loss: 2.1686
Train Epoch: 1 [25600/73257]	Loss: 1.3917
Train Epoch: 1 [38400/73257]	Loss: 1.1152
Train Epoch: 1 [51200/73257]	Loss: 0.7532
Train Epoch: 1 [64000/73257]	Loss: 0.6842
Test set: Average loss: 0.6119, Accuracy: 21336/26032 (81.96%)
Train Epoch: 2 [0/73257]	Loss: 0.4753
Train Epoch: 2 [12800/73257]	Loss: 0.3502
Train Epoch: 2 [25600/73257]	Loss: 0.6210
Train Epoch: 2 [38400/73257]	Loss: 0.3264
Train Epoch: 2 [51200/73257]	Loss: 0.4748
Train Epoch: 2 [64000/73257]	Loss: 0.3533
Test set: Average loss: 0.3695, Accuracy: 23190/26032 (89.08%)
Train Epoch: 3 [0/73257]	Loss: 0.3558
Train Epoch: 3 [12800/73257]	Loss: 0.2510
Train Epoch: 3 [25600/73257]	Loss: 0.4988
Train Epoch: 3 [38400/73257]	Loss: 0.4240
Train Epoch: 3 [51200/73257]	Loss: 0.2043
Train Epoch: 3 [64000/73257]	Loss: 0.2469
Test set: Average loss: 0.3264, Accuracy: 23511/26032 (90.32%)
Train Epoch: 4 [0/73257]	Loss: 0.1919
Train Epoc