In [None]:
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from keras.utils import to_categorical
from torchvision.transforms import transforms, Compose, RandomRotation, RandomAffine, ToPILImage, RandomHorizontalFlip, RandomVerticalFlip
from torch.utils.data import TensorDataset, DataLoader, random_split, ConcatDataset, Dataset
from sklearn.model_selection import train_test_split
!pip install wandb
import wandb
from wandb.keras import WandbCallback
from torchvision.transforms.functional import to_pil_image, to_tensor

[0m

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Dataset

In [None]:
batch_size = 50

In [None]:
(x, y), (test_X, test_y) = mnist.load_data()
train_X, val_X, train_y, val_y = train_test_split(x, y, test_size=0.16666666666666666667, random_state=42, stratify=y)

val_X = torch.tensor(val_X, dtype=torch.float32).unsqueeze(1)
val_y = torch.tensor(val_y, dtype=torch.long)
test_X = torch.tensor(test_X, dtype=torch.float32).unsqueeze(1)
test_y = torch.tensor(test_y, dtype=torch.long)

# changelog - augmentacia
train_transform = Compose([
    ToPILImage(),
    RandomRotation(degrees=(-20, 20)),
    RandomAffine(degrees=0, translate=(0.1, 0.1)),
])

# train_transform = Compose([
#     ToPILImage(),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomVerticalFlip()
# ])


train_X = [train_transform(x) for x in train_X]

train_X = torch.stack([transforms.ToTensor()(x) for x in train_X])
train_y = torch.tensor(train_y, dtype=torch.long)


train_dataset = TensorDataset(train_X, train_y)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(val_X, val_y)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(test_X, test_y)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_val: ' + str(val_X.shape))
print('Y_val: ' + str(val_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X_train: torch.Size([50000, 1, 28, 28])
Y_train: torch.Size([50000])
X_val: torch.Size([10000, 1, 28, 28])
Y_val: torch.Size([10000])
X_test:  torch.Size([10000, 1, 28, 28])
Y_test:  torch.Size([10000])


# Training

In [None]:
# changelog - hyperparametre, odstranovanie overfitu

num_epochs = 25
num_classes = 10
learning_rate = 0.00001
# learning_rate = 0.0001
regularizer_learning_rate = 0.0001

In [None]:
class Net(nn.Module):
    def __init__(self, num_classes, l2=regularizer_learning_rate):
        super(Net, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.conv_layer1.weight = nn.Parameter(self.conv_layer1.weight + l2 * torch.rand_like(self.conv_layer1.weight))
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv_layer2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.conv_layer2.weight = nn.Parameter(self.conv_layer2.weight + l2 * torch.rand_like(self.conv_layer2.weight))
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv_layer3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv_layer3.weight = nn.Parameter(self.conv_layer3.weight + l2 * torch.rand_like(self.conv_layer3.weight))
        self.max_pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.dropout = nn.Dropout(p=0.2)

        self.fc1 = nn.Linear(in_features=1152, out_features=512)
        self.fc1.weight = nn.Parameter(self.fc1.weight + l2 * torch.rand_like(self.fc1.weight))

        self.fc2 = nn.Linear(in_features=512, out_features=num_classes)
        self.fc2.weight = nn.Parameter(self.fc2.weight + l2 * torch.rand_like(self.fc2.weight))

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, out):
        out = self.conv_layer1(out)
        out = self.relu(out)
        out = self.max_pool1(out)

        out = self.conv_layer2(out)
        out = self.relu(out)
        out = self.max_pool2(out)

        out = self.conv_layer3(out)
        out = self.relu(out)
        out = self.max_pool3(out)

        out = self.dropout(out)
        out = out.view(out.size(0), -1)

        out = self.fc1(out)
        out = self.relu(out)

        out = self.fc2(out)
        out = self.softmax(out)
        return out


In [None]:
model = Net(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_step = len(train_loader) 

In [None]:
wandb.init(project="Nsiete_zadanie1_pytorch")

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
wandb.config.batch_size = batch_size
wandb.config.epochs = num_epochs
wandb.config.optimizer_learning_rate = learning_rate
wandb.config.regularizer_learning_rate = regularizer_learning_rate

In [None]:
for epoch in range(num_epochs):
  
    train_loss = 0
    train_correct = 0
    train_total = 0
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            
    train_acc = 100*train_correct/train_total
    val_acc = 100*val_correct/val_total

    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.2f}%, Val Loss: {:.4f}, Val Acc: {:.2f}%'
          .format(epoch+1, num_epochs, train_loss/(i+1), train_acc, val_loss/len(val_loader), val_acc))

    wandb.log({"epoch": epoch+1, "train_loss": train_loss/(i+1), "train_acc": train_acc, "val_loss": val_loss/len(val_loader), "val_acc": val_acc})
            
            
torch.save(model.state_dict(), "/mnist_classification_model.pt")
wandb.save("/mnist_classification_model.pt")

Epoch [1/25], Train Loss: 2.1871, Train Acc: 30.26%, Val Loss: 1.8848, Val Acc: 57.61%
Epoch [2/25], Train Loss: 1.9447, Train Acc: 54.89%, Val Loss: 1.7849, Val Acc: 67.61%
Epoch [3/25], Train Loss: 1.8775, Train Acc: 60.39%, Val Loss: 1.7677, Val Acc: 69.34%
Epoch [4/25], Train Loss: 1.8265, Train Acc: 65.44%, Val Loss: 1.7008, Val Acc: 76.02%
Epoch [5/25], Train Loss: 1.7771, Train Acc: 70.43%, Val Loss: 1.6691, Val Acc: 79.19%
Epoch [6/25], Train Loss: 1.7439, Train Acc: 73.53%, Val Loss: 1.6443, Val Acc: 81.68%
Epoch [7/25], Train Loss: 1.6986, Train Acc: 78.48%, Val Loss: 1.5801, Val Acc: 88.11%
Epoch [8/25], Train Loss: 1.6576, Train Acc: 82.64%, Val Loss: 1.5643, Val Acc: 89.68%
Epoch [9/25], Train Loss: 1.6341, Train Acc: 84.56%, Val Loss: 1.5475, Val Acc: 91.35%
Epoch [10/25], Train Loss: 1.6166, Train Acc: 86.18%, Val Loss: 1.5400, Val Acc: 92.12%
Epoch [11/25], Train Loss: 1.6044, Train Acc: 87.22%, Val Loss: 1.5309, Val Acc: 93.02%
Epoch [12/25], Train Loss: 1.5949, Train 



Epoch [25/25], Train Loss: 1.5390, Train Acc: 92.93%, Val Loss: 1.5039, Val Acc: 95.72%


['/kaggle/working/wandb/run-20230328_202617-2r8dpp1g/files/mnist_classification_model.pt']

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    total_loss = 0
    
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        loss = criterion(outputs, labels)
        total_loss += loss.item() * labels.size(0)
    
    print('Test accuracy: {:.2f} %'.format(100 * correct / total))
    print('Average loss: {:.4f}'.format(total_loss / total))

Test accuracy: 96.21 %
Average loss: 1.4990
