In [14]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import os


In [15]:
import torch.nn.functional as F

class SingleDigitMNISTNet(torch.nn.Module):

    def __init__(self):
        super(SingleDigitMNISTNet, self).__init__()
        # input MNIST images for nof_digits digit-image: 1 x nof_digitsx28 x nof_digitsx28
        self.numChannels1 = 8
        self.numChannels2 = 32
        
        self.conv1 = torch.nn.Conv2d(1, self.numChannels1, 5, padding=2, bias=False) # <- out: 8 x 28 x 28  # <- max-pooling out: 8 x 14 x 14
        self.conv1_batchnorm = torch.nn.BatchNorm2d(num_features = self.numChannels1)
        
        # use normal initialization for conv1:
        torch.nn.init.normal_(self.conv1.weight)
        torch.nn.init.constant_(self.conv1_batchnorm.weight, 0.5)
        torch.nn.init.zeros_(self.conv1_batchnorm.bias)
        
        self.conv2 = torch.nn.Conv2d(self.numChannels1, self.numChannels2, 3, padding=1, bias=False) #<- out: 32 x 14 x 14
        self.conv2_batchnorm = torch.nn.BatchNorm2d(num_features = self.numChannels2)

         # use normal initialization for conv2:
        torch.nn.init.normal_(self.conv2.weight)
        torch.nn.init.constant_(self.conv2_batchnorm.weight, 0.5)
        torch.nn.init.zeros_(self.conv2_batchnorm.bias)

        nof_classes = 11 # figures 0...9 and "not recognized"
        self.fc1 = torch.nn.Linear(self.numChannels2 *7 * 7, 256)
        self.fc2 = torch.nn.Linear(256, 11)
        
    
    def forward(self, x):
        x = self.conv1_batchnorm(self.conv1(x))
        x = F.max_pool2d(F.relu(x), (2,2))
        x = self.conv2_batchnorm(self.conv2(x))
        x = F.max_pool2d(F.relu(x), (2,2))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.log_softmax(x, dim=1) # use log_softmax() (i.e. log(softmax()) ) to use NLLLoss() as loss-function
        return x

In [22]:
# check the network-definition:

pseudo_img_batch = torch.rand(2,1,28,28)
print(f"pseudo_img_batch.shape: {pseudo_img_batch.shape}")

tmodel = SingleDigitMNISTNet()
toutput = tmodel(pseudo_img_batch)
print(f"model output shape: {toutput.shape}")
print(f"sum is equal (zero,zero)? : {toutput.sum(dim=1)}")


pseudo_img_batch.shape: torch.Size([2, 1, 28, 28])
model output shape: torch.Size([2, 11])
sum is equal (zero,zero)? : tensor([-26.4055, -26.3921], grad_fn=<SumBackward1>)


In [46]:
import datetime

def training(epochs, train_loader, model, loss_fn, optimizer, device, show_progress= False, L2_regularization=False, L1_regularization=False, L2_lambda=0.001, L1_lambda=0.001):
    l2_norm = 0
    l1_norm = 0
    model.train()
    for epoch in range(1, epochs + 1):
        loss_train = 0.0
        for imgs, y in train_loader:
            imgs = imgs.to(device)
            y = y.to(device)
            
            yp = model(imgs)
            loss = loss_fn(yp, y)
            
            if(L2_regularization):
                l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
                loss = loss + L2_lambda * l2_norm
            
            if(L1_regularization):
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss = loss + L1_lambda * l1_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if(epoch == 1 or epoch%10 == 0):
            print(F"len train loader: {len(train_loader)}")
            print(f"{datetime.datetime.now()} Epoch {epoch} Training loss {loss_train/ len(train_loader)}")
            if(show_progress): # prints out some weights to see if anything happens at all:
                print(model.conv1.weight[0][0:10])


def validate(model, train_loader, val_loader, loss_fn):
    model.eval()
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        equals = 0
        nof_y = 0
        for imgs, y  in loader:
            with torch.no_grad():
                yp = model(imgs)
                y_class = torch.argmax(yp, dim=1)
                #print(f"y.shape: {y.shape}")
                #print(f"y_class.shape: {y_class.shape}")
                equals += torch.eq(y_class, y).sum()
                nof_y += len(y)
        
        print(f"Accuracy {name}: {equals/nof_y}")


In [45]:
for imgs, y in test_dl:
    print(len(y))
    print(y)
    break

64
tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3])


In [25]:
# training:

from torch.utils.data import DataLoader

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device {device}")

# load datasets and create dataloader for BATCH_SIZE

BATCH_SIZE = 64

training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

train_dl = DataLoader(training_data, batch_size=BATCH_SIZE)

test_data = datasets.MNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor()
)

test_dl = DataLoader(test_data, batch_size=BATCH_SIZE)

# start with a new model each time:
model = None
model = SingleDigitMNISTNet().to(device=device)

# optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-5) # this optimizer is divergent most of the time leading to nan-size loss!
optimizer = torch.optim.Adam(model.parameters())

loss_fn = torch.nn.NLLLoss() # our model outputs log_softmax(), i.e. we can use NLLLoss() here

training(
    epochs = 30,
    train_loader = train_dl,
    model = model,
    loss_fn = loss_fn,
    optimizer = optimizer,
    device = device,
    show_progress = False,
    L2_regularization = True,
    L1_regularization = True,
    L1_lambda=0.01    
)

print("Training finished")


Using device cpu
len train loader: 938
2022-01-27 15:55:51.818228 Epoch 1 Training loss 16.19423073339564
len train loader: 938
2022-01-27 16:08:54.460777 Epoch 10 Training loss 2.8628625328352713
len train loader: 938
2022-01-27 16:22:20.027360 Epoch 20 Training loss 2.872350185156377
len train loader: 938
2022-01-27 16:34:59.643368 Epoch 30 Training loss 2.887605108686093
Training finished


In [29]:
# save, load the model via it's state-dict:

import os

#MODE="load"
MODE="save"

MODEL_PATH = os.path.join(os.getcwd(), "simplesaved_model.pt")

if( MODE=="save"):
    torch.save(model.state_dict(), MODEL_PATH)

if ( MODE == "load"):
    #Load with this code:
    model = SingleDigitMNISTNet(*args, **kwargs)
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()

print(f"Finished {MODE}ing the model to {MODEL_PATH}.")

In [None]:
# save, load the model using TorchScript:
# Using the TorchScript format, you will be able to load the exported model and run inference without defining the model class.
# https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-across-devices

import os

MODEL_PATH = os.path.join(os.getcwd(), "simplesaved_model.pt")

# chose load or save:
#MODE="load"
MODE="save"

if( MODE=="save"):
    model_scripted = torch.jit.script(model) # Export to TorchScript
    model_scripted.save(MODEL_PATH) # Save

if ( MODE == "load"):
    model = torch.jit.load(MODEL_PATH)
    model.eval() # call to prepare for inference - i.e. non-training

print(f"Finished {MODE}ing the model to {MODEL_PATH}.")

In [38]:
validate(
    model = model,
    train_loader = train_dl,
    val_loader = test_dl, 
    loss_fn = loss_fn
)

print("Validation finished.")

Accuracy train: 7.187633037567139
Accuracy val: 7.229299545288086
Validation finished.
