In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torchinfo import summary

from ActiveShiftLayer import ASL
from util import test_loss

In [2]:
print(torch.version.cuda)
#print(torch.zeros(1).cuda())
torch.cuda.device_count()

11.6


0

In [3]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [18]:
batch_size = 100

#transform images into normalized tensors
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

train_dataset = datasets.MNIST(
    "./data/MNIST",
    download=True,
    train=True,
    transform=transform,
)

test_dataset = datasets.MNIST(
    "./data/MNIST",
    download=True,
    train=False,
    transform=transform,
)

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)

In [5]:
from Models import MNIST_Net

p_drop = 0.05
input_shape = (batch_size, 1, 28, 28)
expansion_rate = 1

NN = MNIST_Net(input_shape, 10, expansion_rate, device)

summary(NN, input_shape)

Layer (type:depth-idx)                   Output Shape              Param #
MNIST_Net                                [100, 10]                 --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [100, 32, 28, 28]         64
│    └─CSC_block: 2-2                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-1              --                        2,304
│    └─CSC_block: 2-3                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-2              --                        2,304
│    └─AvgPool2d: 2-4                    [100, 32, 4, 4]           --
│    └─Flatten: 2-5                      [100, 512]                --
│    └─Linear: 2-6                       [100, 10]                 5,130
Total params: 9,802
Trainable params: 9,802
Non-trainable params: 0
Total mult-adds (M): 336.72
Input size (MB): 0.31
Forward/backward pass size (MB): 220.78
Params size (MB): 0.04
Estimated Total Size (MB): 221.14

In [6]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [8]:
start = time.process_time_ns()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN, test_dataloader, criterion, device))
    scheduler.step()
end = time.process_time_ns()

print(f"time: {end - start}")

[1,   100] train_loss: 0.715
(0.22816641511395575, 92.68)
[1,   200] train_loss: 0.192
(0.18493165225721897, 93.84)
[1,   300] train_loss: 0.165
(0.13996522705536335, 95.64)
[1,   400] train_loss: 0.147
(0.1323805426992476, 95.86)
[1,   500] train_loss: 0.113
(0.11726483502890915, 96.0)
[1,   600] train_loss: 0.107
(0.08020057712914422, 97.37)
[2,   100] train_loss: 0.103
(0.07316928614629432, 97.48)
[2,   200] train_loss: 0.093
(0.07609701319481246, 97.45)
[2,   300] train_loss: 0.100
(0.08536210779915564, 97.34)
[2,   400] train_loss: 0.096
(0.0773756203148514, 97.56)
[2,   500] train_loss: 0.085
(0.06352138399321121, 97.84)
[2,   600] train_loss: 0.090
(0.07186561815091409, 97.42)
time: 91484375000


In [10]:
etime = (end - start) * 1e-9
print(time)



91.484375


In [23]:
from Models import MNIST_conv_Net

p_drop = 0.05
input_shape = (batch_size, 1, 28, 28)

NN2 = MNIST_conv_Net(input_shape, 10)

summary(NN2, input_shape)

Layer (type:depth-idx)                   Output Shape              Param #
MNIST_conv_Net                           [100, 10]                 --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [100, 32, 28, 28]         64
│    └─BatchNorm2d: 2-2                  [100, 32, 28, 28]         64
│    └─ReLU: 2-3                         [100, 32, 28, 28]         --
│    └─Conv2d: 2-4                       [100, 32, 28, 28]         9,248
│    └─BatchNorm2d: 2-5                  [100, 32, 28, 28]         64
│    └─ReLU: 2-6                         [100, 32, 28, 28]         --
│    └─Conv2d: 2-7                       [100, 32, 28, 28]         25,632
│    └─BatchNorm2d: 2-8                  [100, 32, 28, 28]         64
│    └─ReLU: 2-9                         [100, 32, 28, 28]         --
│    └─AvgPool2d: 2-10                   [100, 32, 4, 4]           --
│    └─Flatten: 2-11                     [100, 512]                --
│    └─L

In [24]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN2.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [25]:
start = time.process_time_ns()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN2, test_dataloader, criterion, device))
    scheduler.step()

end = time.process_time_ns()

ex_time = (end - start) * 1e-9
print(time)


[1,   100] train_loss: 0.461
(0.121927930675447, 96.43)
[1,   200] train_loss: 0.131
(0.08365229741670191, 97.49)
[1,   300] train_loss: 0.107
(0.06615080372663215, 97.9)
[1,   400] train_loss: 0.083
(0.06283143160515464, 98.08)
[1,   500] train_loss: 0.081
(0.06338795224088244, 97.96)
[1,   600] train_loss: 0.074
(0.06811735047958792, 97.62)
[2,   100] train_loss: 0.065
(0.056236699686851355, 98.18)
[2,   200] train_loss: 0.056
(0.046598938982351686, 98.49)
[2,   300] train_loss: 0.058
(0.04778041346522514, 98.42)
[2,   400] train_loss: 0.060
(0.0473111592719215, 98.38)
[2,   500] train_loss: 0.058
(0.049529263663862365, 98.46)
[2,   600] train_loss: 0.057
(0.050539112215628845, 98.24)
<module 'time' (built-in)>


In [26]:
print(ex_time)

74.25


In [28]:
from Models import MNIST_ownconv_Net

p_drop = 0.05
input_shape = (batch_size, 1, 28, 28)

NN7 = MNIST_conv_Net(input_shape, 10)

summary(NN7, input_shape)

ImportError: cannot import name 'MNIST_ownconv_Net' from 'Models' (c:\Users\Max Heise\Documents\Uni\Veranstaltungen\AML\Abschlussprojekt\ASL\Models.py)

In [None]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN7.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [None]:
start = time.process_time_ns()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN7(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN7, test_dataloader, criterion, device))
    scheduler.step()

end = time.process_time_ns()

ex_time = (end - start) * 1e-9
print(time)


In [9]:
from Models import MNIST_Net2

expansion_rate = 3
input_shape = (batch_size, 1, 28, 28)

NN3 = MNIST_Net2(input_shape, 10)

summary(NN3, input_shape)

Layer (type:depth-idx)                   Output Shape              Param #
MNIST_Net2                               [100, 10]                 --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [100, 32, 28, 28]         64
│    └─CSC_block: 2-2                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-1              --                        6,720
│    └─CSC_block: 2-3                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-2              --                        6,720
│    └─CSC_block: 2-4                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-3              --                        6,720
│    └─AvgPool2d: 2-5                    [100, 32, 4, 4]           --
│    └─Flatten: 2-6                      [100, 512]                --
│    └─Linear: 2-7                       [100, 10]                 5,130
Total params: 25,354
Trainable params: 25,354
Non-trainable params: 0
Tot

In [7]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN3.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [8]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN3, test_dataloader, criterion, device))
    scheduler.step()

[1,   100] train_loss: 0.509
(0.23171740879304706, 93.05)
[1,   200] train_loss: 0.176
(0.11580315442988649, 96.14)
[1,   300] train_loss: 0.130
(0.11617182110901922, 96.27)
[1,   400] train_loss: 0.132
(0.09387944596121088, 96.96)
[1,   500] train_loss: 0.134
(0.09338684955437201, 96.9)
[1,   600] train_loss: 0.102
(0.06715060683432966, 97.68)
[2,   100] train_loss: 0.091
(0.07073068731755484, 97.68)
[2,   200] train_loss: 0.087
(0.08192401613050607, 97.34)
[2,   300] train_loss: 0.084
(0.07734284515725448, 97.61)
[2,   400] train_loss: 0.082
(0.0925876001745928, 97.02)
[2,   500] train_loss: 0.088
(0.09988075721426867, 96.62)
[2,   600] train_loss: 0.080
(0.06303482159040869, 97.96)


In [10]:
expansion_rate = 1
input_shape = (batch_size, 1, 28, 28)

NN4 = MNIST_Net2(input_shape, 10, expansion_rate, device)

summary(NN4, input_shape)

Layer (type:depth-idx)                   Output Shape              Param #
MNIST_Net2                               [100, 10]                 --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [100, 32, 28, 28]         64
│    └─CSC_block: 2-2                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-1              --                        2,304
│    └─CSC_block: 2-3                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-2              --                        2,304
│    └─CSC_block: 2-4                    [100, 32, 28, 28]         --
│    │    └─Sequential: 3-3              --                        2,304
│    └─AvgPool2d: 2-5                    [100, 32, 4, 4]           --
│    └─Flatten: 2-6                      [100, 512]                --
│    └─Linear: 2-7                       [100, 10]                 5,130
Total params: 12,106
Trainable params: 12,106
Non-trainable params: 0
Tot

In [11]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN4.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [12]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN4(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN4, test_dataloader, criterion, device))
    scheduler.step()

[1,   100] train_loss: 0.569
(0.18625395480543375, 94.0)
[1,   200] train_loss: 0.168
(0.12762564175296576, 95.99)
[1,   300] train_loss: 0.138
(0.10866357750957832, 96.47)
[1,   400] train_loss: 0.125
(0.10119087643921375, 96.86)
[1,   500] train_loss: 0.101
(0.07436577038373798, 97.6)
[1,   600] train_loss: 0.094
(0.08836525014252401, 97.2)
[2,   100] train_loss: 0.098
(0.06228518116287887, 98.07)
[2,   200] train_loss: 0.083
(0.07463950763689355, 97.66)
[2,   300] train_loss: 0.078
(0.0749166457017418, 97.52)
[2,   400] train_loss: 0.082
(0.060831173137994486, 98.1)
[2,   500] train_loss: 0.072
(0.058170136823318896, 98.15)
[2,   600] train_loss: 0.070
(0.059499458849313666, 98.13)
