<a href="https://colab.research.google.com/github/Projekt-R-DU/prva-faza/blob/master/Cnn/cnn_mnist512.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from numpy import floor
from torch import nn

class ConvolutionalModel(nn.Module):
    """Small CNN: two conv -> ReLU -> max-pool stages and a three-layer dense head.

    Both convolutions use 5x5 kernels with stride 1 and padding 2, so they keep
    the spatial width; only the stride-2 max-pool layers shrink the feature map.
    The input is assumed to be square (``in_width`` x ``in_width``).

    Args:
        in_channels: number of channels of the input image.
        in_width: width (and height) of the input image.
        conv1_channels: output channels of the first convolution.
        pool1_width: kernel size of the first max-pool.
        conv2_channels: output channels of the second convolution.
        pool2_width: kernel size of the second max-pool.
        fc3_width: width of the first fully connected layer.
        fc4_width: width of the second fully connected layer.
        class_count: number of output logits.
    """

    def __init__(self, in_channels, in_width, conv1_channels, pool1_width, conv2_channels, pool2_width, fc3_width, fc4_width, class_count):
        super().__init__()

        # Width after each stride-2 pooling: floor((w - kernel) / 2) + 1.
        pooled1_width = (in_width - pool1_width) // 2 + 1
        pooled2_width = (pooled1_width - pool2_width) // 2 + 1
        flat_features = int(conv2_channels * pooled2_width * pooled2_width)

        # Stage 1: in_channels x in_width x in_width -> conv1_channels x pooled1 x pooled1
        self.conv1 = nn.Conv2d(in_channels, conv1_channels, kernel_size=5, stride=1, padding=2, bias=True)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(pool1_width, stride=2)

        # Stage 2: conv1_channels x pooled1 x pooled1 -> conv2_channels x pooled2 x pooled2
        self.conv2 = nn.Conv2d(conv1_channels, conv2_channels, kernel_size=5, stride=1, padding=2, bias=True)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(pool2_width, stride=2)

        # Dense head: flatten -> fc3 -> fc4 -> logits
        self.flatten3 = nn.Flatten()
        self.fc3 = nn.Linear(flat_features, fc3_width)
        self.relu3 = nn.ReLU()

        self.fc4 = nn.Linear(fc3_width, fc4_width)
        self.relu4 = nn.ReLU()

        self.fc_logits = nn.Linear(fc4_width, class_count)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights.

        Every convolution and every linear layer except ``fc_logits`` gets
        Kaiming-normal weights (fan-in, ReLU gain) and zero biases; ``fc_logits``
        falls back to ``nn.Linear``'s own default initialisation.
        """
        relu_fed_layers = (
            layer for layer in self.modules()
            if isinstance(layer, (nn.Conv2d, nn.Linear)) and layer is not self.fc_logits
        )
        for layer in relu_fed_layers:
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            nn.init.constant_(layer.bias, 0)
        self.fc_logits.reset_parameters()

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))

        hidden = self.relu3(self.fc3(self.flatten3(features)))
        hidden = self.relu4(self.fc4(hidden))

        return self.fc_logits(hidden)

In [2]:
from IPython.core.display import display_markdown
import torch.nn.functional as F

class NegativeLogLikelihood(nn.Module):
    """Mean negative log-likelihood of the target classes under a softmax over logits.

    ``weight`` and ``size_average`` are accepted by the constructor for signature
    compatibility but are not used.
    """

    def __init__(self, weight=None, size_average=True):
        super(NegativeLogLikelihood, self).__init__()

    def softmax(self, x):
        """Return the softmax of ``x`` taken over dim 1.

        The per-row maximum is subtracted before exponentiating so that
        ``exp`` cannot overflow.
        """
        row_max = torch.max(x, dim=1, keepdim=True)[0]
        x_exp_shifted = torch.exp(x - row_max)
        return x_exp_shifted / torch.sum(x_exp_shifted, dim=1, keepdim=True)

    def forward(self, x, y):
        """Return ``-mean(log softmax(x)[i, y[i]])``.

        Args:
            x: logits; classes are laid out along dim 1.
            y: integer class index for every row of ``x``.

        The log-probabilities are computed with the log-sum-exp form instead of
        ``log(softmax(x))`` so that a vanishing target probability yields a large
        finite loss rather than ``inf``. All tensors are created on ``x``'s device,
        so the loss works on CPU as well as on CUDA.
        """
        shifted = x - torch.max(x, dim=1, keepdim=True)[0]
        log_norm = torch.log(torch.sum(torch.exp(shifted), dim=1, keepdim=True))
        log_probs = shifted - log_norm

        # Pick the log-probability of each row's target class (replaces the
        # one-hot mask that was previously built on the CPU and forced to CUDA).
        target_index = y.reshape(-1, 1).to(device=x.device, dtype=torch.long)
        target_log_probs = log_probs.gather(1, target_index)
        return -torch.mean(target_log_probs)

class CenterLoss(nn.Module):
    """Center loss: mean squared Euclidean distance between features and their class centre.

    Args:
        num_classes: number of classes (one learnable centre per class).
        feat_dim: dimensionality of the feature vectors.

    ``centers`` is a learnable ``(num_classes, feat_dim)`` parameter initialised
    from a standard normal distribution.
    """

    def __init__(self, num_classes=10, feat_dim=90):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim))

    def forward(self, x, labels):
        """Return the batch-averaged squared distance of ``x`` to the labelled centres.

        Args:
            x: feature matrix with one row per sample and ``feat_dim`` columns;
                it must live on the same device as ``centers``.
            labels: integer class index for every row of ``x``.

        The distance of each sample to its own centre is computed directly
        rather than via a full sample-by-class distance matrix. This avoids the
        positional ``addmm_(beta, alpha, ...)`` call form, which is deprecated,
        and keeps every tensor on the centres' device. Each per-sample distance
        is clamped to ``[1e-12, 1e12]`` before averaging; non-target classes no
        longer contribute the clamp floor to the sum.
        """
        batch_size = x.size(0)
        # Index tensor must be int64 and on the same device as the parameter.
        target_index = labels.to(device=self.centers.device, dtype=torch.long)
        own_centers = self.centers[target_index]

        dist = torch.pow(x - own_centers, 2).sum(dim=1)
        loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size

        return loss

class CombinedLoss(nn.Module):
    """Weighted sum of a classification loss and a center loss.

    Args:
        nll: callable ``nll(x, y)`` returning the classification loss.
        center_loss: callable ``center_loss(x, labels)`` taking integer class labels.
        lambd: weight applied to the center-loss term.
    """

    def __init__(self, nll, center_loss, lambd):
        super(CombinedLoss, self).__init__()
        self.nll = nll
        self.center_loss = center_loss
        self.lambd = lambd

    def forward(self, x, y):
        """Return ``lambd * center_loss(x, labels) + nll(x, y)``.

        ``y`` may be a 1-D tensor of class indices or a 2-D tensor with one
        row of per-class scores per sample. ``argmax`` over dim 1 is applied only
        in the 2-D case; calling it on 1-D class indices raises because that
        dimension does not exist.
        """
        labels = torch.argmax(y, dim=1) if y.dim() > 1 else y
        return self.lambd * self.center_loss(x, labels) + self.nll(x, y)


In [3]:
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time
import torch.backends.cudnn as cudnn

# Data
print('==> Preparing data..')

# Training pipeline: random 28x28 crop after 4-pixel padding, random horizontal
# flip, tensor conversion, then normalisation with mean 0.1307 / std 0.3081.
# NOTE(review): horizontal flipping mirrors the digits -- confirm that this
# augmentation is intended for MNIST.
transform_train = transforms.Compose([
    transforms.RandomCrop(size=28, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])
# Evaluation pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Both splits are downloaded into ./data if they are not already present.
trainset = torchvision.datasets.MNIST(
    './data', train=True, transform=transform_train, download=True)

testset = torchvision.datasets.MNIST(
    './data', train=False, transform=transform_test, download=True)

# Training batches are reshuffled every epoch; test batches keep dataset order.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=512, shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=100, shuffle=False, num_workers=2)

# Hyperparameters
classes = ('0', '1', '2', '3', '4',
           '5', '6', '7', '8', '9')

#torch.save(net, 'cnn_mnist.pth')
# Model
print('==> Building model..')
best_acc = 0
start_epoch = 0
# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing in net.to(device).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# 1 input channel, 28x28 images, 16 and 32 conv channels with 3x3 pooling,
# dense layers of width 256 and 128, 10 output classes.
net = ConvolutionalModel(1, 28, 16, 3, 32, 3, 256, 128, 10)
net = net.to(device)

net = torch.nn.DataParallel(net)

alpha = 1e-2        # learning rate of the network optimizer
alpha_cent = 0.5    # learning rate of the class-centre optimizer
lambd = 0.01        # weight of the center-loss term in the combined loss
nll = NegativeLogLikelihood()
# The centres must live on the same device as the logits they are compared with;
# feat_dim=10 matches the 10 logits produced by the network.
center_loss = CenterLoss(num_classes=10, feat_dim=10).to(device)
combined_loss = CombinedLoss(nll, center_loss, lambd)
criterion = combined_loss
optimizer = optim.SGD(net.parameters(), lr=alpha,
                      momentum=0.9, weight_decay=1e-4)
# The class centres are updated by their own plain-SGD optimizer.
cent_optimizer = optim.SGD(center_loss.parameters(), lr=alpha_cent)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
start_time = time.time()

#Training function
def train(epoch):
    """Run one training epoch over ``trainloader`` and print mean loss and accuracy.

    Relies on the module-level ``net``, ``criterion``, ``optimizer``,
    ``cent_optimizer``, ``center_loss``, ``lambd``, ``device`` and ``start_time``.

    Args:
        epoch: epoch number, used only for the progress printout.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    running_loss = 0
    num_correct = 0
    num_seen = 0
    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))
    for inputs, targets in trainloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        # The combined loss weights the center term by lambd; rescale the centre
        # gradients by 1/lambd before the centre optimizer consumes them.
        grad_scale = 1. / lambd
        for center_param in center_loss.parameters():
            center_param.grad.data *= grad_scale

        cent_optimizer.step()
        cent_optimizer.zero_grad()
        optimizer.step()

        running_loss += loss.item()
        predicted = outputs.max(1)[1]
        num_seen += targets.size(0)
        num_correct += predicted.eq(targets).sum().item()
    print('Train -> Loss: %.3f | Acc: %.3f%%'
                  % (running_loss/(len(trainloader)), 100.*num_correct/num_seen))
#Test function
def test(epoch):
    """Evaluate ``net`` on ``testloader`` and print mean loss and accuracy.

    Relies on the module-level ``net``, ``criterion``, ``testloader`` and
    ``device``. Gradients are disabled for the whole pass.

    Args:
        epoch: epoch number; accepted for symmetry with ``train`` and not used.
    """
    global best_acc  # declared here but never assigned in this function
    net.eval()
    running_loss = 0
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = net(inputs)
            running_loss += criterion(outputs, targets).item()

            predicted = outputs.max(1)[1]
            num_seen += targets.size(0)
            num_correct += predicted.eq(targets).sum().item()
    print('Test -> Loss: %.3f | Acc: %.3f%%'
                  % (running_loss/(len(testloader)), 100.*num_correct/num_seen))

# Train and evaluate for 30 epochs, stepping the learning-rate schedule once
# after every epoch.
for epoch in range(start_epoch, start_epoch + 30):
    train(epoch)
    test(epoch)
    scheduler.step()

==> Preparing data..
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

==> Building model..

Epoch: 0
Time elapsed: 0.00 min
Train -> Loss: 0.962 | Acc: 67.010%
Test -> Loss: 0.275 | Acc: 90.750%

Epoch: 1
Time elapsed: 0.39 min
Train -> Loss: 0.331 | Acc: 89.135%
Test -> Loss: 0.244 | Acc: 91.860%

Epoch: 2
Time elapsed: 0.79 min
Train -> Loss: 0.242 | Acc: 92.098%
Test -> Loss: 0.170 | Acc: 94.250%

Epoch: 3
Time elapsed: 1.18 min
Train -> Loss: 0.194 | Acc: 93.845%
Test -> Loss: 0.124 | Acc: 96.130%

Epoch: 4
Time elapsed: 1.58 min
Train -> Loss: 0.163 | Acc: 94.790%
Test -> Loss: 0.128 | Acc: 95.660%

Epoch: 5
Time elapsed: 1.97 min
Train -> Loss: 0.139 | Acc: 95.600%
Test -> Loss: 0.082 | Acc: 97.130%

Epoch: 6
Time elapsed: 2.37 min
Train -> Loss: 0.126 | Acc: 95.927%
Test -> Loss: 0.091 | Acc: 96.890%

Epoch: 7
Time elapsed: 2.76 min
Train -> Loss: 0.116 | Acc: 96.353%
Test -> Loss: 0.088 | Acc: 97.140%

Epoch: 8
Time elapsed: 3.15 min
Train -> Loss: 0.106 | Acc: 96.648%
Tes