# Implementation of a dense CNN for the CIFAR-10 dataset
This is an implementation of a dense CNN for the CIFAR-10 dataset. The model is based on the paper [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) by Gao Huang, Zhuang Liu, Laurens van der Maaten, Kilian Q. Weinberger. 

This will be the starting point to modify the structure of the network to test different architectures.

The implementation is basend on [this](https://amaarora.github.io/2020/08/02/densenets.html)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import copy
from collections import OrderedDict



In [3]:
class _Transition(nn.Sequential):
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('BN', nn.BatchNorm2d(num_input_features))
        self.add_module('1x1', nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


#define dense layer and bottleneck layer
class _DenseLayer(nn.Module):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
        super(_DenseLayer, self).__init__()
        self.add_module('BN_1', nn.BatchNorm2d(num_input_features))
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                                           growth_rate, kernel_size=1, stride=1,
                                           bias=False))
        self.add_module('relu_1', nn.ReLU(inplace=True))
        self.add_module('BN_2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('conv3', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=5, padding=2, dilation=1, groups=2,
                                           bias=False)) # questo è il cromosoma

       

        self.add_module('relu_2', nn.ReLU(inplace=True))
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs):
        "Bottleneck function"
        # type: (List[Tensor]) -> Tensor
        
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.relu_1(self.conv1(self.BN_1(concated_features)))  # noqa: T484
        
        return bottleneck_output

    def forward(self, input):  # noqa: F811
        if torch.is_tensor(input):
            prev_features = [input]
        else:
            prev_features = input
        bottleneck_output = self.bn_function(prev_features)
        new_features = self.relu_2(self.conv3(self.BN_2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate,
                                     training=self.training)
        return new_features



class _DenseBlock(nn.ModuleDict):
    _version = 2

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i * growth_rate,
                growth_rate=growth_rate,
                bn_size=bn_size,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient,
            )
            self.add_module('denselayer%d' % (i + 1), layer)

    def forward(self, init_features):
        features = [init_features]
        for name, layer in self.items():
            new_features = layer(features)
            features.append(new_features)

        
        return torch.cat(features, 1)


class DenseNet(nn.Module):
    def __init__(self, growth_rate=2, block_config=(3, 3),
                 num_init_features=3, bn_size=2, drop_rate=0, num_classes=10, memory_efficient=False):

        super(DenseNet, self).__init__()

        #Convolution and pooling part from table-1
        # self.features = nn.Sequential(OrderedDict([
        #     #('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
        #                        # padding=3, bias=False)),
        #     ('norm0', nn.BatchNorm2d(num_init_features)),
        #     ('relu0', nn.ReLU(inplace=True)),
        #     ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        # ]))
        # print("features:", self.features)
        self.features = nn.Sequential()
     

        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate, memory_efficient=memory_efficient)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                # add transition layer between denseblocks to 
                # downsample
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)
       
        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        features = self.features(x)
        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

net = DenseNet()

## Application of the net in the cifar-10 dataset

Fristly, import the dataset:

In [4]:
from scripts import cifar10
trainloader, testloader, classes = cifar10.cifar10()

Files already downloaded and verified
Files already downloaded and verified


## Train the network

In [5]:
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
print(torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))

cuda:0


In [6]:
from scripts import train
train_loss, train_acc = train.train_model(net, trainloader, criterion, optimizer, num_epochs=2, device=device)

TRAINING - loss 2.284794807434082 - performance 0.25
TRAINING - loss 2.3153443430910015 - performance 0.08663366336633663
TRAINING - loss 2.3054840327495367 - performance 0.08333333333333333
TRAINING - loss 2.298742900259075 - performance 0.09053156146179402
TRAINING - loss 2.2900160286491946 - performance 0.10224438902743142
TRAINING - loss 2.286401783872745 - performance 0.10828343313373254
TRAINING - loss 2.2802732216934993 - performance 0.12104825291181365
TRAINING - loss 2.280158241533179 - performance 0.12375178316690442
TRAINING - loss 2.277725527200211 - performance 0.12827715355805244
TRAINING - loss 2.275012868622961 - performance 0.13124306326304105
TRAINING - loss 2.272344369631071 - performance 0.13236763236763235
TRAINING - loss 2.2717197907176696 - performance 0.13328792007266121
TRAINING - loss 2.267995591763156 - performance 0.13905079100749376
TRAINING - loss 2.2655475667400786 - performance 0.13931591083781705
TRAINING - loss 2.2636991913704936 - performance 0.139007

KeyboardInterrupt: 

In [9]:

net.eval(testloader)

TypeError: eval() takes 1 positional argument but 2 were given

## Test the network

In [None]:
dataiter = iter(testloader)
images, labels = dataiter.next()

def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(4)))
outputs = net(images)
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}'
                              for j in range(4)))

In [11]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper__native_batch_norm)