In [0]:
import torchvision
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
from torch import optim
import torch.backends.cudnn as cudnn


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cudnn.benchmark = True
!nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv

cuda:0
name, driver_version, memory.total [MiB]
Tesla P100-PCIE-16GB, 418.67, 16280 MiB


In [0]:
class To3Channels(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        if sample.shape[0] < 3:
            sample = torch.squeeze(sample)
            sample = torch.stack([sample, sample,sample], 0)

        return sample

In [49]:
transformer_cifar_10 =  torchvision.transforms.Compose(
    [torchvision.transforms.Resize(224),
     torchvision.transforms.ToTensor(),
     To3Channels(),
     #torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
     torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

transformer_cifar_100 = torchvision.transforms.Compose(
    [torchvision.transforms.Resize(224),
     torchvision.transforms.ToTensor(),
     To3Channels(),
     torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
     #torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

transformer_fashion = torchvision.transforms.Compose(
    [torchvision.transforms.Resize(224),
     torchvision.transforms.ToTensor(),
     To3Channels(),
     torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

CIFAR10_train = torchvision.datasets.CIFAR10("../datasets/CIFAR10/", train=True, transform=transformer_cifar_10, target_transform=None, download=True)
CIFAR100_train = torchvision.datasets.CIFAR100("../datasets/CIFAR100/", train=True, transform=transformer_cifar_100, target_transform=None, download=True)
FashionMNIST_train = torchvision.datasets.FashionMNIST("../datasets/FashionMNIST/", train=True, transform=transformer_fashion, target_transform=None, download=True)




CIFAR10_test = torchvision.datasets.CIFAR10("../datasets/CIFAR10/", train=False, transform=transformer_cifar_10, target_transform=None, download=True)
CIFAR100_test = torchvision.datasets.CIFAR100("../datasets/CIFAR100/", train=False, transform=transformer_cifar_100, target_transform=None, download=True)
FashionMNIST_test = torchvision.datasets.FashionMNIST("../datasets/FashionMNIST/", train=False, transform=transformer_fashion, target_transform=None, download=True)






Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [0]:
def get_loaders(dataset = "CIFAR10"):
    train_loader = None
    test_loader = None
    labels_num = None

    if dataset == "CIFAR10":
        train_loader = torch.utils.data.DataLoader(CIFAR10_train, batch_size=64,
                                          shuffle=True, num_workers=8)
        test_loader = torch.utils.data.DataLoader(CIFAR10_test, batch_size=64,
                                          shuffle=False, num_workers=8)
        labels_num = 10#len(set(CIFAR10_train.train_labels))
    elif dataset == "CIFAR100":
        train_loader = torch.utils.data.DataLoader(CIFAR100_train, batch_size=32,
                                                   shuffle=True, num_workers=8)
        test_loader = torch.utils.data.DataLoader(CIFAR100_test, batch_size=32,
                                                  shuffle=False, num_workers=8)
        labels_num = 100

    elif dataset == "FASHION_MNIST":
        train_loader = torch.utils.data.DataLoader(FashionMNIST_train, batch_size=64,
                                                   shuffle=True, num_workers=8)
        test_loader = torch.utils.data.DataLoader(FashionMNIST_test, batch_size=64,
                                                  shuffle=False, num_workers=8)
        labels_num = len(set(FashionMNIST_train.train_labels))


    return train_loader,test_loader,labels_num

In [0]:
class ShortcutBlock(nn.Module): 
    expansion = 1
    def __init__(self, in_channel, out_channel,downsample_layer = None,stride=1):
      super(ShortcutBlock,self).__init__()
      self.conv1 =  nn.Conv2d(in_channel, out_channel,3, stride = stride,padding=1)
      self.bn1 = nn.BatchNorm2d(out_channel)
      self.conv2 =  nn.Conv2d(out_channel, out_channel, 3,padding=1)
      self.bn2 = nn.BatchNorm2d(out_channel)
      self.downsample = downsample_layer
      self.relu = nn.ReLU(inplace=True)
    def forward(self,x):
      identity = x
      if self.downsample is not None:
        identity = self.downsample(x)

      x = self.relu(self.bn1(self.conv1(x)))
      x = self.bn2(self.conv2(x))

      return self.relu(identity + x)

class BottleneckBlock(nn.Module):
    expansion = 4
    def __init__(self,in_channel, out_channel,downsample_layer = None,stride=1):
      super(BottleneckBlock,self).__init__()
      features = int(out_channel * (64 / 64.)) * 1

      self.conv1 =  nn.Conv2d(in_channel, features,1)
      self.bn1 = nn.BatchNorm2d(features)
      self.conv2 =  nn.Conv2d(features, features, 3, stride = stride,padding=1)
      self.bn2 = nn.BatchNorm2d(features)
      self.conv3 =  nn.Conv2d(features, out_channel * self.expansion, 1)
      self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
      self.downsample = downsample_layer
      self.relu = nn.ReLU(inplace=True)
      self.downsample = downsample_layer

    def forward(self,x):
      identity = x
      if self.downsample is not None:
        identity = self.downsample(x)
      x = self.relu(self.bn1(self.conv1(x)))
      x = self.relu(self.bn2(self.conv2(x)))
      x = self.bn3(self.conv3(x))
      return self.relu(identity + x)


class ResNet(nn.Module):

    def __init__(self,num_classes,block,layers):
        super(ResNet, self).__init__()
        self.current_number_of_features = 64
        self.conv1 = nn.Conv2d(3,64,7,stride=2,padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(3,stride=2)
        self.block1 = self._construct_layer(block,64,layers[0])
        self.block2 = self._construct_layer(block,128,layers[1],stride=2)
        self.block3 = self._construct_layer(block,256,layers[2],stride=2)
        self.block4 = self._construct_layer(block,512,layers[3],stride=2)
        self.pool2 = nn.AdaptiveAvgPool2d((1,1))

        self.fc8 = nn.Linear(512 * block.expansion,num_classes)
        #self.actv8 = nn.Softmax(dim=1)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _construct_layer(self, block, output_channels, blocks, stride=1):
        downsample = None

        # If the input is going to be downsampled (stride = 2) or the number of
        #    channels is different from the expected expansion
        if stride != 1 or self.current_number_of_features != output_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.current_number_of_features, output_channels * block.expansion, 1 , stride),
                nn.BatchNorm2d(output_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.current_number_of_features, output_channels, downsample, stride))
        self.current_number_of_features = output_channels * block.expansion

        for _ in range(1, blocks):
            layers.append(block(self.current_number_of_features, output_channels))

        return nn.Sequential(*layers)
    def _forward_impl(self,x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool1(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.pool2(x)
        
        x = torch.flatten(x, start_dim=1)
        x = self.fc8(x)
        return x
    def forward(self, x):
        return self._forward_impl(x)





In [0]:
def _resnet(block, layers,num_classes):
    return ResNet(num_classes,block,layers)

def ResNet18(num_classes):
    return _resnet(ShortcutBlock,[2,2,2,2],num_classes)

def ResNet34(num_classes):
    return _resnet(ShortcutBlock,[3,4,6,3],num_classes)

def ResNet50(num_classes):
    
    return _resnet(BottleneckBlock, [3, 4, 6, 3], num_classes)


def ResNet101(num_classes):
    
    return _resnet(BottleneckBlock, [3, 4, 23, 3], num_classes)


def ResNet152(num_classes):
    return _resnet(BottleneckBlock, [3, 8, 36, 3], num_classes)


In [0]:
def compute_accuracy(net, testloader):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [0]:
def train(net,trainloader,testloader,optim_name = "adam",epochs = 30):
    optimizer = optim.Adam(net.parameters(),lr= 0.001,weight_decay=0.0005)
    if optim_name == "sgd":
        optimizer = optim.SGD(net.parameters(),0.05,0.9,weight_decay=0.0005)

    criterion = torch.nn.CrossEntropyLoss()
    losses = []
    accuracies = []
    for epoch in range(epochs):
        #if epoch == 150: 
          #if optim_name == "sgd":
        #    optimizer = optim.SGD(net.parameters(),0.01,0.9)
        #if epoch == 250: 
        #  if optim_name == "sgd":
        #    optimizer = optim.SGD(net.parameters(),0.001,0.9)
        running_loss = 0.0
        for i,data in enumerate(trainloader,0):
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 300 == 299:  # print every 100 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 300))
                losses.append(running_loss/300)
                running_loss = 0.0

        accuracy = compute_accuracy(net,testloader)
        accuracies.append(accuracy)
        print('Accuracy of the network on the test images: %.3f' % accuracy)

    return accuracies,losses


In [0]:
from google.colab import files
def run(dataset = "CIFAR10",epochs = 30):
    trainloader, testloader, num_classes = get_loaders(dataset)

    net = ResNet50(num_classes)
    net.to(device)

    accuracies, losses = train(net, trainloader, testloader,epochs=epochs,optim_name="adam")

    
    
    
    f = plt.figure(1)
    x = np.linspace(0, 1, len(losses))
    plt.plot(x,losses)
    f.show()

    g = plt.figure(2)
    x = np.linspace(0, 1, len(accuracies))
    plt.plot(x, accuracies, figure = g)
    g.show()
    
    

    
    #files.download( dataset + "_loss.png") 
    
    
    plt.show()
    #files.download( dataset + "_accuracy.png")
    
 

In [0]:
run(epochs=70)

In [0]:
run("CIFAR100",80)

In [0]:
run("FASHION_MNIST",40)



[1,   300] loss: 0.750
[1,   600] loss: 0.453
[1,   900] loss: 0.417
Accuracy of the network on the test images: 0.787
[2,   300] loss: 4.532
[2,   600] loss: 1.054
[2,   900] loss: 0.734
Accuracy of the network on the test images: 0.766
[3,   300] loss: 0.604
[3,   600] loss: 0.550
[3,   900] loss: 0.523
Accuracy of the network on the test images: 0.800
[4,   300] loss: 0.490
[4,   600] loss: 0.471
[4,   900] loss: 0.458
Accuracy of the network on the test images: 0.807
[5,   300] loss: 0.443
[5,   600] loss: 0.433
[5,   900] loss: 0.422
Accuracy of the network on the test images: 0.818
[6,   300] loss: 0.415
[6,   600] loss: 0.399
[6,   900] loss: 0.411
Accuracy of the network on the test images: 0.828
[7,   300] loss: 0.396
[7,   600] loss: 0.395
[7,   900] loss: 0.383
Accuracy of the network on the test images: 0.853
[8,   300] loss: 0.364
[8,   600] loss: 0.381
[8,   900] loss: 0.367
Accuracy of the network on the test images: 0.861
[9,   300] loss: 0.352
[9,   600] loss: 0.359
[9

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7fcd5bb84dd8>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 124, in join
    res = self._popen.wait(timeout)
  File "/usr/lib/python3.6/multiprocessing/popen_fork.py", line 50, in wait
    return self.poll(os.WNOHANG if timeout == 0.0 else 0)
  File "/usr/lib/python3.6/multiprocessing/popen_fork.py", line 28, in poll
    pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt: 


KeyboardInterrupt: ignored






