In [1]:
from torch.nn import Conv2d, BatchNorm2d, ReLU, MaxPool2d, Sequential, AdaptiveAvgPool2d, Linear, Softmax, CrossEntropyLoss, MSELoss, NLLLoss
from torchvision.transforms import Compose, ToTensor, Normalize, RandomHorizontalFlip, RandomGrayscale, Resize, InterpolationMode
from torch.cuda import memory_allocated, empty_cache, memory_reserved, IntTensor
from torch.utils.tensorboard import SummaryWriter
from torch.optim import Adam, SGD, lr_scheduler
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch

In [2]:
Transform_Train = Compose([RandomHorizontalFlip(),
                           RandomGrayscale(),
                           ToTensor(),
                           Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
Transform_Test = Compose([ToTensor(),
                           Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
log_interval = 10
momentum = 0.9
TrainBS = 8
TestBS = 64
lr = 1e-4

In [3]:
writer = SummaryWriter('./log/')

In [4]:
Train_Data = DataLoader(dataset = CIFAR10(root = './data/',
                                          train = True,
                                          transform = Transform_Train,
                                          download = True),
                        batch_size = TrainBS,
                        shuffle = True)
Test_Data = DataLoader(dataset = CIFAR10(root = './data/',
                                          train = False,
                                          transform = Transform_Test,
                                          download = True),
                        batch_size = TestBS,
                        shuffle = True)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class BasicBlock(nn.Module):
    def __init__(self, DownSample, Proc_Channel):
        super(BasicBlock, self).__init__()
        stride = 1
        self.shortcut = Sequential()
        in_channels = Proc_Channel
        if DownSample == 1:
            if Proc_Channel != 64:
                in_channels = int(Proc_Channel/2)
                stride = 2
            self.shortcut = Sequential(Conv2d(in_channels = in_channels, out_channels = Proc_Channel, kernel_size = 3, stride = stride, padding = 1),
                                       BatchNorm2d(Proc_Channel))    
        kernel_size = 3
        self.ConvLayer1 = Conv2d(in_channels, Proc_Channel, kernel_size, stride, 1)
        self.BatchNorm1 = BatchNorm2d(Proc_Channel)
        self.ConvLayer2 = Conv2d(in_channels = Proc_Channel, out_channels = Proc_Channel, kernel_size = 3, stride = 1, padding = 1)
        self.BatchNorm2 = BatchNorm2d(Proc_Channel)
        self.BatchNorm3 = BatchNorm2d(Proc_Channel)
    def forward(self, x):
        Residual = self.shortcut(x)
        x = self.ConvLayer1(x)
        x = self.BatchNorm1(x)
        x = F.relu(x)
        x = self.ConvLayer2(x)
        x = self.BatchNorm2(x)
        x = F.relu(x)
        x = x + Residual
        x = self.BatchNorm3(x)
        x = F.relu(x)
        return x

![resnet.png]("C:\Users\Baaaatttlllllllleeee\Pictures\resnet.png")

In [6]:
class Bottleneck(nn.Module):
    def __init__(self,DownSample,Proc_Channel):
        super(Bottleneck,self).__init__()
        Proc_Channel = int(Proc_Channel)
        stride = 1
        in_channels = int(Proc_Channel * 4)
        if DownSample == 1:
            if Proc_Channel == 64:
                in_channels = Proc_Channel
            else:
                stride = 2
                in_channels = int(Proc_Channel * 2)
        self.ConvLayer1 = Sequential(Conv2d(in_channels, Proc_Channel, 1, 1, 0),
                                     BatchNorm2d(Proc_Channel), 
                                     ReLU())
        self.ConvLayer2 = Sequential(Conv2d(Proc_Channel, Proc_Channel, 3, stride, 1),
                                     BatchNorm2d(Proc_Channel), 
                                     ReLU())
        self.ConvLayer3 = Sequential(Conv2d(Proc_Channel, Proc_Channel * 4, 1, 1, 0),
                                     BatchNorm2d(Proc_Channel * 4), 
                                     ReLU())
        self.shortcut = Sequential(Conv2d(in_channels, Proc_Channel * 4, 3, stride, 1),
                                   BatchNorm2d(Proc_Channel * 4))
        self.Post = Sequential(BatchNorm2d(Proc_Channel * 4),
                               ReLU())
    def forward(self,x):
        Residual = self.shortcut(x)
        x = self.ConvLayer1(x)
        x = self.ConvLayer2(x)
        x = self.ConvLayer3(x)
        x = x + Residual
        x = self.Post(x)
        return x

class Bottleneck(nn.Module):
    def __init__(self, DownSample, Proc_Channel):
        super(Bottleneck, self).__init__()
        Proc_Channel = int(Proc_Channel / 4)
        stride = 1
        in_channels = Proc_Channel * 4
        if DownSample == 1:
            stride = 2
            in_channels = int(Proc_Channel * 2)
            if Proc_Channel == 64:
                stride = 1
                in_channels = Proc_Channel
            self.shortcut = Sequential(Conv2d(in_channels = in_channels, out_channels = Proc_Channel * 4, kernel_size = 3, stride = stride, padding = 1),
                                       BatchNorm2d(Proc_Channel * 4))
        else:
            self.shortcut = Sequential(Conv2d(in_channels = in_channels, out_channels = Proc_Channel * 4, kernel_size = 3, stride = stride, padding = 1),
                                       BatchNorm2d(Proc_Channel * 4))
        self.ConvLayer1 = Sequential(Conv2d(in_channels = in_channels, out_channels = Proc_Channel, kernel_size = 1, stride = 1, padding = 0),
                                     BatchNorm2d(Proc_Channel),
                                     ReLU())
        self.ConvLayer2 = Sequential(Conv2d(in_channels = Proc_Channel, out_channels = Proc_Channel, kernel_size = 3, stride = stride, padding = 1),
                                     BatchNorm2d(Proc_Channel),
                                     ReLU())
        self.ConvLayer3 = Sequential(Conv2d(in_channels = Proc_Channel, out_channels = Proc_Channel * 4, kernel_size = 1, stride = 1, padding = 0),
                                     BatchNorm2d(Proc_Channel * 4))
        self.Res_Proc = Sequential(BatchNorm2d(Proc_Channel * 4),
                                   ReLU())
    def forward(self, x):
        Residual = self.shortcut(x)
        x = self.ConvLayer1(x)
        x = self.ConvLayer2(x)
        x = self.ConvLayer3(x)
        x = x + Residual
        x = self.Res_Proc(x)
        return x

In [7]:
class ResNet(nn.Module):
    def __init__(self, block, layer_arch, final_output):
        super(ResNet,self).__init__()
        self.Resize = Resize((224,224), interpolation = InterpolationMode.BILINEAR)
        self.final_output = final_output
        self.stem = Sequential(Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, stride = 2, padding = 3),
                               BatchNorm2d(64),
                               ReLU(),
                               MaxPool2d(kernel_size = 3, stride = 2, padding = 1))
        self.stage1 = self.make_layer(block, 64, layer_arch[0])
        self.stage2 = self.make_layer(block, 128, layer_arch[1])
        self.stage3 = self.make_layer(block, 256, layer_arch[2])
        self.stage4 = self.make_layer(block, 512, layer_arch[3])
        self.Aver_pool = AdaptiveAvgPool2d((1,1))
        self.fc = Linear(self.final_output, 10)
        self.softmax = Softmax(dim = 1)
    def make_layer(self, block, Proc_Channel, layer_arch):
        layer = []
        for i in range(layer_arch):
            if i == 0:
                layer.append(block(1,Proc_Channel))
            else:
                layer.append(block(0,Proc_Channel))
        return Sequential(*layer)
    def forward(self,x):
        x = self.Resize(x)
        x = self.stem(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.Aver_pool(x)
        x = x.view(-1, self.final_output)
        x = self.fc(x)
        x = F.relu(x)
        x = self.softmax(x)
        return x

class ResNet(nn.Module):
    def __init__(self, block, layer_arch, final_output): 
        super(ResNet, self).__init__()
        self.final_output = final_output
        self.Resize = Resize((224,224), interpolation = InterpolationMode.BILINEAR)
        self.stem = Sequential(Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, stride = 2, padding =3),
                               BatchNorm2d(64),
                               ReLU(),
                               MaxPool2d(kernel_size = 3, stride = 2, padding = 1))
        self.stage1 = self._make_layer(block, layer_arch[0], int(self.final_output / 8))
        self.stage2 = self._make_layer(block, layer_arch[1], int(self.final_output / 4))
        self.stage3 = self._make_layer(block, layer_arch[2], int(self.final_output / 2))
        self.stage4 = self._make_layer(block, layer_arch[3], self.final_output)
        self.AvgPool = AdaptiveAvgPool2d((1,1))
        self.fc1 = Linear(self.final_output, 1000)
        self.fc2 = Linear(1000,10)
        self.softmax = Softmax(dim = 1)
    
    def forward(self, x):
        x = self.Resize(x)
        x = self.stem(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.AvgPool(x)
        x = x.view(-1,self.final_output)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.softmax(x)
        return x
    
    def _make_layer(self, block, layer_arch, Proc_Channel):
        layer = Sequential()
        for i in range(layer_arch):
            if i == 0:
                layer.append(block(True, Proc_Channel))
            else:
                layer.append(block(False, Proc_Channel))
        return layer

In [8]:
def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2], 512)
def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3], 512)
def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3] , 2048)
def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3] , 2048)
def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3] , 2048)

In [9]:
Network = ResNet50()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Network.to(device)
print(device)

cuda:0


In [10]:
loss_function = CrossEntropyLoss()
#loss_function =  MSELoss() # Failed
#loss_function = NLLLoss()

In [11]:
train_acc_counter = []
train_loss_counter = []
train_data_counter = []
test_acc_counter = []
test_data_counter = []

In [12]:
def test(epoch):
    Network.eval()
    with torch.no_grad():
        correct = 0
        for data, target in Test_Data:
            data = data.to(device)
            target = target.to(device)
            output = Network(data).to(device)
            output = output.data.max(1, keepdim = True)[1]
            correct += output.eq(target.data.view_as(output)).sum()
        log = "Mode: Test | Epoch:{} Memory:{:.0f}MB Acc:{:.0f}%".format(epoch,  memory_allocated(device)/1048576, 100. * correct/len(Test_Data.dataset))
        writer.add_scalar(tag = 'TestAccuracy', scalar_value=(100. * correct/len(Test_Data.dataset)), global_step=(epoch * len(Train_Data.dataset)))
        print(log)
        test_acc_counter.append((100. * correct/len(Test_Data.dataset)))
        test_data_counter.append(epoch * len(Train_Data.dataset))

In [13]:
def train(epochs):
    for epoch in range(1, epochs+1):
        Network.train()
        empty_cache()
        for batch_idx, (data, target) in enumerate(Train_Data):
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            correct = 0
            output = Network(data)
            pred = output.data.max(1, keepdim = True)[1]
            pred = pred.view_as(target)
            correct += pred.eq(target.data).sum()
            correct = int(correct)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % log_interval == 0 and batch_idx != 0:
                log = "Mode: Train | Epoch:{} Data:{}/{} Memory:{:.0f}MB Accuracy:{:.0f}% Loss:{:.6f}".format(epoch, batch_idx*len(data), len(Train_Data.dataset), memory_allocated(device)/1048576, 100. * (correct/len(data)), loss.item())
                print(log)
                train_acc_counter.append((100. * (correct/len(data))))
                train_loss_counter.append(loss.item())
                train_data_counter.append((epoch-1) * len(Train_Data.dataset) + batch_idx * len(data))
                writer.add_scalar(tag = 'TrainAccuracy', scalar_value=(100. * (correct/len(data))), global_step=(epoch-1) * len(Train_Data.dataset) + batch_idx * len(data))
                writer.add_scalar(tag = 'TrainLoss', scalar_value=loss.item(), global_step=(epoch-1) * len(Train_Data.dataset) + batch_idx * len(data))
        empty_cache()
        test(epoch)
        scheduler.step()
    return 0

In [14]:
optimizer = Adam(params = Network.parameters(), lr = lr) 
#optimizer = SGD(params = Network.parameters(), lr = lr, momentum = momentum)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max = 30)

In [15]:
train(30)

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 6.00 GiB total capacity; 3.72 GiB already allocated; 0 bytes free; 4.33 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
print(memory_allocated(device)/1048576)
print(memory_reserved(device)/1048576)

In [None]:
empty_cache()

In [None]:
Network.to('cpu')
empty_cache()