In [1]:
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Lambda
import time as time
import numpy as np
from gradient_descent_the_ultimate_optimizer import gdtuo
from gradient_descent_the_ultimate_optimizer.gdtuo import Optimizable
import os
import matplotlib.pyplot as plt
import imageio
from IPython.display import Video, Image

os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(DEVICE))
torch.manual_seed(0)
np.random.seed(0)

Using cuda:0 device


In [2]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.out_channels = out_channels
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.gelu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = F.gelu(out)
        return out
    
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes = 257):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 
        self.fc = nn.Linear(512, num_classes)
        
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)    

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.gelu(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [3]:
torch.cuda.empty_cache()

model = ResNet(ResidualBlock, [3, 4, 6, 3])

if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    model = nn.DataParallel(model)

model.to(DEVICE)

optim = torch.optim.Adam(model.parameters(), lr=0.001)

criterion = nn.CrossEntropyLoss()

Using 2 GPUs


In [4]:
BATCH_SIZE = 256

transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    Lambda(lambda x: x.convert("RGB")),  # Convert all images to RGB
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

dataset_glob = torchvision.datasets.Caltech256('./data_caltech256', download=True, transform=transforms)
dataset_train, dataset_test = torch.utils.data.random_split(dataset_glob, [int(len(dataset_glob)*0.8), len(dataset_glob) - int(len(dataset_glob)*0.8)])
dl_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
dl_test = torch.utils.data.DataLoader(dataset_test, batch_size=256, shuffle=False, num_workers=4)

Files already downloaded and verified


In [5]:
init_time = time.time()
EPOCHS = 20

train_loss_list = []
train_acc_list = []
test_loss_list = []
test_acc_list = []

for i in range(1, EPOCHS+1):
    running_acc = 0.0
    running_loss = 0.0
    model.train()
    for j, (features_, labels_) in enumerate(dl_train):


        optim.zero_grad()
        features, labels = features_.to(DEVICE), labels_.to(DEVICE)
        pred = model.forward(features)
        loss = criterion(pred, labels)
        
        loss.backward(create_graph=True) # important! use create_graph=True
        
        optim.step()

        running_loss += loss.item() * features_.size(0)
        running_acc += (torch.argmax(pred, dim=1) == labels).sum().item()

        if j%50 == 0:
            print("EPOCH: {}, BATCH: {}".format(i, j))

    
    train_loss = running_loss / len(dl_train.dataset)
    train_acc = running_acc / len(dl_train.dataset)
    train_loss_list.append(train_loss)
    train_acc_list.append(train_acc)

    running_acc = 0.0
    running_loss = 0.0
    with torch.no_grad():
        model.eval()
        for j, (features_, labels_) in enumerate(dl_test):
            features, labels = features_.to(DEVICE), labels_.to(DEVICE)
            pred = model.forward(features)
            running_acc += (torch.argmax(pred, dim=1) == labels).sum().item()
            loss = criterion(pred, labels)
            running_loss += loss.item() * features_.size(0)

    test_acc = running_acc / len(dl_test.dataset)
    test_loss = running_loss / len(dl_test.dataset)
    test_loss_list.append(test_loss)
    test_acc_list.append(test_acc)
    
    print("EPOCH: {}, TRAIN LOSS: {}, ACC: {}".format(i, train_loss, train_acc))
    print("EPOCH: {}, TEST ACC: {}\n".format(i, test_acc))
    
print("Time taken: {}".format(time.time() - init_time))

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


EPOCH: 1, BATCH: 0
EPOCH: 1, BATCH: 50
EPOCH: 1, TRAIN LOSS: 5.116221036905168, ACC: 0.07143148866653053
EPOCH: 1, TEST ACC: 0.08134596537079386

EPOCH: 2, BATCH: 0
EPOCH: 2, BATCH: 50
EPOCH: 2, TRAIN LOSS: 4.595645735851472, ACC: 0.11513171329385338
EPOCH: 2, TEST ACC: 0.10878797778503757

EPOCH: 3, BATCH: 0
EPOCH: 3, BATCH: 50
EPOCH: 3, TRAIN LOSS: 4.222991145579747, ACC: 0.15899530324688585
EPOCH: 3, TEST ACC: 0.13508657301535446

EPOCH: 4, BATCH: 0
EPOCH: 4, BATCH: 50
EPOCH: 4, TRAIN LOSS: 3.907096390990011, ACC: 0.19677353481723503
EPOCH: 4, TEST ACC: 0.18458020254818686

EPOCH: 5, BATCH: 0
EPOCH: 5, BATCH: 50
EPOCH: 5, TRAIN LOSS: 3.582888582119388, ACC: 0.24288339799877476
EPOCH: 5, TEST ACC: 0.19503430251551782

EPOCH: 6, BATCH: 0
EPOCH: 6, BATCH: 50
EPOCH: 6, TRAIN LOSS: 3.259081021751558, ACC: 0.291974678374515
EPOCH: 6, TEST ACC: 0.24403789611238158

EPOCH: 7, BATCH: 0
EPOCH: 7, BATCH: 50
EPOCH: 7, TRAIN LOSS: 2.914481178312514, ACC: 0.34572187053297937
EPOCH: 7, TEST ACC: 0

In [6]:
# save training and testing statistics in csv
path = '../results/caltech256'
name = 'baseline'
np.savetxt(path + '/' + name + '_train_loss.csv', train_loss_list, delimiter=',')
np.savetxt(path + '/' + name + '_train_acc.csv', train_acc_list, delimiter=',')
np.savetxt(path + '/' + name + '_test_loss.csv', test_loss_list, delimiter=',')
np.savetxt(path + '/' + name + '_test_acc.csv', test_acc_list, delimiter=',')