In [7]:
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import time as time
import numpy as np
from gradient_descent_the_ultimate_optimizer import gdtuo
from gradient_descent_the_ultimate_optimizer.gdtuo import Optimizable
import os
import matplotlib.pyplot as plt
import imageio
from IPython.display import Video, Image

# Restrict CUDA to the device with index "1". This assignment runs before the
# first torch.cuda call in this notebook (torch.cuda.is_available(), further down).
# NOTE(review): the GPU index is hardcoded — presumably chosen for a specific
# multi-GPU host; on a machine without a device 1 the DEVICE check below would
# likely fall back to 'cpu'. Confirm before running elsewhere.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

class CNN_2_2(nn.Module):
    """Small CNN classifier: two 3x3 convolutions followed by two linear layers.

    The layer sizes are fixed for 3-channel 32x32 images: both convolutions
    keep the spatial size (kernel 3, stride 1, padding 1) and the single 2x2
    max-pool halves it, so the flattened feature vector that feeds ``fc1``
    has 64 * 16 * 16 = 16384 entries.

    ``forward`` returns log-probabilities over 10 classes, so it pairs with
    ``F.nll_loss``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        # Channel-wise dropout on the 4-D feature maps that follow the pooling step.
        self.dropout1 = nn.Dropout2d(0.25)
        # Element-wise dropout on the 2-D (batch, 128) activations after fc1.
        # This was nn.Dropout2d, but feeding a 2-D tensor to dropout2d is
        # deprecated in PyTorch (it warns on every training step and is slated
        # to become an error); nn.Dropout is the documented replacement.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(16384, 128)  # 16384 = 64 channels * 16 * 16 after pooling
        self.fc2 = nn.Linear(128, 10)
        self.bn1 = nn.BatchNorm2d(3)
        self.bn2 = nn.BatchNorm2d(32)
        self.bn3 = nn.BatchNorm1d(16384)

    def forward(self, x):
        """Compute class log-probabilities for a batch of images.

        Args:
            x: image batch; the layer sizes require shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores.
        """
        # Each batch norm is applied to the *input* of the layer that follows it.
        x = self.bn1(x)
        x = self.conv1(x)

        x = F.gelu(x)

        x = self.bn2(x)
        x = self.conv2(x)

        x = F.gelu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest

        x = self.bn3(x)
        x = self.fc1(x)

        x = F.gelu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

# --- Run configuration -------------------------------------------------------
BATCH_SIZE = 256
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

# NOTE(review): no RNG seed is set anywhere in this cell, so weight
# initialisation and shuffling order will differ from run to run.

# --- Data: CIFAR-10 train/test splits, images converted to tensors ------------
# ToTensor is shared by both splits; the train split is built (and, if needed,
# downloaded) first, then the test split.
_to_tensor = torchvision.transforms.ToTensor()
dataset_train, dataset_test = (
    torchvision.datasets.CIFAR10(
        './data_cifar', train=is_train, download=True, transform=_to_tensor
    )
    for is_train in (True, False)
)

# Only the training loader shuffles; both loaders use the same batch size.
dl_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=BATCH_SIZE, shuffle=True
)
dl_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=BATCH_SIZE, shuffle=False
)

# --- Model and optimizer -----------------------------------------------------
model = CNN_2_2().to(DEVICE)
optim = torch.optim.Adam(model.parameters(), lr=0.001)

cuda
Files already downloaded and verified
Files already downloaded and verified


In [8]:
# Train for EPOCHS epochs, evaluating on the test split after every epoch.
# Per-epoch loss/accuracy for both splits is appended to the four *_list
# histories, and the total wall-clock time is printed at the end.
init_time = time.time()
EPOCHS = 10

train_loss_list = []
train_acc_list = []
test_loss_list = []
test_acc_list = []

for epoch in range(1, EPOCHS + 1):
    # ---- training pass ----
    running_acc = 0.0
    running_loss = 0.0
    model.train()
    for features_, labels_ in dl_train:
        optim.zero_grad()
        features, labels = features_.to(DEVICE), labels_.to(DEVICE)
        # Call the module itself rather than .forward() so nn.Module hooks run.
        pred = model(features)
        loss = F.nll_loss(pred, labels)
        # Plain first-order backward pass. torch.optim.Adam never differentiates
        # through the gradients, so create_graph=True only made autograd build
        # and retain a graph of the backward pass (extra memory, plus PyTorch's
        # reference-cycle warning) without changing the gradient values.
        loss.backward()

        optim.step()
        # nll_loss returns the batch mean; weight it by the batch size so the
        # epoch average stays correct when the last batch is smaller.
        running_loss += loss.item() * features_.size(0)
        running_acc += (torch.argmax(pred, dim=1) == labels).sum().item()
    train_loss = running_loss / len(dl_train.dataset)
    train_acc = running_acc / len(dl_train.dataset)

    train_loss_list.append(train_loss)
    train_acc_list.append(train_acc)

    # ---- evaluation pass (no gradients, eval-mode dropout / batch norm) ----
    running_acc = 0.0
    running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for features_, labels_ in dl_test:
            features, labels = features_.to(DEVICE), labels_.to(DEVICE)
            pred = model(features)
            running_acc += (torch.argmax(pred, dim=1) == labels).sum().item()
            loss = F.nll_loss(pred, labels)
            running_loss += loss.item() * features_.size(0)
    test_loss = running_loss / len(dl_test.dataset)
    test_acc = running_acc / len(dl_test.dataset)

    test_loss_list.append(test_loss)
    test_acc_list.append(test_acc)

    print("EPOCH: {}, TRAIN LOSS: {}, ACC: {}".format(epoch, train_loss, train_acc))
    print("TEST ACC: {}\n".format(test_acc))

print("Time taken: {}".format(time.time() - init_time))

EPOCH: 1, TRAIN LOSS: 1.5536489905929565, ACC: 0.46708
TEST ACC: 0.6275

EPOCH: 2, TRAIN LOSS: 1.1104099477005005, ACC: 0.60664
TEST ACC: 0.6637

EPOCH: 3, TRAIN LOSS: 0.9624481081581115, ACC: 0.65994
TEST ACC: 0.6953

EPOCH: 4, TRAIN LOSS: 0.8718431086730957, ACC: 0.6912
TEST ACC: 0.7006

EPOCH: 5, TRAIN LOSS: 0.7848658743667603, ACC: 0.7203
TEST ACC: 0.7142

EPOCH: 6, TRAIN LOSS: 0.7231053317260743, ACC: 0.7439
TEST ACC: 0.7151

EPOCH: 7, TRAIN LOSS: 0.6672848970413208, ACC: 0.76372
TEST ACC: 0.7172

EPOCH: 8, TRAIN LOSS: 0.624582153968811, ACC: 0.77734
TEST ACC: 0.7183

EPOCH: 9, TRAIN LOSS: 0.5749708553123474, ACC: 0.7943
TEST ACC: 0.7122

EPOCH: 10, TRAIN LOSS: 0.5381403729057312, ACC: 0.8081
TEST ACC: 0.7234

Time taken: 73.61860704421997


In [10]:
# Save the per-epoch training and testing statistics as CSV files, one file per
# metric, named '<name>_<metric>.csv' inside `path`.
path = '../results/CIFAR-10'
name = 'baseline_gelu'

# np.savetxt does not create missing directories, so make sure the target
# exists; exist_ok keeps re-running this cell harmless.
os.makedirs(path, exist_ok=True)

for metric, history in (
    ('train_loss', train_loss_list),
    ('train_acc', train_acc_list),
    ('test_loss', test_loss_list),
    ('test_acc', test_acc_list),
):
    np.savetxt(os.path.join(path, name + '_' + metric + '.csv'), history, delimiter=',')