In [7]:
import torch
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
import os


from urllib.request import urlretrieve
import json

import matplotlib.pyplot as plt
import PIL

import tensorboard as tb

In [2]:
print(torch.__version__)

2.1.1


In [3]:
# GPU device 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())
print(torch.cuda.current_device())
print(torch.cuda.memory_allocated())
print(torch.cuda.memory_cached())

cuda:0
NVIDIA GeForce RTX 3090
1
0
0
0




# CIFAR-10

In [16]:
# These models are trained with a mini- batch size of 128 on two GPUs
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, ), (0.5, ))])

batch_size = 128

# trainset
trainset = torchvision.datasets.CIFAR10(root='/home/hslee/Desktop/Datasets', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# valset : split the trainset into trainset and validationset
trainset, valset = torch.utils.data.random_split(trainset, [45000, 5000])

# testset
testset = torchvision.datasets.CIFAR10(root='/home/hslee/Desktop/Datasets', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# train, val, test data size
print(len(trainset))
print(len(valset))
print(len(testset))

# train, val, test data shape
print(trainset[0][0].shape)
print(valset[0][0].shape)
print(testset[0][0].shape)

# record image to tensorboard
writer = SummaryWriter('runs/resnet32_cifar10_experiment_1')
writer.add_image('4_train_images', torchvision.utils.make_grid(trainset[0][0], nrow=4))
writer.add_image('4_val_images', torchvision.utils.make_grid(valset[0][0], nrow=4))
writer.add_image('4_test_images', torchvision.utils.make_grid(testset[0][0], nrow=4))
writer.close()

Files already downloaded and verified
Files already downloaded and verified
45000
5000
10000
torch.Size([3, 32, 32])
torch.Size([3, 32, 32])
torch.Size([3, 32, 32])


In [19]:
# We use a weight decay of 0.0001 and momentum of 0.9, and adopt the weight initialization in [13] and BN [16] but with no dropout. 
# These models are trained with a mini- batch size of 128 on two GPUs. 
# We start with a learning rate of 0.1, divide it by 10 at 32k and 48k iterations, 
# and terminate training at 64k iterations, which is determined on a 45k/5k train/val split. 
# We follow the simple data augmentation in for training: 4 pixels are padded on each side, 
# and a 32×32 crop is randomly sampled from the padded image or its horizontal flip. 
# For testing, we only evaluate the single view of the original 32×32 image.

model = models.resnet32(pretrained=False, progress=True)
print(model.eval)
model = model.to(device)
# record model to tensorboard
writer.add_graph(model, trainset[0][0].unsqueeze(0))

# hyper parameters
## learning rate
lr = 0.1
## momentum
momentum = 0.9
## weight decay
L2 = 0.0001
## batch size
mini_batch_size = 256
# iterations
iterations = len(trainset) // batch_size + 1
print(f"# iterations per epoch = {iterations}")
print(f"1peoch : {iterations} iterations")
# epochs
total_iterations = 64 * 10e3
epochs = int(total_iterations // iterations)
print(f"# epochs = {epochs}")

## optimizer
## scheduler
## loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=L2)

AttributeError: module 'torchvision.models' has no attribute 'resnet32'

In [7]:
# record model to tensorboard
writer.add_graph(model, train_dataset[0][0].unsqueeze(0).to(device))

In [10]:
# training no pretrained resnet50 model
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    print(f"epoch : {epoch+1} epoch --------------------------------------------")
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %(epoch+1, i+1, running_loss/100))
            running_loss = 0.0
            
        # tensorboard
        writer.add_scalar('loss', loss.item(), epoch*len(train_loader)+i)
        

epoch : 1 epoch --------------------------------------------
[1,   100] loss: 7.032
[1,   200] loss: 6.913
[1,   300] loss: 6.789
[1,   400] loss: 6.700
[1,   500] loss: 6.618


KeyboardInterrupt: 