In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
from torchvision.models import vgg16
import time
import os.path as osp

In [2]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
# The training log path used later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
# Load CIFAR-100 and set up preprocessing and batching
batch_size = 64

# Convert each image to a tensor, then normalise every RGB channel with mean 0.5 / std 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Both splits share the same on-disk root and the same transform
trainset = CIFAR100(root='./data_cifar100', train=True, transform=transform, download=True)
testset = CIFAR100(root='./data_cifar100', train=False, transform=transform, download=True)

# Only the training split is shuffled; evaluation order stays fixed
trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data_cifar100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:02<00:00, 69875905.23it/s]


Extracting ./data_cifar100/cifar-100-python.tar.gz to ./data_cifar100
Files already downloaded and verified


In [4]:
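# Alternative (disabled): use torchvision's built-in VGG-16 instead of the custom VGG16 class defined below.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# net = vgg16(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes
# net.to(device)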



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# Building VGG16
class VGG16(nn.Module):
    """VGG-16 style convolutional classifier.

    The network is made of:
      * ``features``   - 13 3x3 convolutions (each followed by ReLU) arranged in
        five stages, every stage ending in a 2x2 max-pool;
      * ``avgpool``    - adaptive average pooling that fixes the feature map at
        7x7, so the classifier input is always ``512 * 7 * 7`` values;
      * ``classifier`` - two 4096-unit fully connected layers with ReLU and
        dropout, followed by the output layer.

    Args:
        num_classes: Number of output logits. Defaults to 100 because this
            notebook trains on CIFAR-100; the previous hard-coded value of 10
            did not match the dataset's 100 labels.
    """

    def __init__(self, num_classes=100):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # Stage 1: 3 -> 64 channels
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: 64 -> 128 channels
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: 128 -> 256 channels
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 4: 256 -> 512 channels
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 5: 512 -> 512 channels
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # One logit per class; must match the label range of the dataset
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial positions
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [None]:
# Pick the GPU when one is available and place the model on it: inputs and model
# parameters have to be on the same device for the forward pass.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = VGG16().to(device)

In [5]:
# Number of full passes over the training set
num_epochs = 50

# Cross-entropy objective, optimised with SGD plus momentum over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)

In [6]:
log_file_path = "/content/gdrive/MyDrive/Project_II/model_logging/VGG_16_cifar100.log"

# Define a function that appends text to the log file
def write_log(log_file_path, content):
    """Append ``content`` plus a trailing newline to the file at ``log_file_path``.

    The parent directory is created when it does not exist yet, so the first
    write of a run does not fail with ``FileNotFoundError``.

    Args:
        log_file_path: Path of the log file; created if missing.
        content: Text to append (may itself contain newlines).
    """
    import os  # local import: the notebook's top-level import cell only brings in os.path

    log_dir = os.path.dirname(log_file_path)
    if log_dir:  # a bare file name has no directory component to create
        os.makedirs(log_dir, exist_ok=True)

    with open(log_file_path, 'a') as log_file:
        log_file.write(content + '\n')

In [None]:
# Define a function that reads training progress back from the log file
def read_log(log_file_path):
    """Read how many epochs were logged and the metrics of the last one.

    The parser assumes exactly three lines per epoch, where the first line ends
    in ``": <train loss>"`` and the second in ``": <train accuracy>%"``.

    NOTE(review): the training loop in this notebook writes more than three
    lines per epoch through ``write_log``, so logs it produces will not parse
    with this layout - confirm which format is intended.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A tuple ``(num_epochs_completed, metrics)``. ``metrics`` is
        ``(train_loss, train_accuracy)`` for the last epoch, or ``None`` when
        the file is missing, empty, incomplete or malformed.
    """
    try:
        with open(log_file_path, 'r') as log_file:
            lines = log_file.readlines()
    except FileNotFoundError:
        # No log yet (e.g. the very first run): treat it like an empty log
        return 0, None

    # Nothing logged so far
    if not lines:
        return 0, None

    # Each completed epoch is expected to contribute exactly three lines
    num_epochs_completed = len(lines) // 3

    # A line count that is not a multiple of three means a partial or foreign record
    if len(lines) % 3 != 0:
        print("Error: File log contains incomplete or invalid information.")
        return num_epochs_completed, None

    try:
        # Parse the last epoch's record: "<label>: <loss>" and "<label>: <accuracy>%"
        last_epoch_info = lines[-3:]
        last_epoch_train_loss = float(last_epoch_info[0].split(":")[1].strip())
        # Drop the trailing '%' before converting the accuracy
        last_epoch_train_accuracy = float(last_epoch_info[1].split(":")[1].strip()[:-1])
    except (ValueError, IndexError):
        print("Error: Invalid format or missing information in log file.")
        return num_epochs_completed, None

    return num_epochs_completed, (last_epoch_train_loss, last_epoch_train_accuracy)

In [7]:
# Initialize the list that stores the training time of each epoch;
# the training loop appends one (epoch_number, elapsed_seconds) tuple per epoch.
epoch_times_list = []

In [8]:
# Record the wall-clock time just before training starts.
# NOTE(review): not read anywhere in the visible cells (the loop times each epoch
# on its own) - confirm a later cell uses it.
start_time = time.time()

In [9]:
# Debugging aid: set CUDA_LAUNCH_BLOCKING=1 for this process.
# NOTE(review): per the PyTorch CUDA notes this makes kernel launches synchronous so
# errors are reported at the failing call; it slows training and presumably has to be
# set before CUDA is first initialised - confirm it is still needed and set early enough.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


In [10]:
def _format_duration(seconds):
    """Format a duration given in seconds as '<h>h <m>m <s>s' (whole units, truncated)."""
    return f"{int(seconds // 3600)}h {int((seconds % 3600) // 60)}m {int(seconds % 60)}s"


def _count_correct(model, loader, device):
    """Return ``(correct, total)`` top-1 prediction counts of ``model`` over ``loader``.

    Switches the model to eval mode and disables gradient tracking while iterating.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


# Move every batch to the device the model's parameters already live on, so the
# loop neither depends on a separately defined global nor mismatches the model.
model_device = next(net.parameters()).device

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    net.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(model_device), data[1].to(model_device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        # Predicted class = index of the largest logit; detach so no graph is kept
        predicted = outputs.detach().argmax(dim=1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train
    # Report the mean loss per batch in both the log and the console
    # (the log used to record the sum over batches while the console showed the mean).
    avg_train_loss = train_loss / len(trainloader)
    write_log(log_file_path, f"VGG-16\n{'-'*50}\nEpoch {epoch + 1}:\n\tTrain loss: {avg_train_loss:.3f}, Train Accuracy: {train_accuracy:.2f}%")
    print(f"Epoch {epoch + 1}, Loss: {avg_train_loss}, Train Accuracy: {train_accuracy:.2f}%")

    # Evaluate the model on the test set and compute its accuracy
    correct_test, total_test = _count_correct(net, testloader, model_device)
    test_accuracy = 100 * correct_test / total_test
    write_log(log_file_path, f"VGG-16\n{'-'*50}\nEpoch {epoch + 1}:\n\tAccuracy on test set: {test_accuracy:.2f}%")

    # Epoch time covers both the training pass and the evaluation pass
    end_time = time.time()
    elapsed_time = end_time - epoch_start_time
    elapsed_time_str = _format_duration(elapsed_time)
    epoch_times_list.append((epoch + 1, elapsed_time))

    # Total elapsed time is the sum of every epoch duration recorded so far
    total_elapsed_time = sum(seconds for _, seconds in epoch_times_list)
    total_elapsed_time_str = _format_duration(total_elapsed_time)
    write_log(log_file_path, f"\tElapsed Time: {elapsed_time_str}")
    write_log(log_file_path, f"\tTotal Elapsed Time: {total_elapsed_time_str}\n{'-'*50}")
    print(f"Accuracy on test set: {test_accuracy:.2f}%")

Epoch 1, Loss: 4.514012345267684, Train Accuracy: 1.63%
Accuracy on test set: 2.30%
Epoch 2, Loss: 4.3304688104278295, Train Accuracy: 3.27%
Accuracy on test set: 4.47%
Epoch 3, Loss: 3.9457928768509185, Train Accuracy: 7.06%
Accuracy on test set: 10.69%
Epoch 4, Loss: 3.6383996845206337, Train Accuracy: 11.76%
Accuracy on test set: 15.08%
Epoch 5, Loss: 3.3309383712461234, Train Accuracy: 17.33%
Accuracy on test set: 21.94%
Epoch 6, Loss: 3.012654499019808, Train Accuracy: 23.45%
Accuracy on test set: 24.84%
Epoch 7, Loss: 2.7510148704509296, Train Accuracy: 28.81%
Accuracy on test set: 30.29%
Epoch 8, Loss: 2.5054836757957477, Train Accuracy: 34.15%
Accuracy on test set: 34.02%
Epoch 9, Loss: 2.274147378209302, Train Accuracy: 39.26%
Accuracy on test set: 34.53%
Epoch 10, Loss: 2.07378155953439, Train Accuracy: 43.91%
Accuracy on test set: 39.30%
Epoch 11, Loss: 1.8885610077692114, Train Accuracy: 48.36%
Accuracy on test set: 40.77%
Epoch 12, Loss: 1.7504858158128647, Train Accuracy: