In [None]:
!pip install torch torchvision



In [None]:
# Mount Google Drive at /content/gdrive; the log file and model checkpoint paths
# used further down live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.models as models
import time
import os.path as osp

In [None]:
# MNIST preprocessing: the training pipeline adds a padded random crop as
# augmentation, the test pipeline only converts and normalizes.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(28, padding=4),  # MNIST images are 28x28
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),  # Normalize for single-channel images
    ]
)
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Mini-batch size shared by both loaders
batch_size = 64

# Datasets are downloaded into ./data_mnist on first use
train_dataset = datasets.MNIST(
    root='./data_mnist',
    train=True,
    download=True,
    transform=train_transform,
)
test_dataset = datasets.MNIST(
    root='./data_mnist',
    train=False,
    download=True,
    transform=test_transform,
)

# Only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:

# Build a ResNet-50 without pretrained weights.
# `weights=None` is the current spelling of the deprecated `pretrained=False` flag.
resnet50 = models.resnet50(weights=None)
num_classes = 10
# The inputs are single-channel (see the Normalize((0.5,), (0.5,)) transforms),
# so the first convolution is replaced by one that takes 1 input channel.
resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Replace the classification head so it outputs one score per class.
in_features = resnet50.fc.in_features
resnet50.fc = nn.Linear(in_features, num_classes)



In [None]:
# Select the device and move the model first, so the optimizer below is built
# from the parameters as they live on the training device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet50.to(device)

# Loss, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet50.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
# Multiply the learning rate by 0.1 every 30 scheduler steps
# (the training loop calls scheduler.step() once per epoch).
# Ending the cell on an assignment avoids printing the full model repr.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Text log under the mounted Drive: train()/test() append to it through write_log(),
# and read_log() parses it to decide at which epoch to start.
log_file_path = "/content/gdrive/MyDrive/Project_II/model_logging/resnet_50_mnist.log"

In [None]:
# Helper that appends a single entry to the log file
def write_log(log_file_path, content):
    """Append ``content`` followed by a newline to the file at ``log_file_path``.

    The file is opened in append mode, so earlier entries are kept and the
    file is created when it does not exist yet.
    """
    with open(log_file_path, mode='a') as sink:
        sink.writelines((content, '\n'))

In [None]:
# Recover training progress from the log file written by train()/test()
def read_log(log_file_path):
    """Read the training log and report how far training has progressed.

    An epoch counts as completed once its "Training Loss: ..." entry has been
    written. Counting those entries (instead of assuming a fixed number of
    lines per epoch) keeps the result correct regardless of how many other
    lines (test results, timings, separators) surround each entry.

    Args:
        log_file_path: Path of the text log to inspect.

    Returns:
        A tuple ``(num_epochs_completed, last_epoch_info)``.
        ``last_epoch_info`` is ``(train_loss, train_accuracy)`` of the most
        recent completed epoch, or ``None`` when the log is missing, empty,
        contains no completed epoch, or cannot be parsed.
    """
    loss_tag = "Training Loss:"
    accuracy_tag = "Training Accuracy:"

    try:
        with open(log_file_path, 'r') as log_file:
            lines = log_file.readlines()
    except FileNotFoundError:
        # First run: nothing has been logged yet, so start from epoch 0
        return 0, None

    # One "Training Loss:" entry is written per finished training epoch
    loss_line_indices = [idx for idx, line in enumerate(lines) if loss_tag in line]
    num_epochs_completed = len(loss_line_indices)
    if num_epochs_completed == 0:
        return 0, None

    try:
        # Only look at the lines belonging to the last completed epoch
        tail = lines[loss_line_indices[-1]:]
        loss_text = tail[0].split(loss_tag, 1)[1].split()[0]
        # The accuracy may sit on the same line as the loss or on a later one
        accuracy_line = next(line for line in tail if accuracy_tag in line)
        accuracy_text = accuracy_line.split(accuracy_tag, 1)[1].split()[0]
        last_epoch_train_loss = float(loss_text)
        last_epoch_train_accuracy = float(accuracy_text.rstrip('%'))
    except (ValueError, IndexError, StopIteration):
        print("Error: Invalid format or missing information in log file.")
        return num_epochs_completed, None

    return num_epochs_completed, (last_epoch_train_loss, last_epoch_train_accuracy)

In [None]:
# Initialize the list of timing records; the training loop appends one
# (epoch number, elapsed seconds) tuple per finished epoch.
epoch_times_list = []

In [None]:
# Reference timestamp taken when this cell runs; later cells subtract it
# from time.time() to report elapsed training time.
start_time = time.time()

In [None]:
# Train the model for one epoch and log the results
def train(epoch):
    """Run one training epoch over ``train_loader`` and log loss, accuracy and timings.

    Args:
        epoch: Zero-based epoch index; only used to label the printed summary.

    Side effects:
        Updates ``resnet50`` through ``optimizer``, appends three entries to
        the log file via ``write_log`` and prints a summary of the epoch.
    """
    # Time this epoch on its own instead of measuring from the notebook-wide start_time
    epoch_start_time = time.time()

    resnet50.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    epoch_end_time = time.time()

    # Guard against an empty loader so the summary never divides by zero
    avg_loss = train_loss / max(len(train_loader), 1)
    accuracy = 100 * correct / total if total else 0.0

    # Duration of this epoch only
    elapsed_time = epoch_end_time - epoch_start_time
    elapsed_time_str = f"{int(elapsed_time // 60)}m {int(elapsed_time % 60)}s"
    write_log(log_file_path, f"Training Loss: {avg_loss:.3f} \t\t Training Accuracy: {accuracy:.2f}%")
    write_log(log_file_path, f"\tElapsed Time: {elapsed_time_str}")

    # Total running time, measured from the module-level start_time so that
    # the epoch that just finished is included
    total_elapsed_time = epoch_end_time - start_time
    total_elapsed_time_str = f"{int(total_elapsed_time // 3600)}h {int((total_elapsed_time % 3600) // 60)}m {int(total_elapsed_time % 60)}s"
    write_log(log_file_path, f"\tTotal Elapsed Time: {total_elapsed_time_str}\n{'-'*50}")

    print(f"Epoch {epoch+1}:\nTraining Loss: {avg_loss:.3f}, Accuracy: {accuracy:.2f}%")


In [None]:
def test(epoch=None):
    """Evaluate ``resnet50`` on ``test_loader`` and log/print loss and accuracy.

    Args:
        epoch: Zero-based epoch index used to label the report. When omitted,
            the module-level ``epoch`` variable (the training-loop counter) is
            used if it exists, so existing ``test()`` calls keep working; if
            neither is available the report is labelled "N/A" instead of
            raising ``NameError``.

    Side effects:
        Appends one report to the log file via ``write_log`` and prints it.
    """
    if epoch is None:
        # Backward compatibility: fall back to the loop variable of the training cell
        epoch = globals().get("epoch")
    epoch_label = epoch + 1 if epoch is not None else "N/A"

    resnet50.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = resnet50(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Guard against an empty loader so the summary never divides by zero
    avg_loss = test_loss / max(len(test_loader), 1)
    accuracy = 100 * correct / total if total else 0.0
    write_log(log_file_path, f"ResNet-50\n{'-'*50}\nEpoch {epoch_label}:\nTest Loss: {avg_loss:.3f} \t\t Accuracy: {accuracy:.2f}%")
    print(f"ResNet-50\n{'-'*50}\nEpoch {epoch_label}:\nTest Loss: {avg_loss:.3f}, Accuracy: {accuracy:.2f}%")


In [None]:
# Total number of epochs to train for, followed by selecting the device and
# moving the model onto it (this repeats the device setup of an earlier cell).
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet50.to(device)

In [None]:
# Checkpoint written after every epoch and read back when resuming.
# NOTE(review): the file name says "cifar10" although this notebook trains on MNIST;
# it is kept unchanged so that existing checkpoints are still found.
checkpoint_path = "/content/gdrive/MyDrive/Project_II/model_logging/resnet50_cifar10_model.pth"

# Read how many epochs the log file reports as completed
num_epochs_completed, last_epoch_info = read_log(log_file_path)
start_epoch = num_epochs_completed

if start_epoch > 0:
    # Skipping already-logged epochs only makes sense when training continues
    # from the weights those epochs produced.
    if osp.exists(checkpoint_path):
        resnet50.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        print(f"Warning: the log reports {start_epoch} completed epoch(s) but no checkpoint "
              f"exists at {checkpoint_path}; continuing with the current in-memory weights.")
    # Fast-forward the learning-rate schedule to the resumed epoch. The guard makes
    # re-running this cell in the same kernel a no-op. PyTorch may warn once that
    # scheduler.step() ran before optimizer.step(); that is expected here.
    # NOTE: optimizer momentum buffers are not checkpointed and start fresh.
    while scheduler.last_epoch < start_epoch:
        scheduler.step()

for epoch in range(start_epoch, num_epochs):
    epoch_start_time = time.time()

    # Evaluation runs before training, so the test figures logged under
    # "Epoch N" describe the model as it was before epoch N's training pass.
    test()
    train(epoch)
    scheduler.step()

    # Record the duration of this epoch only (not the time since start_time)
    end_time = time.time()
    elapsed_time = end_time - epoch_start_time
    epoch_times_list.append((epoch + 1, elapsed_time))

    # Saving model
    torch.save(resnet50.state_dict(), checkpoint_path)

ResNet-50
--------------------------------------------------
Epoch 1:
Test Loss: 0.089, Accuracy: 97.07%
Epoch 1:
Training Loss: 0.125, Accuracy: 96.03%
ResNet-50
--------------------------------------------------
Epoch 2:
Test Loss: 0.074, Accuracy: 97.53%
Epoch 2:
Training Loss: 0.112, Accuracy: 96.53%
ResNet-50
--------------------------------------------------
Epoch 3:
Test Loss: 0.078, Accuracy: 97.46%
Epoch 3:
Training Loss: 0.094, Accuracy: 96.98%
ResNet-50
--------------------------------------------------
Epoch 4:
Test Loss: 0.060, Accuracy: 98.04%
Epoch 4:
Training Loss: 0.083, Accuracy: 97.36%
ResNet-50
--------------------------------------------------
Epoch 5:
Test Loss: 0.056, Accuracy: 98.28%
Epoch 5:
Training Loss: 0.075, Accuracy: 97.61%
ResNet-50
--------------------------------------------------
Epoch 6:
Test Loss: 0.042, Accuracy: 98.50%
Epoch 6:
Training Loss: 0.069, Accuracy: 97.83%
ResNet-50
--------------------------------------------------
Epoch 7:
Test Loss: 0