<a href="https://colab.research.google.com/github/SergeyXiong/final-projects/blob/main/Ex5_2_XiongSuifu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

In [3]:
# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [4]:
# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                    download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                    shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                    download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                    shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 13081252.68it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False   

In [8]:
import torchvision.models as models
import torch.nn as nn
feature_extract = True
model_ft = models.resnet18(pretrained = True)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 10), nn.LogSoftmax(dim=1))
input_size= 224



In [9]:
model_ft = model_ft.to(device)

In [10]:
model_ft

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
def unfreez_last10(model):
    layers = []
    for layer, param in model_ft.named_parameters():
        layers.append(layer)
    print(layers)
    last_10_layers = layers[-10:]
    print(last_10_layers)
    for layer, param in model_ft.named_parameters():
        if layer not in last_10_layers:
            param.requires_grad = False
unfreez_last10(model_ft)

['conv1.weight', 'bn1.weight', 'bn1.bias', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', 'layer2.0.downsample.0.weight', 'layer2.0.downsample.1.weight', 'layer2.0.downsample.1.bias', 'layer2.1.conv1.weight', 'layer2.1.bn1.weight', 'layer2.1.bn1.bias', 'layer2.1.conv2.weight', 'layer2.1.bn2.weight', 'layer2.1.bn2.bias', 'layer3.0.conv1.weight', 'layer3.0.bn1.weight', 'layer3.0.bn1.bias', 'layer3.0.conv2.weight', 'layer3.0.bn2.weight', 'layer3.0.bn2.bias', 'layer3.0.downsample.0.weight', 'layer3.0.downsample.1.weight', 'layer3.0.downsample.1.bias', 'layer3.1.conv1.weight', 'layer3.1.bn1.weight', 'layer3.1.bn1.bias', 'layer3.1.

In [12]:
import torch.optim as optim
optimizer = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.5)
criterion = nn.NLLLoss()

In [14]:
from datetime import datetime

def get_acc(output, label):
    """Calculation accuracy"""
    total = output.shape[0]
    _, pred_label = output.max(1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / total


def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
   # """Model training"""
    prev_time = datetime.now()
    
    num_epochs = 5
    train_losses = []
    test_losses = []
    train_correct = []
    test_correct = []


    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        net = net.train()       # Train the model
        for im, label in train_data:
            im = im.to(device)          # (bs, 3, h, w)
            label = label.to(device)    # (bs, h, w)
            output = net(im)                    # Forward propagation
            loss = criterion(output, label)     # Loss function
            optimizer.zero_grad()               # Gradient zeroing
            loss.backward()                     # Backward propagation
            optimizer.step()                    # Gradient updates

            train_loss += loss.item()
            train_acc += get_acc(output, label)

        # Print run time
        cur_time = datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)

        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            net = net.eval()        # Validate the model
            for im, label in valid_data:
                im = im.to(device)          # (bs, 3, h, w)
                label = label.to(device)    # (bs, h, w)
                output = net(im)                        # Forward propagation
                loss = criterion(output, label)         # Loss function
                valid_loss += loss.item()
                valid_acc += get_acc(output, label)
            # For each Epoch, print the result.
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, " %
                         (epoch, train_loss / len(train_data), train_acc / len(train_data), valid_loss / len(valid_data), valid_acc / len(valid_data)))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
                         (epoch, train_loss / len(train_data), train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str)

train(model_ft, trainloader, testloader, 5, optimizer, criterion)

Epoch 0. Train Loss: 0.954436, Train Acc: 0.710006, Valid Loss: 0.720166, Valid Acc: 0.766515, Time 00:01:36
Epoch 1. Train Loss: 0.660959, Train Acc: 0.782469, Valid Loss: 0.628237, Valid Acc: 0.790249, Time 00:01:46
Epoch 2. Train Loss: 0.615082, Train Acc: 0.792571, Valid Loss: 0.608040, Valid Acc: 0.796479, Time 00:01:47
Epoch 3. Train Loss: 0.591876, Train Acc: 0.797630, Valid Loss: 0.599780, Valid Acc: 0.794304, Time 00:01:46
Epoch 4. Train Loss: 0.578225, Train Acc: 0.801954, Valid Loss: 0.584498, Valid Acc: 0.799051, Time 00:01:46
