In [66]:
!nvidia-smi
!pip install prefetch_generator
!pip install timm
!pip install albumentations

Sun Apr 16 01:49:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A10G         On   | 00000000:00:1E.0 Off |                    0 |
|  0%   35C    P0    62W / 300W |  18849MiB / 23028MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [110]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import trange
import torchvision
import boto3
import io
import sagemaker
from sagemaker import get_execution_role
import tempfile
import PIL
import time
from datetime import timedelta
from prefetch_generator import BackgroundGenerator
import timm
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [3]:
# Make sure you're using cuda (GPU) by checking the hardware accelerator under Runtime -> Change runtime type
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("We're using:", device)

We're using: cuda


# Download dataset from AWS S3

In [4]:
#!pip install cloudpathlib[s3,gs,azure]

In [5]:
#from cloudpathlib import CloudPath

#Download all the files from AWS S3
#Run this cell once if working in a new sagemaker instance
#This will take about 45 minutes to finish
#Use ls -l | grep "^-" | wc -l in terminal to check the number of files

#cp = CloudPath("s3://cassavaproject")
#cp.download_to("Dataset")

# Preprocessing

In [134]:
class MyDataset(Dataset):

    def __init__(self, transform=None):
        self.image_path = 'Dataset/train_images'
        self.labels = pd.read_csv('Dataset/train.csv')
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = self.image_path + '/' + self.labels.iloc[idx]['image_id']

        # Read the image from the file path
        #image = Image.open(img_name)
        image = cv2.imread(img_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        #image = torchvision.io.read_file(img_name)
        #image = torchvision.io.decode_jpeg(image, device="cpu")
        #image = image.float().
        
        # Transform the image using self.transform
        if self.transform:
            image = image = self.transform(image=image)["image"]

        if "label" in self.labels.columns:
            label = self.labels.iloc[idx]['label']
            sample = (image, label)
        else:
            sample = (image)
        return sample

In [135]:
#Used for directly download and read file from AWS S3.
#If running the download cell above, use 'MyDataset' class instead of this one.
class MyDatasetS3(Dataset):

    def __init__(self, transform=None):
        #File path for csv and images
        self.image_path = 'train_images'
        
        #Connect to s3 file
        self.csv_path = 'train.csv'
        self.s3_client = boto3.resource('s3')
        self.bucket = self.s3_client.Bucket('cassavaproject')
        
        s3 = boto3.client('s3')
        obj = s3.get_object(Bucket = 'cassavaproject',Key = 'train.csv')

        self.labels = pd.read_csv(obj['Body'])
        
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = self.image_path + '/' + self.labels.iloc[idx]['image_id']
        
        # we can download the file from S3 to a temporary file locally, then store that opened file as our image variable.
        # we need to create the local file name
        obj = self.bucket.Object(img_name)
        tmp = tempfile.NamedTemporaryFile()
        tmp_name = '{}.jpg'.format(tmp.name)

        # now we can actually download from S3 to a local place
        with open(tmp_name, 'wb') as f:
            obj.download_fileobj(f)
            f.flush()
            f.close()
            
            image = torchvision.io.read_file(tmp_name)
            image = torchvision.io.decode_jpeg(image, device="cpu")
            image = image.float()
            
        # Transform the image using self.transform
        if self.transform:
            image = self.transform(image=image)["image"]

        if "label" in self.labels.columns:
            label = self.labels.iloc[idx]['label']
            sample = (image, label)
        else:
            sample = (image)
        return sample

In [136]:
# Credit: https://www.kaggle.com/code/aliabdin1/calculate-mean-std-of-images/notebook
mean = np.array([0.42984136, 0.49624753, 0.3129598 ])
std = np.array([0.21417203, 0.21910103, 0.19542212])

In [137]:
train_transform = torch.nn.Sequential(transforms.Resize((256,256),antialias=True), transforms.Normalize(mean=mean,
    std=std))

In [138]:
albu_transform = A.Compose(
        [A.CenterCrop(height=256, width=256), 
         A.RandomBrightnessContrast(p=0.5), 
         A.Normalize(mean=mean, std=std),
         ToTensorV2()
        ])

In [139]:
dataset = MyDataset(transform=albu_transform)
train_data, test_data = torch.utils.data.random_split(dataset, [15000, 6397])

# Define our model

In [19]:
timm.list_models('*vgg*')

['repvgg_a2',
 'repvgg_b0',
 'repvgg_b1',
 'repvgg_b1g4',
 'repvgg_b2',
 'repvgg_b2g4',
 'repvgg_b3',
 'repvgg_b3g4',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn']

In [31]:
test_model = timm.create_model('vgg16', pretrained=True)
test_model.default_cfg

{'url': 'https://download.pytorch.org/models/vgg16-397923af.pth',
 'num_classes': 1000,
 'input_size': (3, 224, 224),
 'pool_size': (7, 7),
 'crop_pct': 0.875,
 'interpolation': 'bilinear',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'first_conv': 'features.0',
 'classifier': 'head.fc',
 'architecture': 'vgg16'}

In [58]:
class OurModel(nn.Module):
  def __init__(self, num_classes):
    super(OurModel, self).__init__()
    self.model = timm.create_model('vgg19', pretrained=True)#torchvision.models.resnet34(weights='IMAGENET1K_V1')
    self.model.head.fc = nn.Linear(self.model.head.fc.in_features, num_classes)
    self.softmax = nn.Softmax()

  def forward(self, input):
    out = self.model(input)
    softmax = self.softmax(out)
    return softmax

# Train our model

In [59]:
model = OurModel(5).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6, amsgrad=False) #torch.optim.Adam(model.parameters(), lr=5*1e-3)


Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth


In [60]:
class DataLoaderX(DataLoader):

    def __iter__(self):
        return BackgroundGenerator(super().__iter__())

In [61]:
train_loader = DataLoaderX(dataset=train_data, batch_size = 128, shuffle= True, num_workers=2, pin_memory=True)
test_loader = DataLoaderX(dataset=test_data, batch_size = 128, shuffle=False, num_workers=2, pin_memory=True)

In [62]:
print('Start fine-tuning...')

Start fine-tuning...


In [63]:
def test_model(model, test_loader):
    #model = torch.compile(model)
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            outputs = model(images)
            
            _,prediction = torch.max(outputs.data, 1)
            correct += (prediction == labels).sum().item()
            total += labels.size(0)
        model.train()
        return 100 * correct / total

In [64]:
def get_run_time(start_time):
    end_time = time.time()
    runtime = end_time - start_time
    return runtime

In [65]:
best_acc = 0.
best_epoch = None
end_patient = 0
num_epochs = 25
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=3, verbose=True)

save_model_path = ''
train_loss = []
train_accuracy = []
test_accuracy = []

start_time = time.time()

#model = torch.compile(model)

for epoch in range(num_epochs):
    correct = 0
    total = 0
    epoch_loss = 0.
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss
        _, prediction = torch.max(outputs.data, 1)
        correct += (prediction == labels).sum().item()
        total += labels.size(0)
        
        time_diff = get_run_time(start_time)

        print('Epoch [{}/{}]: Iter {}, Loss {:.4f}, Runtime {:.0f}m {:.0f}s'.format(epoch + 1, num_epochs, i + 1, loss, time_diff//60, time_diff%60))
        train_loss.append(loss)
    
    train_acc = 100 * correct / total
    print('Testing on test dataset...')
    test_acc = test_model(model, test_loader)
    print('Epoch [{}/{}] Loss: {:.4f} Train_Acc: {:.4f}  Test_Acc: {:.4f}'
          .format(epoch + 1, num_epochs, epoch_loss, train_acc, test_acc))
    scheduler.step(test_acc)
    train_accuracy.append(train_acc)
    test_accuracy.append(test_acc)
    if test_acc > best_acc:
        best_acc = test_acc
        best_epoch = epoch + 1
        print('The accuracy is improved, save model')
        torch.save(model.state_dict(), os.path.join(
                                                    'resnet34_tuning_epoch_%d_acc_%g.pth' %
                                                    (best_epoch, best_acc)))

print('After the training, the end of the epoch %d, the accuracy %g is the highest' % (best_epoch, best_acc))

  softmax = self.softmax(out)


Epoch [1/25]: Iter 1, Loss 1.8623, Runtime 0m 3s
Epoch [1/25]: Iter 2, Loss 1.3345, Runtime 0m 4s
Epoch [1/25]: Iter 3, Loss 1.2330, Runtime 0m 5s
Epoch [1/25]: Iter 4, Loss 1.2798, Runtime 0m 6s
Epoch [1/25]: Iter 5, Loss 1.2876, Runtime 0m 7s
Epoch [1/25]: Iter 6, Loss 1.2798, Runtime 0m 8s
Epoch [1/25]: Iter 7, Loss 1.2642, Runtime 0m 9s
Epoch [1/25]: Iter 8, Loss 1.3189, Runtime 0m 11s
Epoch [1/25]: Iter 9, Loss 1.2955, Runtime 0m 12s
Epoch [1/25]: Iter 10, Loss 1.3189, Runtime 0m 13s
Epoch [1/25]: Iter 11, Loss 1.3345, Runtime 0m 14s
Epoch [1/25]: Iter 12, Loss 1.3111, Runtime 0m 15s
Epoch [1/25]: Iter 13, Loss 1.2798, Runtime 0m 16s
Epoch [1/25]: Iter 14, Loss 1.3267, Runtime 0m 17s
Epoch [1/25]: Iter 15, Loss 1.3189, Runtime 0m 18s
Epoch [1/25]: Iter 16, Loss 1.2955, Runtime 0m 20s
Epoch [1/25]: Iter 17, Loss 1.3580, Runtime 0m 21s
Epoch [1/25]: Iter 18, Loss 1.2798, Runtime 0m 22s
Epoch [1/25]: Iter 19, Loss 1.3501, Runtime 0m 23s
Epoch [1/25]: Iter 20, Loss 1.2876, Runtime 0m 

KeyboardInterrupt: 

# Result Analysis

In [None]:
import matplotlib.pyplot as plt
import itertools

In [None]:
plt.plot(np.arange(1,51),train_accuracy,label='Train Accuracy')
plt.plot(np.arange(1,51),test_accuracy,label='Test Accuracy')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Versus Epochs')
plt.show()

In [None]:
model = OurModel(5).to(device)
model.load_state_dict(torch.load('resnet34_tuning_epoch_29_acc_83.689.pth'))


In [None]:
nb_classes = 5

confusion_matrix = torch.zeros(nb_classes, nb_classes)
model.eval()
with torch.no_grad():
    for i, (inputs, classes) in enumerate(test_loader):
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        for t, p in zip(classes.view(-1), preds.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(confusion_matrix)

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm/ np.sum(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2%' #if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
plt.figure()
num_classes=5
M = confusion_matrix.numpy()
plot_confusion_matrix(M, classes=np.arange(num_classes), normalize=True)
print("Accuracy:",np.trace(M/np.sum(M)))