In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import gc
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("ggplot")

from datetime import datetime
from tqdm.notebook import tqdm

from PIL import Image
from sklearn import model_selection, metrics
from sklearn.metrics import classification_report, accuracy_score
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [3]:
# general global variables
DATA_PATH = "data/"
TRAIN_PATH = "data/train_images"
TEST_PATH = "data/test_images/"
BEST_MODEL = "weights/Vgg16.pth"
SUBMISSION_FILE = "submission.csv"

In [4]:
# model specific global variables
IMG_SIZE = 224
BATCH_SIZE = 32
LR = 2e-05
N_EPOCHS = 15

In [5]:
class CassavaDataset(Dataset):
    """
    Helper Class to create the pytorch dataset
    """

    def __init__(self, df, data_path=DATA_PATH, mode="train", transforms=None):
        super().__init__()
        self.df_data = df.values
        self.data_path = data_path
        self.transforms = transforms
        self.mode = mode
        self.data_dir = "train_images" if mode == "train" else "test_images"

    def __len__(self):
        return len(self.df_data)

    def __getitem__(self, index):
        img_name, label = self.df_data[index]
        img_path = os.path.join(self.data_path, self.data_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        if self.transforms is not None:
            image = self.transforms(img)

        return image, label

In [6]:
# create image augmentations
transforms_train = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

transforms_valid = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

In [7]:
from torch.utils.tensorboard import SummaryWriter

class VGG16(nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, n_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

    def train_one_epoch(self, train_loader, criterion, optimizer, device, writer, epoch):
        self.train()
        epoch_loss = 0.0
        epoch_accuracy = 0.0

        for i, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = self.forward(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            accuracy = (output.argmax(dim=1) == target).float().mean().item()
            epoch_loss += loss.item()
            epoch_accuracy += accuracy

            if i % 20 == 0:
                print(f"BATCH {i+1}/{len(train_loader)} - LOSS: {loss.item():.4f} - ACCURACY: {accuracy:.4f}")
                writer.add_scalar('Training Loss', loss.item(), epoch * len(train_loader) + i)
                writer.add_scalar('Training Accuracy', accuracy, epoch * len(train_loader) + i)

        return epoch_loss / len(train_loader), epoch_accuracy / len(train_loader)

    def valid_one_epoch(self, valid_loader, criterion, device, writer, epoch):
        self.eval()
        valid_loss = 0.0
        valid_accuracy = 0.0

        with torch.no_grad():
            for data, target in valid_loader:
                data, target = data.to(device), target.to(device)

                output = self.forward(data)
                loss = criterion(output, target)
                accuracy = (output.argmax(dim=1) == target).float().mean().item()

                valid_loss += loss.item()
                valid_accuracy += accuracy

            writer.add_scalar('Validation Loss', valid_loss / len(valid_loader), epoch)
            writer.add_scalar('Validation Accuracy', valid_accuracy / len(valid_loader), epoch)

        return valid_loss / len(valid_loader), valid_accuracy / len(valid_loader)

In [8]:
def fit_gpu(model, epochs, device, criterion, optimizer, train_loader, valid_loader=None):
    writer = SummaryWriter()
    valid_loss_min = np.Inf
    train_losses = []
    valid_losses = []
    train_accs = []
    valid_accs = []

    for epoch in range(1, epochs+1):
        gc.collect()
        print(f"{'='*50}")
        print(f"EPOCH {epoch} - TRAINING...")

        train_loss, train_acc = model.train_one_epoch(train_loader, criterion, optimizer, device, writer, epoch)
        print(f"\n\t[TRAIN] EPOCH {epoch} - LOSS: {train_loss}, ACCURACY: {train_acc}\n")
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        gc.collect()

        if valid_loader is not None:
            gc.collect()
            print(f"EPOCH {epoch} - VALIDATING...")
            valid_loss, valid_acc = model.valid_one_epoch(valid_loader, criterion, device, writer, epoch)
            print(f"\t[VALID] LOSS: {valid_loss}, ACCURACY: {valid_acc}\n")
            valid_losses.append(valid_loss)
            valid_accs.append(valid_acc)
            gc.collect()

            if valid_loss <= valid_loss_min and epoch != 1:
                print(f"Validation loss decreased ({valid_loss_min:.4f} --> {valid_loss:.4f}). Saving model...")
                torch.save(model.state_dict(), BEST_MODEL)
                valid_loss_min = valid_loss

    writer.close()
    return {
        "train_loss": train_losses,
        "valid_losses": valid_losses,
        "train_acc": train_accs,
        "valid_accs": valid_accs,
    }

In [9]:
def run():
    df = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
    train_df, test_df = model_selection.train_test_split(df, test_size=0.1, random_state=42, shuffle=True, stratify=df.label.values)
    train_df, valid_df = model_selection.train_test_split(train_df, test_size=0.2, random_state=42, shuffle=True, stratify=train_df.label.values)

    train_dataset = CassavaDataset(train_df, transforms=transforms_train)
    valid_dataset = CassavaDataset(valid_df, transforms=transforms_valid)
    test_dataset = CassavaDataset(test_df, transforms=transforms_valid)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)

    criterion = nn.CrossEntropyLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lr = LR
    model = VGG16(n_classes=5)
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training
    start_time = datetime.now()
    logs = fit_gpu(model=model, epochs=N_EPOCHS, device=device, criterion=criterion, optimizer=optimizer, train_loader=train_loader, valid_loader=valid_loader)
    print(f"Execution time: {datetime.now() - start_time}")

    # Load the best model and evaluate on test set
    best_model = VGG16(n_classes=5)
    best_model.load_state_dict(torch.load(BEST_MODEL))
    best_model = best_model.to(device)
    best_model.eval()

    test_labels = []
    test_preds = []
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = best_model(images)
            _, preds = torch.max(outputs, 1)
            test_labels.extend(labels.cpu().numpy())
            test_preds.extend(preds.cpu().numpy())

    print("Classification Report:")
    print(classification_report(test_labels, test_preds, target_names=[str(i) for i in range(5)]))
    print(f"Test Accuracy: {accuracy_score(test_labels, test_preds)}")

In [10]:
torch.set_default_tensor_type("torch.FloatTensor")
a = run()

EPOCH 1 - TRAINING...
BATCH 1/481 - LOSS: 1.6051 - ACCURACY: 0.4688
BATCH 21/481 - LOSS: 1.3051 - ACCURACY: 0.5938
BATCH 41/481 - LOSS: 1.1499 - ACCURACY: 0.6250
BATCH 61/481 - LOSS: 1.2480 - ACCURACY: 0.5938
BATCH 81/481 - LOSS: 1.2649 - ACCURACY: 0.5938
BATCH 101/481 - LOSS: 1.4714 - ACCURACY: 0.4688
BATCH 121/481 - LOSS: 1.1028 - ACCURACY: 0.6562
BATCH 141/481 - LOSS: 1.3806 - ACCURACY: 0.4688
BATCH 161/481 - LOSS: 1.0701 - ACCURACY: 0.6562
BATCH 181/481 - LOSS: 1.5020 - ACCURACY: 0.4062
BATCH 201/481 - LOSS: 1.0102 - ACCURACY: 0.6875
BATCH 221/481 - LOSS: 1.1543 - ACCURACY: 0.6250
BATCH 241/481 - LOSS: 0.9295 - ACCURACY: 0.7188
BATCH 261/481 - LOSS: 1.5832 - ACCURACY: 0.4062
BATCH 281/481 - LOSS: 1.4400 - ACCURACY: 0.4688
BATCH 301/481 - LOSS: 0.9472 - ACCURACY: 0.6875
BATCH 321/481 - LOSS: 1.2144 - ACCURACY: 0.5625
BATCH 341/481 - LOSS: 1.4225 - ACCURACY: 0.4688
BATCH 361/481 - LOSS: 1.1631 - ACCURACY: 0.5938
BATCH 381/481 - LOSS: 1.0681 - ACCURACY: 0.6562
BATCH 401/481 - LOSS: 1.