In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import gc
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("ggplot")

from datetime import datetime
from tqdm.notebook import tqdm

from PIL import Image
from sklearn import model_selection, metrics
from sklearn.metrics import classification_report, accuracy_score
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms


In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [3]:
# general global variables
DATA_PATH = "data/"
TRAIN_PATH = "data/train_images"
TEST_PATH = "data/test_images/"
BEST_MODEL = "ResNet16.pth"
SUBMISSION_FILE = "submission.csv"

In [4]:
# model specific global variables
IMG_SIZE = 224
BATCH_SIZE = 16
LR = 2e-05
N_EPOCHS = 15

In [5]:
class CassavaDataset(Dataset):
    """
    Helper Class to create the pytorch dataset
    """

    def __init__(self, df, data_path=DATA_PATH, mode="train", transforms=None):
        super().__init__()
        self.df_data = df.values
        self.data_path = data_path
        self.transforms = transforms
        self.mode = mode
        self.data_dir = "train_images" if mode == "train" else "test_images"

    def __len__(self):
        return len(self.df_data)

    def __getitem__(self, index):
        img_name, label = self.df_data[index]
        img_path = os.path.join(self.data_path, self.data_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        if self.transforms is not None:
            image = self.transforms(img)

        return image, label


# create image augmentations
transforms_train = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

transforms_valid = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

from torch.utils.tensorboard import SummaryWriter
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ResNet16(nn.Module):
    def __init__(self, num_classes=5):
        super(ResNet16, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(64, 2)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = []
        layers.append(BasicBlock(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(BasicBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def train_one_epoch(self, train_loader, criterion, optimizer, device, writer, epoch):
        self.train()
        epoch_loss = 0.0
        epoch_accuracy = 0.0

        for i, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = self.forward(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            accuracy = (output.argmax(dim=1) == target).float().mean().item()
            epoch_loss += loss.item()
            epoch_accuracy += accuracy

            if i % 20 == 0:
                print(f"BATCH {i+1}/{len(train_loader)} - LOSS: {loss.item():.4f} - ACCURACY: {accuracy:.4f}")
                writer.add_scalar('Training Loss', loss.item(), epoch * len(train_loader) + i)
                writer.add_scalar('Training Accuracy', accuracy, epoch * len(train_loader) + i)

        return epoch_loss / len(train_loader), epoch_accuracy / len(train_loader)

    def valid_one_epoch(self, valid_loader, criterion, device, writer, epoch):
        self.eval()
        valid_loss = 0.0
        valid_accuracy = 0.0

        with torch.no_grad():
            for data, target in valid_loader:
                data, target = data.to(device), target.to(device)

                output = self.forward(data)
                loss = criterion(output, target)
                accuracy = (output.argmax(dim=1) == target).float().mean().item()

                valid_loss += loss.item()
                valid_accuracy += accuracy

            writer.add_scalar('Validation Loss', valid_loss / len(valid_loader), epoch)
            writer.add_scalar('Validation Accuracy', valid_accuracy / len(valid_loader), epoch)

        return valid_loss / len(valid_loader), valid_accuracy / len(valid_loader)

In [6]:
def fit_gpu(model, epochs, device, criterion, optimizer, train_loader, valid_loader=None):
    writer = SummaryWriter()
    valid_loss_min = np.Inf
    train_losses = []
    valid_losses = []
    train_accs = []
    valid_accs = []

    for epoch in range(1, epochs+1):
        gc.collect()
        print(f"{'='*50}")
        print(f"EPOCH {epoch} - TRAINING...")

        train_loss, train_acc = model.train_one_epoch(train_loader, criterion, optimizer, device, writer, epoch)
        print(f"\n\t[TRAIN] EPOCH {epoch} - LOSS: {train_loss}, ACCURACY: {train_acc}\n")
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        gc.collect()

        if valid_loader is not None:
            gc.collect()
            print(f"EPOCH {epoch} - VALIDATING...")
            valid_loss, valid_acc = model.valid_one_epoch(valid_loader, criterion, device, writer, epoch)
            print(f"\t[VALID] LOSS: {valid_loss}, ACCURACY: {valid_acc}\n")
            valid_losses.append(valid_loss)
            valid_accs.append(valid_acc)
            gc.collect()

            if valid_loss <= valid_loss_min and epoch != 1:
                print(f"Validation loss decreased ({valid_loss_min:.4f} --> {valid_loss:.4f}). Saving model...")
                torch.save(model.state_dict(), BEST_MODEL)
                valid_loss_min = valid_loss

    writer.close()
    return {
        "train_loss": train_losses,
        "valid_losses": valid_losses,
        "train_acc": train_accs,
        "valid_accs": valid_accs,
    }

def run():
    df = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
    train_df, test_df = model_selection.train_test_split(df, test_size=0.1, random_state=42, shuffle=True, stratify=df.label.values)
    train_df, valid_df = model_selection.train_test_split(train_df, test_size=0.2, random_state=42, shuffle=True, stratify=train_df.label.values)

    train_dataset = CassavaDataset(train_df, transforms=transforms_train)
    valid_dataset = CassavaDataset(valid_df, transforms=transforms_valid)
    test_dataset = CassavaDataset(test_df, transforms=transforms_valid)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)

    criterion = nn.CrossEntropyLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lr = LR
    model = ResNet16(num_classes=5)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    start_time = datetime.now()
    logs = fit_gpu(model=model, epochs=N_EPOCHS, device=device, criterion=criterion, optimizer=optimizer, train_loader=train_loader, valid_loader=valid_loader)
    print(f"Execution time: {datetime.now() - start_time}")
    torch.save(model.state_dict(), f'end_model.pth')

    best_model = ResNet16(num_classes=5)
    best_model.load_state_dict(torch.load(BEST_MODEL))
    best_model = best_model.to(device)
    best_model.eval()

    test_labels = []
    test_preds = []
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = best_model(images)
            _, preds = torch.max(outputs, 1)
            test_labels.extend(labels.cpu().numpy())
            test_preds.extend(preds.cpu().numpy())

    print("Classification Report:")
    print(classification_report(test_labels, test_preds, target_names=[str(i) for i in range(5)]))
    print(f"Test Accuracy: {accuracy_score(test_labels, test_preds)}")

In [6]:
torch.set_default_tensor_type("torch.FloatTensor")
a = run()

EPOCH 1 - TRAINING...
BATCH 1/962 - LOSS: 1.8639 - ACCURACY: 0.3125
BATCH 21/962 - LOSS: 1.4886 - ACCURACY: 0.3125
BATCH 41/962 - LOSS: 1.2624 - ACCURACY: 0.6250
BATCH 61/962 - LOSS: 1.2136 - ACCURACY: 0.5000
BATCH 81/962 - LOSS: 1.5758 - ACCURACY: 0.4375
BATCH 101/962 - LOSS: 1.1546 - ACCURACY: 0.6875
BATCH 121/962 - LOSS: 1.0939 - ACCURACY: 0.6250
BATCH 141/962 - LOSS: 0.8509 - ACCURACY: 0.7500
BATCH 161/962 - LOSS: 1.0582 - ACCURACY: 0.5625
BATCH 181/962 - LOSS: 1.3373 - ACCURACY: 0.4375
BATCH 201/962 - LOSS: 1.2647 - ACCURACY: 0.5000
BATCH 221/962 - LOSS: 0.9606 - ACCURACY: 0.7500
BATCH 241/962 - LOSS: 0.9626 - ACCURACY: 0.6250
BATCH 261/962 - LOSS: 1.1919 - ACCURACY: 0.5000
BATCH 281/962 - LOSS: 1.0621 - ACCURACY: 0.5625
BATCH 301/962 - LOSS: 1.3797 - ACCURACY: 0.5000
BATCH 321/962 - LOSS: 0.9363 - ACCURACY: 0.6875
BATCH 341/962 - LOSS: 1.1717 - ACCURACY: 0.5625
BATCH 361/962 - LOSS: 1.2280 - ACCURACY: 0.5625
BATCH 381/962 - LOSS: 0.8493 - ACCURACY: 0.6250
BATCH 401/962 - LOSS: 0.

In [7]:
import os
import gc
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from sklearn import model_selection
from sklearn.metrics import classification_report
import torchvision.transforms as transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

def generate_classification_report(model, valid_loader, device):
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(target.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    # 生成分类报告
    report = classification_report(all_labels, all_preds, target_names=[str(i) for i in range(5)])
    print("Classification Report on Validation Set:")
    print(report)

# 加载最好的模型
best_model = ResNet16(num_classes=5)
best_model.load_state_dict(torch.load('ResNet16.pth'))
best_model = best_model.to(device)

# 验证集的数据加载器
df = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
train_df, test_df = model_selection.train_test_split(df, test_size=0.1, random_state=42, shuffle=True, stratify=df.label.values)
train_df, valid_df = model_selection.train_test_split(train_df, test_size=0.2, random_state=42, shuffle=True, stratify=train_df.label.values)

valid_dataset = CassavaDataset(valid_df, transforms=transforms_valid)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, drop_last=True, num_workers=4)

# 生成分类报告
generate_classification_report(best_model, valid_loader, torch.device("cuda" if torch.cuda.is_available() else "cpu"))

Classification Report on Validation Set:
              precision    recall  f1-score   support

           0       0.38      0.47      0.42       194
           1       0.55      0.60      0.57       392
           2       0.73      0.31      0.44       428
           3       0.91      0.89      0.90      2364
           4       0.44      0.65      0.52       462

    accuracy                           0.75      3840
   macro avg       0.60      0.58      0.57      3840
weighted avg       0.77      0.75      0.75      3840

