In [None]:
import copy

from matplotlib import pyplot as plt
from huggingface_hub import hf_hub_download
from datasets import load_dataset
from sklearn.metrics import f1_score
import pandas as pd
import optuna

import torch
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import resnet152, ResNet152_Weights

In [None]:
# Fetch the image dataset from the Hugging Face Hub, caching it inside the
# Kaggle working directory.
hub_repo = "mikkoim/aquamonitor-jyu"
work_dir = "/kaggle/working/"

ds = load_dataset(hub_repo, cache_dir=work_dir)

# Download the parquet metadata file from the same dataset repository. The
# call stays the cell's final expression so the notebook shows its return
# value.
hf_hub_download(
    repo_id=hub_repo,
    repo_type="dataset",
    filename="aquamonitor-jyu.parquet.gzip",
    local_dir=work_dir,
)

In [None]:
# A split can be indexed like a list. Every record carries an image under
# "jpg" and a "__key__" string that identifies its row in the metadata table.
record = ds["train"][21015]
img = record["jpg"]

print(record, "\n")
print(record["__key__"])

# Keep the image as the last expression so the notebook renders it.
img

In [None]:
# Read the metadata table; the "__key__" of each dataset record corresponds to
# a row in this table.
metadata_path = "/kaggle/working/aquamonitor-jyu.parquet.gzip"
metadata = pd.read_parquet(metadata_path)
metadata

In [None]:
classes = sorted(metadata["taxon_group"].unique())
class_map = {k:v for v,k in enumerate(classes)}
class_map_inv = {v:k for k,v in class_map.items()}

metadata["img"] = metadata["img"].str.removesuffix(".jpg")
label_dict = dict(zip(metadata["img"], metadata["taxon_group"].map(class_map)))

class_map_inv

In [None]:
# Seed PyTorch's global random number generator so that the random parts of
# the notebook (new classifier-head initialisation, training-set shuffling)
# start from the same state on every "Restart & Run All".
# This alone does not guarantee bit-identical results on GPU.
SEED = 42
torch.manual_seed(SEED)

IMAGE_SIZE = 224  # side length, in pixels, that every image is resized to
BATCH_SIZE = 32   # number of samples per DataLoader batch
EPOCH_NUM = 7     # number of passes over the training split

In [None]:
# Per-image input pipeline: resize to a fixed IMAGE_SIZE x IMAGE_SIZE square,
# convert to a tensor, then normalise each of the three channels with
# mean 0.5 and std 0.5.
# NOTE(review): the pretrained ResNet weights come with their own
# preprocessing statistics; confirm that 0.5 / 0.5 is intentional.
channel_mean = [0.5, 0.5, 0.5]
channel_std = [0.5, 0.5, 0.5]

tf = transforms.Compose(
    [
        transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
        # Flip augmentation, currently disabled:
        # transforms.RandomHorizontalFlip(p=0.75),
        # transforms.RandomVerticalFlip(p=0.75),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

def preprocess(batch):
    """Convert a raw dataset batch into tensors ready for the model.

    Args:
        batch: Mapping with a "__key__" sequence of record keys and a "jpg"
            sequence of images, aligned by position. The images are assumed
            to be PIL images (they are passed through ``Image.convert``) --
            TODO confirm against the dataset's decoding settings.

    Returns:
        dict with three entries:
            "key":   the record keys, unchanged;
            "img":   list of image tensors produced by the ``tf`` pipeline;
            "label": ``torch.long`` tensor of integer class ids looked up in
                     ``label_dict``.

    Raises:
        KeyError: if a record key has no entry in ``label_dict``.
    """
    keys = batch["__key__"]

    # The normalisation step in `tf` is configured for exactly three channels,
    # so grayscale / RGBA / palette images are converted to RGB first. Images
    # that are already RGB are passed through untouched.
    images = [
        tf(image if image.mode == "RGB" else image.convert("RGB"))
        for image in batch["jpg"]
    ]

    labels = torch.as_tensor([label_dict[key] for key in keys], dtype=torch.long)

    return {"key": keys, "img": images, "label": labels}

# Register `preprocess` as the transform of the training and validation
# splits and report how many rows each split holds.
train_ds = ds["train"].with_transform(preprocess)
print(f"Train Size: {train_ds.num_rows}")

devel_ds = ds["validation"].with_transform(preprocess)
print(f"Devel Size: {devel_ds.num_rows}")

# Optional visual check of one transformed sample (kept disabled):
# plt.imshow(train_ds[2014]["img"].permute(1, 2, 0).numpy())

In [None]:
# Batch both splits with the same batch size. Only the training loader
# shuffles; the validation loader keeps DataLoader's default ordering.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
devel_loader = DataLoader(dataset=devel_ds, batch_size=BATCH_SIZE)

In [None]:
# Start from a ResNet-152 with its default pretrained weights and replace the
# final fully connected layer with one that has one output per class.
model = resnet152(weights=ResNet152_Weights.DEFAULT)
head_in_features = model.fc.in_features
model.fc = nn.Linear(in_features=head_in_features, out_features=len(classes))

# Cross-entropy loss with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# All model parameters are handed to the optimizer, not only the new head.
optimizer = Adam(model.parameters(), lr=1e-4)

# Multiply the learning rate by 0.5 when the monitored value (lower is better)
# stops improving.
# NOTE(review): patience=5 against EPOCH_NUM=7 leaves very little room for a
# reduction to actually happen -- confirm this is intended.
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)

In [None]:
# Fine-tune the model for EPOCH_NUM epochs. After every epoch the loss and the
# weighted F1-score are computed on both splits, the scheduler is stepped on
# the validation loss, and a copy of the weights is kept whenever the
# validation loss reaches a new minimum.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Per-epoch history and best-checkpoint bookkeeping.
train_losses = []
train_f1s = []
devel_losses = []
devel_f1s = []
best_loss = float("inf")
best_model_weights = None

for epoch in range(EPOCH_NUM):

    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    train_labels = []
    train_preds = []

    for batch in train_loader:
        images = batch["img"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the largest logit. detach() keeps the
        # bookkeeping out of the autograd graph.
        preds = outputs.detach().argmax(dim=1)
        train_labels.extend(labels.cpu().tolist())
        train_preds.extend(preds.cpu().tolist())

    # Epoch loss is the mean of the per-batch losses.
    train_loss = running_loss / len(train_loader)
    train_f1 = f1_score(train_labels, train_preds, average="weighted")
    train_losses.append(train_loss)
    train_f1s.append(train_f1)

    # ---- Validation phase ----
    model.eval()
    devel_running_loss = 0.0
    devel_labels = []
    devel_preds = []

    with torch.no_grad():
        for batch in devel_loader:
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            devel_running_loss += loss.item()

            preds = outputs.argmax(dim=1)
            devel_labels.extend(labels.cpu().tolist())
            devel_preds.extend(preds.cpu().tolist())

    devel_loss = devel_running_loss / len(devel_loader)
    devel_f1 = f1_score(devel_labels, devel_preds, average="weighted")
    devel_losses.append(devel_loss)
    devel_f1s.append(devel_f1)

    # Let the scheduler react to this epoch's validation loss.
    scheduler.step(devel_loss)

    # Read the learning rate straight from the optimizer: this is the value
    # the scheduler has just (possibly) updated, and it does not depend on the
    # scheduler class exposing get_last_lr().
    current_lr = optimizer.param_groups[0]["lr"]

    print(f"Epoch [{epoch+1}/{EPOCH_NUM}], lr: {current_lr}, "
          f"Train Loss: {train_loss:.4f}, Train F1-Score: {train_f1:.3g}, "
          f"Devel Loss: {devel_loss:.4f}, Devel F1-Score: {devel_f1:.3g}")

    # Check if we have a new best model
    if devel_loss < best_loss:
        best_loss = devel_loss
        # state_dict() returns references to the live parameter tensors, so a
        # plain assignment would be silently overwritten by later epochs.
        # Deep-copy to freeze the weights as they are right now.
        best_model_weights = copy.deepcopy(model.state_dict())

In [None]:
# Put the weights recorded as best during training back into the model and
# write them to disk. Nothing happens when no best weights were recorded.
# NOTE(review): the file name says "resnet18" although the notebook fine-tunes
# a ResNet-152; the path is left untouched in case something else reads it.
checkpoint_path = "/kaggle/working/fine_tuned_resnet18.pth"

if best_model_weights is not None:
    model.load_state_dict(best_model_weights)
    torch.save(best_model_weights, checkpoint_path)

In [None]:
# Plot the learning curves: loss on the left, F1-score on the right.
def _plot_history(ax, train_values, devel_values, title, y_label):
    """Draw the train and devel curves of one metric on ``ax``.

    The x-axis is built from the number of values actually recorded for each
    curve rather than from EPOCH_NUM, so the plot still works when training
    was interrupted or EPOCH_NUM was edited after the run.
    """
    curves = ((train_values, "Train Set"), (devel_values, "Devel Set"))
    for values, legend_label in curves:
        ax.plot(range(1, len(values) + 1), values, label=legend_label)
    ax.set_title(title)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(y_label)
    ax.legend()


fig, (loss_ax, f1_ax) = plt.subplots(1, 2, figsize=(12, 5))

_plot_history(loss_ax, train_losses, devel_losses, "Loss Curve", "Loss")
_plot_history(f1_ax, train_f1s, devel_f1s, "F1-Score Curve", "F1-Score")

fig.tight_layout()