## Imports

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset
import torchvision
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

## Dataset

In [None]:
class ImagesDataset(Dataset):
    """Reads in an image, transforms pixel values, and serves
    a dictionary containing the image id, image tensors, and label.
    """

    def __init__(self, x_df: pd.DataFrame, y_df: pd.DataFrame | None = None, transforms=None):
        self.data = x_df
        self.label = y_df
        if transforms is not None:
            self.transforms = transforms

    def __getitem__(self, index):
        image = Image.open(self.data.iloc[index]["filepath"]).convert("RGB")
        if self.transforms:
            image = self.transforms(image)
        image_id = self.data.index[index]
        sample = {"image_id": image_id, "image": image}

        if self.label is not None:
            label = torch.tensor(self.label.iloc[index].values, dtype=torch.float)
            sample |= {"label": label}

        return sample

    def __len__(self):
        return len(self.data)


In [None]:
# Load Training Data CSV:
import pathlib
from typing import Tuple


def load_training_data(features_csv: str, labels_csv: str, images_dir: str,
                       ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the feature and label CSVs and pair each label row with its image path.

    Both files are read with their ``id`` column as the index, and every
    ``filepath`` value from the features file is joined onto ``images_dir``.

    Args:
        features_csv: CSV containing at least the ``id`` and ``filepath`` columns.
        labels_csv: CSV containing an ``id`` column plus the label column(s).
        images_dir: Directory that the ``filepath`` values are relative to.

    Returns:
        ``(x, y)`` where ``y`` is the labels table as read and ``x`` is a
        one-column (``filepath``) frame holding the feature rows selected by
        ``y``'s index, in ``y``'s order.
    """

    def _under_images_dir(relative):
        # Values are stringified first so non-string cells still join cleanly.
        return pathlib.Path(images_dir) / str(relative)

    feature_table = pd.read_csv(features_csv, index_col="id")
    feature_table["filepath"] = feature_table["filepath"].apply(_under_images_dir)
    label_table = pd.read_csv(labels_csv, index_col="id")

    # Keep only the rows that have a label, and only the path column.
    image_paths = feature_table.loc[label_table.index, ["filepath"]]
    return image_paths, label_table


# np.unique is used below; numpy was never imported in this notebook, which
# made the class-weight line fail with NameError.
import numpy as np

# features_csv, labels_csv and images_dir are presumably defined in another
# cell -- verify before running this one.
x, y = load_training_data(features_csv, labels_csv, images_dir)

# Hold out 25% of the rows for evaluation, stratified on the labels, with a
# fixed seed so the split is reproducible.
x_train, x_eval, y_train, y_eval = train_test_split(x, y, stratify=y, test_size=0.25, random_state=42)

# NOTE(review): compute_class_weight expects a 1-D target and y_train is a
# DataFrame here -- if the labels file has several (e.g. one-hot) columns this
# call needs a single class column instead; confirm against the label schema.
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
