In [1]:
pip install omegaconf

Collecting omegaconf
  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting antlr4-python3-runtime==4.9.* (from omegaconf)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25ldone
[?25h  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=22c0bfba2867cd0240a98edb16a0c27622d982ab93ccccc4efad19282d0d748d
  Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88
Successfully built antlr4-python3-runtime
Installing collected packages: antlr

# **Important Imports**

In [14]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import torchvision.transforms as transforms
from PIL import Image
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader
from omegaconf import OmegaConf
import torch
import torchvision
from torchvision import transforms
import pytorch_lightning as pl
# from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer
import torch.nn as nn
import torch.optim as optim
from  torchvision.models import efficientnet_b2
from tqdm import tqdm 

# **Configuration File**

In [3]:
%%writefile efficient_model_config.yaml

# Settings consumed by EfficientDataset / EfficientDataModule and the checkpoint callbacks.
data:
    # CSV read with pandas; EfficientDataset matches its `image_id` column against image file names.
    labels_path: /kaggle/input/mayo-clinic-strip-ai/train.csv
    # Directory whose files are listed as the dataset samples.
    processed_imgs_dir: /kaggle/input/mayo-clinic-strip-ai-preprocessed-data/processed_images
    # Fraction kept for training; the validation share is computed as 1 - train_split.
    train_split: 0.9
    # Position in this list becomes the integer class id (CE -> 0, LAA -> 1).
    classes: ['CE','LAA']
    # random_state passed to StratifiedShuffleSplit.
    random_seed: 123
    # Images are resized to image_size x image_size by EfficientDataset.
    image_size: 512
model:
    # dirpath handed to the ModelCheckpoint callbacks.
    checkpoint: /kaggle/working/models/
    # NOTE(review): not read by the training cell in this notebook, which passes max_epochs=80 to Trainer.
    max_epochs: 30
    # NOTE(review): not read by EfficientNetSTRIP.configure_optimizers, which hard-codes lr=0.00001.
    learning_rate: 0.0001
    # batch_size / shuffle / num_workers are used by EfficientDataModule's dataloaders
    # (shuffle applies to the training loader only).
    batch_size: 20
    shuffle: True
    num_workers: 4

Overwriting efficient_model_config.yaml


# **Dataloader**

In [15]:
class EfficientDataset(Dataset):
    """Image-classification dataset over a directory of preprocessed images.

    Every entry of ``cfg.data.processed_imgs_dir`` is one sample. Its label is
    looked up in the CSV at ``cfg.data.labels_path`` by matching the file name
    (without extension) against the ``image_id`` column, and converted to an
    integer using the position of the label in ``cfg.data.classes``.

    Args:
        cfg: Configuration object exposing ``data.processed_imgs_dir``,
            ``data.labels_path``, ``data.classes`` and ``data.image_size`` via
            attribute access (e.g. the object returned by ``OmegaConf.load``).
    """

    def __init__(self, cfg: dict) -> None:
        """List the image files, build the class mapping and load the label table.

        Args:
            cfg: Configuration object, see the class docstring.
        """
        super(EfficientDataset, self).__init__()
        self.data_dir: str = cfg.data.processed_imgs_dir
        self.labels_pth = cfg.data.labels_path
        # os.listdir() returns entries in arbitrary order; sorting keeps sample
        # indices (and any seeded split built on them) stable between runs.
        self.data_dir_list: list[str] = [
            os.path.join(self.data_dir, filename)
            for filename in sorted(os.listdir(self.data_dir))
        ]
        self.class_to_int = {name: idx for idx, name in enumerate(cfg.data.classes)}
        self.img_transform = transforms.Compose(
            [
                transforms.Resize([cfg.data.image_size, cfg.data.image_size]),
                transforms.ToTensor(),
            ]
        )
        self.labels = pd.read_csv(self.labels_pth)

    def __len__(self) -> int:
        """Return the number of image files found in the data directory.

        Returns:
            int: dataset length
        """
        return len(self.data_dir_list)

    def _label_for(self, img_path: str) -> str:
        """Return the raw ``label`` value of the CSV row matching ``img_path``.

        Args:
            img_path: Path of an image; its base name without extension is the
                ``image_id`` searched for.

        Returns:
            The ``label`` of the first matching row.

        Raises:
            IndexError: If no row of the label table has that ``image_id``.
        """
        image_id = os.path.splitext(os.path.basename(img_path))[0]
        matches = self.labels.loc[self.labels.image_id == image_id, "label"]
        if matches.empty:
            # Same exception type as the bare `.values[0]` would raise, but
            # naming the offending image instead of "index 0 is out of bounds".
            raise IndexError(
                f"No row with image_id={image_id!r} in label file {self.labels_pth!r}"
            )
        return matches.values[0]

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Load one image and its integer class id.

        Args:
            index: Position in the (sorted) file list.

        Returns:
            The resized image as a tensor and the integer class id.

        Raises:
            IndexError: If the image has no row in the label table.
            KeyError: If the label is not one of the configured classes.
        """
        img_path = self.data_dir_list[index]
        with Image.open(img_path) as img:
            # Force three channels so grayscale / RGBA files produce tensors of
            # the same shape as RGB ones (BloodClotDataset does the same).
            image = self.img_transform(img.convert("RGB"))
        label = self.class_to_int[self._label_for(img_path)]
        return image, label
        


class EfficientDataModule(pl.LightningDataModule):
    """LightningDataModule wrapping ``EfficientDataset`` with a stratified train/val split.

    Args:
        cfg (dict): Configuration object; ``data.train_split`` and
            ``data.random_seed`` drive the split, ``model.batch_size``,
            ``model.shuffle`` and ``model.num_workers`` drive the dataloaders.
    """

    def __init__(self, cfg: dict) -> None:
        super(EfficientDataModule, self).__init__()
        self.cfg = cfg

    def setup(self, stage=None) -> None:
        """Build the dataset and split it into stratified train / validation subsets.

        Args:
            stage: The stage for which the data module is being set up (unused).

        Raises:
            ValueError: If an image file has no row in the label table.
        """
        dataset = EfficientDataset(self.cfg)

        # The split indices are used to index `dataset`, i.e. the list of image
        # files, so the stratification labels must be one-per-file and in file
        # order. Taking them from the CSV rows (as before) only works when the
        # CSV and the directory happen to have identical length and ordering.
        label_by_id = dataset.labels.drop_duplicates("image_id").set_index("image_id")["label"]
        image_ids = [
            os.path.splitext(os.path.basename(path))[0] for path in dataset.data_dir_list
        ]
        unlabeled = [image_id for image_id in image_ids if image_id not in label_by_id.index]
        if unlabeled:
            raise ValueError(
                f"{len(unlabeled)} image file(s) have no row in the label table, "
                f"e.g. {unlabeled[:5]}"
            )
        labels = np.array(
            [dataset.class_to_int[label_by_id.loc[image_id]] for image_id in image_ids]
        )

        # Use StratifiedShuffleSplit to maintain class distribution in train and val sets
        sss = StratifiedShuffleSplit(
            n_splits=1,
            test_size=1 - self.cfg.data.train_split,
            random_state=self.cfg.data.random_seed,
        )

        # Only the labels matter for stratification; the feature array is a placeholder.
        train_idx, val_idx = next(sss.split(np.zeros(len(labels)), labels))

        self.train_set = torch.utils.data.Subset(dataset, train_idx)
        self.val_set = torch.utils.data.Subset(dataset, val_idx)

    def train_dataloader(self) -> DataLoader:
        """Method to return the training dataloader.

        Returns:
            DataLoader: training dataloader.
        """
        return DataLoader(
            dataset=self.train_set,
            batch_size=self.cfg.model.batch_size,
            shuffle=self.cfg.model.shuffle,
            num_workers=self.cfg.model.num_workers,
        )

    def val_dataloader(self) -> DataLoader:
        """Method to return the validation dataloader (never shuffled).

        Returns:
            DataLoader: validation dataloader.
        """
        return DataLoader(
            dataset=self.val_set,
            batch_size=self.cfg.model.batch_size,
            shuffle=False,
            num_workers=self.cfg.model.num_workers,
        )




In [9]:
# Preprocessed PNG directory, its file listing, and the competition label table.
path_train_images = '/kaggle/working/preprocessed-png'
train_ids = next(os.walk(path_train_images))[2]  # file names at the top level of the directory only
img_list = os.listdir(path_train_images)
train_csv_data = pd.read_csv("/kaggle/input/mayo-clinic-strip-ai/train.csv")


In [12]:
from collections import defaultdict
low_sc_images = []
def load_train_df(image_ids=None, labels_df=None):
    """Build a (file name, label) table for the preprocessed training images.

    Each file name is mapped to a row of the label table by dropping its last
    8 characters and matching the remainder against the ``image_id`` column.
    Files with no matching row, and files whose name contains one of the
    excluded ids, are skipped.

    Args:
        image_ids: Iterable of processed image file names. Defaults to the
            notebook-level ``train_ids``.
        labels_df: DataFrame with ``image_id`` and ``label`` columns. Defaults
            to the notebook-level ``train_csv_data``.

    Returns:
        pd.DataFrame: columns ``image_id`` (the processed file name) and
        ``label``; both columns are present even when no file qualifies.

    Raises:
        ValueError: If more than one row of ``labels_df`` matches a file
            (raised by ``Series.item()``).
    """
    # Fall back to the notebook globals so the original zero-argument call keeps working.
    if image_ids is None:
        image_ids = train_ids
    if labels_df is None:
        labels_df = train_csv_data
    image_ids = list(image_ids)

    # Ids skipped on purpose; the reason is not recorded in the notebook.
    excluded_ids = ('0ba49d_0', '006388_0')
    # Number of trailing characters separating the file name from its image_id.
    suffix_len = 8

    records = []
    print('Loading train images...')
    for proc_image_id in tqdm(image_ids, total=len(image_ids)):
        if any(excluded in proc_image_id for excluded in excluded_ids):
            continue

        # One scan of the label table per file instead of one per field.
        matches = labels_df.loc[labels_df['image_id'] == proc_image_id[:-suffix_len], 'label']
        if matches.empty:
            continue

        records.append({'image_id': proc_image_id, 'label': matches.item()})

    return pd.DataFrame(records, columns=['image_id', 'label'])

In [15]:
# Re-read the label table (same CSV path as the earlier cell) and build the
# per-file (image_id, label) frame from it.
train_csv_data = pd.read_csv("/kaggle/input/mayo-clinic-strip-ai/train.csv")
train_df = load_train_df()


Loading train images...


100%|██████████| 4323/4323 [00:06<00:00, 693.07it/s]


In [16]:
# NOTE(review): not referenced by any cell visible in this part of the notebook
# (the YAML config carries its own data.image_size) — confirm it is still needed.
IMG_SIZE = 224

In [17]:

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


class BloodClotDataset(Dataset):
    """Dataset backed by a DataFrame of (file name, label) rows.

    Columns are addressed by position: column 0 holds the image file name
    relative to ``image_dir`` and column 1 holds the label, which must be a
    value ``torch.tensor`` accepts.

    Args:
        dataframe: Table with one row per sample.
        image_dir: Directory the file names are joined onto.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe, self.image_dir, self.transform = dataframe, image_dir, transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the image is transformed if a transform is set."""
        image_path = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        picture = Image.open(image_path).convert("RGB")

        target = torch.tensor(self.dataframe.iloc[idx, 1])

        if not self.transform:
            return picture, target
        return self.transform(picture), target
    
class BloodDataModule(pl.LightningDataModule):
    """LightningDataModule that splits a (file name, label) frame 80/20 into train and val.

    Args:
        train_dataframe: DataFrame with the file name in its first column and a
            ``label`` column in second position (``BloodClotDataset`` reads
            both by position).
        image_dir: Directory containing the image files.
        batch_size: Batch size of both dataloaders.
        seed: ``random_state`` of the train/validation split.
        num_workers: Worker processes per dataloader.
    """

    def __init__(self, train_dataframe, image_dir, batch_size=16, seed=19, num_workers=4):
        super(BloodDataModule, self).__init__()
        self.train_dataframe = train_dataframe
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.seed = seed
        self.num_workers = num_workers
        self.label_mapping = {}

    def setup(self, stage=None):
        """Encode the labels as integers and create the train / validation datasets.

        Args:
            stage: Lightning stage name (unused).
        """
        # Derive the mapping from the whole frame, in sorted order, so that the
        # integer ids do not depend on which rows the shuffle put first in the
        # training split and a class that only appears in validation is not
        # mapped to NaN.
        self.label_mapping = {
            label: idx for idx, label in enumerate(sorted(self.train_dataframe['label'].unique()))
        }

        # NOTE(review): rows are split independently of each other. If several
        # rows are tiles cut from the same slide (file names appear to carry a
        # per-tile suffix — confirm), one slide can end up in both splits;
        # consider a group-aware split in that case.
        train_data, val_data = train_test_split(
            self.train_dataframe, test_size=0.2, random_state=self.seed
        )

        # .assign() returns new frames, so neither the caller's DataFrame nor a
        # slice of it is mutated (avoids SettingWithCopyWarning).
        train_data = train_data.assign(label=train_data['label'].map(self.label_mapping))
        val_data = val_data.assign(label=val_data['label'].map(self.label_mapping))

        self.train_dataset = BloodClotDataset(train_data, self.image_dir, transform=self.transform("train"))
        self.val_dataset = BloodClotDataset(val_data, self.image_dir, transform=self.transform("val"))

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return DataLoader(
            self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers
        )

    def val_dataloader(self):
        """Return the validation dataloader (fixed order)."""
        return DataLoader(
            self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers
        )

    def transform(self, split):
        """Return the image transform for ``split``.

        Args:
            split: ``"train"`` or ``"val"``.

        Returns:
            A ``transforms.Compose``: random affine + rotation augmentation for
            training, plain tensor conversion for validation.

        Raises:
            ValueError: If ``split`` is neither ``"train"`` nor ``"val"``.
        """
        if split == "train":
            return transforms.Compose([
                transforms.RandomAffine(0, scale=(0.8, 1.2), translate=(0.1, 0.1)),
                transforms.RandomRotation(20),
                transforms.ToTensor(),
            ])
        if split == "val":
            # No random augmentation here: validation metrics (and the
            # checkpoints selected from them) must be comparable across epochs.
            return transforms.Compose([
                transforms.ToTensor(),
            ])
        raise ValueError(f"Unknown split {split!r}; expected 'train' or 'val'")

# **Pretrained EfficientNet Model**

In [26]:

class EfficientNetSTRIP(pl.LightningModule):
    """Binary classifier built on torchvision's EfficientNet-B2 with a single output unit.

    ``forward`` returns probabilities (sigmoid of the network output); the
    training and validation steps compute the loss on the raw logits.

    NOTE(review): ``efficientnet_b2`` is called with ``num_classes=1`` only, i.e.
    without a ``weights`` argument — confirm whether pretrained initialisation
    was intended.

    Args:
        cfg: Configuration object; it is only recorded through
            ``save_hyperparameters()``.
    """

    def __init__(self, cfg: dict):
        super(EfficientNetSTRIP, self).__init__()
        self.efficientnet = efficientnet_b2(num_classes=1)
        self.save_hyperparameters()
        self.sigmoid = nn.Sigmoid()
        # NOTE(review): an unused local `class_weights = torch.tensor([3])` was
        # removed; if class re-weighting was intended, pass it as `pos_weight=`.
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, x):
        """Return the predicted probability of the positive class, shape ``(batch, 1)``."""
        return self.sigmoid(self.efficientnet(x))

    def _shared_step(self, batch):
        """Run the network on a batch and return ``(loss, labels, targets, probabilities)``."""
        images, labels = batch
        targets = labels.view(-1, 1).float()
        logits = self.efficientnet(images)
        # BCEWithLogitsLoss applies the sigmoid itself, so it must receive the
        # raw logits; feeding it forward()'s probabilities applied the sigmoid
        # twice and squashed the loss/gradients.
        loss = self.criterion(logits, targets)
        probabilities = self.sigmoid(logits)
        return loss, labels, targets, probabilities

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss and accuracy for one batch."""
        loss, labels, targets, outputs = self._shared_step(batch)
        self.log("train_loss", loss, on_epoch=True, prog_bar=True, on_step=True)
        train_acc = self.calculate_accuracy(targets, outputs)
        self.log("train_acc", train_acc, on_epoch=True, prog_bar=True, on_step=True)
        return {"loss": loss, "labels": labels, "outputs": outputs}

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and accuracy for one batch."""
        loss, labels, targets, outputs = self._shared_step(batch)
        val_acc = self.calculate_accuracy(targets, outputs)
        self.log("val_acc", val_acc, on_epoch=True, prog_bar=True, on_step=True)
        self.log("val_loss", loss, on_epoch=True, prog_bar=True, on_step=True)
        return {"val_loss": loss, "labels": labels, "outputs": outputs}

    def calculate_accuracy(self, targets, outputs):
        """Return the fraction of predictions matching ``targets``.

        Args:
            targets: Float tensor of 0/1 labels.
            outputs: Probabilities of the same shape; values above 0.5 count as
                a positive prediction.

        Returns:
            float: mean accuracy over the batch.
        """
        binary_predictions = (outputs > 0.5).float()
        correct_predictions = (binary_predictions == targets).float()
        return correct_predictions.mean().item()

    def configure_optimizers(self):
        """Return Adam (lr=1e-5) with a ReduceLROnPlateau scheduler monitoring ``train_loss``."""
        optimizer = optim.Adam(self.parameters(), lr=0.00001)
        lr_scheduler = {
            'scheduler': optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, factor=0.1, patience=3, min_lr=1e-7
            ),
            'monitor': 'train_loss',
        }

        return [optimizer], [lr_scheduler]


# **Model Training**

In [None]:
from pytorch_lightning.loggers import WandbLogger
import wandb

cfg = OmegaConf.load("/kaggle/working/efficient_model_config.yaml")

# Seed Python, NumPy and torch (and dataloader workers) so weight
# initialisation, shuffling and augmentation are repeatable between runs.
pl.seed_everything(cfg.data.random_seed, workers=True)

# EfficientDataModule(cfg) is the config-driven alternative; this run trains on
# the frame built by load_train_df().
data_module = BloodDataModule(train_df, path_train_images, batch_size=16, seed=19)

model = EfficientNetSTRIP(cfg)

wandb_logger = WandbLogger(project="blood-clot-efficientb2", log_model="all")

# Keep the single best checkpoint by validation loss ...
checkpoint_loss = ModelCheckpoint(
    dirpath=cfg.model.checkpoint,
    monitor="val_loss",
    filename="efficientb2-loss-{val_loss:.2f}",
    save_top_k=1,
    mode="min"
)

# ... and the single best one by validation accuracy.
checkpoint_acc = ModelCheckpoint(
    dirpath=cfg.model.checkpoint,
    monitor="val_acc",
    filename="efficient-acc-{val_acc:.2f}",
    save_top_k=1,
    mode="max"
)

trainer = Trainer(
    logger=wandb_logger,
    accelerator="auto",
    # NOTE(review): cfg.model.max_epochs is not used here — confirm which value is intended.
    max_epochs=80,
    # checkpoint_acc was previously created but never registered, so no
    # accuracy-based checkpoint was ever written.
    callbacks=[checkpoint_loss, checkpoint_acc]
)

try:
    trainer.fit(model, datamodule=data_module)
finally:
    # Close the W&B run even when training raises or is interrupted.
    wandb.finish()


/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /kaggle/working/models/ exists and is not empty.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]