In [1]:
%matplotlib inline

In [2]:
!pip install lightly

Collecting lightly
  Downloading lightly-1.5.19-py3-none-any.whl.metadata (36 kB)
Collecting hydra-core>=1.0.0 (from lightly)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting lightly_utils~=0.0.0 (from lightly)
  Downloading lightly_utils-0.0.2-py3-none-any.whl.metadata (1.4 kB)
Collecting pytorch_lightning>=1.0.4 (from lightly)
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting aenum>=3.1.11 (from lightly)
  Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)
Collecting omegaconf<2.4,>=2.2 (from hydra-core>=1.0.0->lightly)
  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting antlr4-python3-runtime==4.9.* (from hydra-core>=1.0.0->lightly)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torchmetri

In [1]:
import torch
import torch.nn as nn
import torchvision
import pytorch_lightning as pl
import lightly.models
import lightly.data
import lightly.loss
from torchmetrics import F1Score
from pytorch_lightning.callbacks import TQDMProgressBar
import torchmetrics



In [12]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [13]:
# copy kaggle.json from the current working directory into ~/.kaggle/
# (presumably the Kaggle API token used by the `kaggle` CLI below — confirm)
! cp kaggle.json ~/.kaggle/

In [14]:
# mode 600: only the owning user may read or write the file
! chmod 600 ~/.kaggle/kaggle.json

In [15]:
# download the sheel1206/cancer2 dataset archive with the Kaggle CLI
! kaggle datasets download sheel1206/cancer2

Dataset URL: https://www.kaggle.com/datasets/sheel1206/cancer2
License(s): unknown


In [16]:
! unzip '/content/cancer2.zip'

Archive:  /content/cancer2.zip
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10009_CRC-Prim-HE-03_009.tif_Row_301_Col_151.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10062_CRC-Prim-HE-02_003b.tif_Row_1_Col_301.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/100B0_CRC-Prim-HE-09_009.tif_Row_1_Col_301.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10104_CRC-Prim-HE-10_021.tif_Row_451_Col_1.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10142_CRC-Prim-HE-09_025.tif_Row_151_Col_151.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/101A0_CRC-Prim-HE-03_034.tif_Row_151_Col_1.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/1021F_CRC-Prim-HE-04_029.tif_Row_151_Col_1.tif  
  inflating: Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10264_CRC-Prim-HE-07_025.tif_Row_1801_Col_1.tif  
  inflating: Kather_texture_2016_image_tiles_5000/T

In [17]:
# --- data loading ---
batch_size = 64          # samples per mini-batch for every DataLoader
num_workers = 8          # worker processes per DataLoader

# --- MoCo ---
memory_bank_size = 4096  # size of the memory bank handed to the NT-Xent loss

# --- run control ---
seed = 1                 # value passed to pl.seed_everything
max_epochs = 20          # epochs for each Trainer and for the cosine LR schedule

In [18]:
# Root folder of the extracted archive; the split folders live directly below it.
dataset_root = '/content/Kather_texture_2016_image_tiles_5000'
path_to_train = f'{dataset_root}/Train'
path_to_test = f'{dataset_root}/Val'

In [19]:
# workers=True also derives per-worker seeds for the DataLoader worker processes,
# which is where the random augmentations run when num_workers > 0.
pl.seed_everything(seed, workers=True)

INFO:lightning_fabric.utilities.seed:Seed set to 1


1

In [20]:
# MoCo v2 reuses the SimCLR augmentation pipeline; Gaussian blur is switched off
# by giving it probability zero.
collate_fn = lightly.data.SimCLRCollateFunction(input_size=150, gaussian_blur=0.0)

We don't want any augmentation for our test data. Therefore,
we create custom, torchvision based data transformations.
Let's ensure the size is correct and we normalize the data in
the same way as we do with the training data.



In [21]:

# ImageNet channel statistics. SimCLRCollateFunction normalizes the MoCo views
# with these values by default, so the frozen backbone must receive inputs that
# are normalized the same way when the classifier is trained and evaluated.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]


def _build_eval_transforms():
    """Return the augmentation-free pipeline used for classifier training/testing.

    Images are resized to 150x150 (the ``input_size`` given to the SimCLR collate
    function), converted to tensors and normalized with the ImageNet statistics.
    """
    return torchvision.transforms.Compose([
        torchvision.transforms.Resize((150, 150)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])


train_classifier_transforms = _build_eval_transforms()
test_transforms = _build_eval_transforms()

# No transform here: the raw images are augmented later by the collate function.
dataset_train_moco = lightly.data.LightlyDataset(
    input_dir=path_to_train
)
dataset_train_classifier = lightly.data.LightlyDataset(
    input_dir=path_to_train,
    transform=train_classifier_transforms
)

dataset_test = lightly.data.LightlyDataset(
    input_dir=path_to_test,
    transform=test_transforms
)

In [22]:
# Settings that are identical for all three loaders.
loader_defaults = dict(batch_size=batch_size, num_workers=num_workers)

# Self-supervised loader: the collate function turns each batch into augmented views.
dataloader_train_moco = torch.utils.data.DataLoader(
    dataset_train_moco,
    collate_fn=collate_fn,
    shuffle=True,
    drop_last=True,
    **loader_defaults,
)

# Supervised loader for the linear classifier (no augmentation).
dataloader_train_classifier = torch.utils.data.DataLoader(
    dataset_train_classifier,
    shuffle=True,
    drop_last=True,
    **loader_defaults,
)

# Evaluation loader: fixed order, every sample kept.
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    shuffle=False,
    drop_last=False,
    **loader_defaults,
)



In [23]:
class MocoModel(pl.LightningModule):
    """LightningModule that trains a MoCo model on pairs of augmented views.

    The backbone is a lightly ResNet-18 with its last child module replaced by
    adaptive average pooling. Training minimizes an NT-Xent loss that is
    configured with a memory bank of ``memory_bank_size`` entries.
    """

    def __init__(self):
        super().__init__()

        # create a ResNet backbone and remove the classification head
        resnet = lightly.models.ResNetGenerator('resnet-18', 1, num_splits=8)
        backbone = nn.Sequential(
            *list(resnet.children())[:-1],
            nn.AdaptiveAvgPool2d(1),
        )

        # create a moco based on ResNet
        self.resnet_moco = lightly.models.MoCo(
            backbone, num_ftrs=512, m=0.99, batch_shuffle=True)

        # create our loss with the optional memory bank
        self.criterion = lightly.loss.NTXentLoss(
            temperature=0.1,
            memory_bank_size=memory_bank_size)

    def forward(self, x):
        """Run ``x`` through the wrapped MoCo model and return its output."""
        # The result was previously discarded, so forward() always returned None.
        return self.resnet_moco(x)

    def custom_histogram_weights(self):
        """Log a histogram of every parameter to the logger (debugging aid).

        Does nothing when no logger is attached or when the logger's experiment
        object has no ``add_histogram`` method, instead of raising.
        """
        experiment = getattr(self.logger, 'experiment', None)
        if not hasattr(experiment, 'add_histogram'):
            return
        for name, params in self.named_parameters():
            experiment.add_histogram(name, params, self.current_epoch)

    def training_step(self, batch, batch_idx):
        """Compute the contrastive loss for one batch of two augmented views.

        ``batch`` unpacks as ``((x0, x1), _, _)``; only the two views are used.
        """
        (x0, x1), _, _ = batch
        y0, y1 = self.resnet_moco(x0, x1)
        loss = self.criterion(y0, y1)
        self.log('train_loss_ssl', loss)
        return loss

    def on_train_epoch_end(self):
        """Log the weight histograms once per training epoch."""
        self.custom_histogram_weights()

    def configure_optimizers(self):
        """Return SGD with a cosine-annealed learning rate over ``max_epochs``."""
        optim = torch.optim.SGD(self.resnet_moco.parameters(), lr=6e-2,
                                momentum=0.9, weight_decay=5e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
        return [optim], [scheduler]

In [24]:
# Per-batch validation accuracies, appended by Classifier.validation_step.
acc_list = []


class Classifier(pl.LightningModule):
    """Linear classifier trained on top of a frozen MoCo backbone.

    Args:
        model: the trained MoCo model; its ``backbone`` attribute is used as a
            fixed feature extractor producing 512 features per image.
        num_classes: number of target classes (defaults to 6, the value that
            was previously hard-coded).
            NOTE(review): must match the number of class folders in the
            dataset — confirm.
    """

    def __init__(self, model, num_classes=6):
        super().__init__()
        # create a moco based on ResNet
        self.resnet_moco = model

        # freeze the layers of moco
        for p in self.resnet_moco.parameters():
            p.requires_grad = False

        # linear layer for the downstream classification task
        self.fc = nn.Linear(512, num_classes)

        self.accuracy = torchmetrics.Accuracy(
            task="multiclass", num_classes=num_classes)
        self.f1 = F1Score(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        with torch.no_grad():
            # flatten(start_dim=1) keeps the batch dimension even for a batch
            # of one image; a bare squeeze() would drop it and break the
            # dim=1 normalization below.
            y_hat = self.resnet_moco.backbone(x).flatten(start_dim=1)
            y_hat = nn.functional.normalize(y_hat, dim=1)
        y_hat = self.fc(y_hat)
        return y_hat

    def custom_histogram_weights(self):
        """Log a histogram of every parameter to the logger (debugging aid).

        Does nothing when no logger is attached or when the logger's experiment
        object has no ``add_histogram`` method, instead of raising.
        """
        experiment = getattr(self.logger, 'experiment', None)
        if not hasattr(experiment, 'add_histogram'):
            return
        for name, params in self.named_parameters():
            experiment.add_histogram(name, params, self.current_epoch)

    def training_step(self, batch, batch_idx):
        """Cross-entropy loss of the linear head on one ``(x, y, _)`` batch."""
        x, y, _ = batch
        y_hat = self.forward(x)
        loss = nn.functional.cross_entropy(y_hat, y)
        self.log('train_loss_fc', loss)
        return loss

    def on_train_epoch_end(self):
        """Log the weight histograms once per training epoch."""
        self.custom_histogram_weights()

    def validation_step(self, batch, batch_idx):
        """Update accuracy/F1 with one ``(x, y, _)`` validation batch."""
        x, y, _ = batch
        y_hat = self.forward(x)
        y_hat = torch.nn.functional.softmax(y_hat, dim=1)

        # Update each metric exactly once per batch. Calling the metric returns
        # the value for this batch and accumulates state for the epoch value.
        batch_acc = self.accuracy(y_hat, y)
        self.f1(y_hat, y)

        # Log the metric objects rather than tensors: Lightning then computes
        # the epoch-level value from the accumulated state and resets the
        # metric afterwards, so epochs do not leak into each other.
        self.log('val_acc', self.accuracy, on_epoch=True, prog_bar=True)
        self.log('val_f1', self.f1, on_epoch=True, prog_bar=True)

        # Store a detached CPU copy so the list does not keep device memory.
        acc_list.append(batch_acc.detach().cpu())

    def configure_optimizers(self):
        """Return SGD on the linear head with a cosine-annealed learning rate."""
        optim = torch.optim.SGD(self.fc.parameters(), lr=30.)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
        return [optim], [scheduler]

## Train the MoCo model





In [None]:
# Accelerator name handed to pl.Trainer: 'gpu' when CUDA is usable, else 'cpu'.
if torch.cuda.is_available():
    gpus = 'gpu'
else:
    gpus = 'cpu'


In [26]:


# Self-supervised pre-training of MoCo on the augmented training views.
model = MocoModel()

# TQDMProgressBar sets the progress-bar refresh rate; accelerator/devices select
# the hardware.
progress_bar = TQDMProgressBar(refresh_rate=10)
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator=gpus,
    devices=1,
    callbacks=[progress_bar],
)
trainer.fit(model, dataloader_train_moco)

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | resnet_moco | MoCo       | 23.0 M | train
1 | criterion   | NTXentLoss | 0      | train
---------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
91.977    Total estimated model params size (MB)
138       Modules in train mode
0         Modules in eval mode
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loops/fit_loop.py:310: The number of traini

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:
Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

## Train the Classifier



In [None]:
# Put the pre-trained MoCo model into eval mode before wrapping it in the classifier.
model.eval()
classifier = Classifier(model.resnet_moco)

progress_bar = TQDMProgressBar(refresh_rate=10)
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator=gpus,
    devices=1,
    callbacks=[progress_bar],
)
# Second positional loader is used for validation.
trainer.fit(classifier, dataloader_train_classifier, dataloader_test)