In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
!pip install lightly



In [None]:
import torch
import torch.nn as nn
import torchvision
import pytorch_lightning as pl
import lightly.models
import lightly.data
import lightly.loss
from torchmetrics import F1Score
from pytorch_lightning.callbacks import TQDMProgressBar
import torchmetrics

In [None]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
# Copy the Kaggle API credentials file from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the `sheel1206/cancer2` dataset archive with the Kaggle CLI.
! kaggle datasets download sheel1206/cancer2

Dataset URL: https://www.kaggle.com/datasets/sheel1206/cancer2
License(s): unknown
cancer2.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
! unzip '/content/cancer2.zip'

Archive:  /content/cancer2.zip
replace Kather_texture_2016_image_tiles_5000/Train/01_TUMOR/10009_CRC-Prim-HE-03_009.tif_Row_301_Col_151.tif? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Run configuration shared by the cells below.

# Worker processes used by each DataLoader.
num_workers = 8
# Batch size used by all three DataLoaders.
batch_size = 64
# Passed to NTXentLoss as `memory_bank_size` in MocoModel.
memory_bank_size = 4096
# Seed handed to pl.seed_everything.
seed = 1
# Epoch budget for both Trainers; also the T_max of both cosine LR schedulers.
max_epochs = 20

In [None]:
# Image folders produced by extracting the dataset archive.
# The 'Train' folder feeds both the MoCo pre-training and the classifier training.
path_to_train = '/content/Kather_texture_2016_image_tiles_5000/Train'
# The 'Val' folder is used as the validation set when fitting the classifier.
path_to_test = '/content/Kather_texture_2016_image_tiles_5000/Val'

In [None]:
# Seed the random number generators for reproducibility. `workers=True` also
# derives seeds for the DataLoader worker processes, which is where the
# augmentations of the collate function are executed.
pl.seed_everything(seed, workers=True)

In [None]:
# MoCo v2 uses the SimCLR augmentations; additionally, Gaussian blur is
# disabled by setting `gaussian_blur` to 0.
# The augmented views are produced at `input_size=150`.
# This collate function is passed to the MoCo pre-training DataLoader.
collate_fn = lightly.data.SimCLRCollateFunction(
    input_size=150,
    gaussian_blur=0.,
)

We don't want any augmentation for our test data, so we create custom,
torchvision-based data transformations for it. Let's make sure the image
size is correct and that we normalize the data in the same way as we do
with the training data.



In [None]:

# Deterministic (augmentation-free) preprocessing for the classifier stage.
#
# The SimCLR collate function used for MoCo pre-training normalizes its views
# with the ImageNet channel statistics by default, so the frozen backbone must
# receive identically normalized inputs here. The resize pins the spatial size
# to the 150 px used during pre-training (a no-op for tiles that are already
# 150x150).
_normalize = torchvision.transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

train_classifier_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((150, 150)),
    torchvision.transforms.ToTensor(),
    _normalize,
])
test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((150, 150)),
    torchvision.transforms.ToTensor(),
    _normalize,
])

# No transform here: the augmentations for MoCo are applied by the collate
# function of the pre-training DataLoader.
dataset_train_moco = lightly.data.LightlyDataset(
    input_dir=path_to_train
)

# Same training images, but with the deterministic preprocessing above.
dataset_train_classifier = lightly.data.LightlyDataset(
    input_dir=path_to_train,
    transform=train_classifier_transforms
)

# Held-out images used for validation of the classifier.
dataset_test = lightly.data.LightlyDataset(
    input_dir=path_to_test,
    transform=test_transforms
)

In [None]:
# Settings shared by every loader in this notebook.
_loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)

# Self-supervised pre-training: shuffled, incomplete last batch dropped,
# augmented views built by `collate_fn`.
dataloader_train_moco = torch.utils.data.DataLoader(
    dataset_train_moco,
    shuffle=True,
    drop_last=True,
    collate_fn=collate_fn,
    **_loader_kwargs,
)

# Classifier training: shuffled, incomplete last batch dropped,
# default collation.
dataloader_train_classifier = torch.utils.data.DataLoader(
    dataset_train_classifier,
    shuffle=True,
    drop_last=True,
    **_loader_kwargs,
)

# Validation: fixed order, every sample kept.
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    shuffle=False,
    drop_last=False,
    **_loader_kwargs,
)

In [None]:
class MocoModel(pl.LightningModule):
    """LightningModule for self-supervised MoCo pre-training.

    Wraps a ResNet-18 backbone (classification head removed) in a
    `lightly.models.MoCo` model and trains it with `NTXentLoss` backed by a
    memory bank of `memory_bank_size` entries.
    """

    def __init__(self):
        super().__init__()

        # create a ResNet backbone and remove the classification head
        resnet = lightly.models.ResNetGenerator('resnet-18', 1, num_splits=8)
        backbone = nn.Sequential(
            *list(resnet.children())[:-1],
            nn.AdaptiveAvgPool2d(1),
        )

        # create a moco based on ResNet
        self.resnet_moco = \
            lightly.models.MoCo(backbone, num_ftrs=512, m=0.99, batch_shuffle=True)

        # create our loss with the optional memory bank
        self.criterion = lightly.loss.NTXentLoss(
            temperature=0.1,
            memory_bank_size=memory_bank_size)

    def forward(self, x):
        """Run the wrapped MoCo model on `x` and return its output."""
        # The result must be returned; without `return`, calling the module
        # always yields None.
        return self.resnet_moco(x)

    # We provide a helper method to log weights in tensorboard
    # which is useful for debugging.
    def custom_histogram_weights(self):
        """Log a histogram of every named parameter for the current epoch.

        Does nothing when the active logger does not expose `add_histogram`
        (e.g. logging disabled or a non-TensorBoard logger), instead of
        raising an AttributeError in the middle of training.
        """
        experiment = getattr(self.logger, 'experiment', None)
        add_histogram = getattr(experiment, 'add_histogram', None)
        if add_histogram is None:
            return
        for name, params in self.named_parameters():
            add_histogram(name, params, self.current_epoch)

    def training_step(self, batch, batch_idx):
        """Compute the contrastive loss for one batch of paired views."""
        # Each batch is ((view0, view1), <unused>, <unused>).
        (x0, x1), _, _ = batch
        y0, y1 = self.resnet_moco(x0, x1)
        loss = self.criterion(y0, y1)
        self.log('train_loss_ssl', loss)
        return loss

    def on_train_epoch_end(self):
        """Log parameter histograms once per training epoch."""
        self.custom_histogram_weights()

    def configure_optimizers(self):
        """Return SGD with a cosine-annealed learning rate over `max_epochs`."""
        optim = torch.optim.SGD(self.resnet_moco.parameters(), lr=6e-2,
                                momentum=0.9, weight_decay=5e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
        return [optim], [scheduler]

In [None]:
# Per-batch validation accuracies, appended by Classifier.validation_step
# (this includes the batches of Lightning's sanity-check pass).
acc_list = []


class Classifier(pl.LightningModule):
    """Linear classifier trained on top of a frozen, pre-trained MoCo model.

    Args:
        model: the pre-trained MoCo model; its `backbone` attribute is used as
            a fixed feature extractor and all of its parameters are frozen.
    """

    def __init__(self, model):
        super().__init__()
        # create a moco based on ResNet
        self.resnet_moco = model

        # freeze the layers of moco
        for p in self.resnet_moco.parameters():  # reset requires_grad
            p.requires_grad = False

        # we create a linear layer for our downstream classification
        # model
        self.fc = nn.Linear(512, 6)

        # Metrics for the 6-class problem, accumulated over each validation epoch.
        self.accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=6)
        self.f1 = F1Score(task="multiclass", num_classes=6)

    def forward(self, x):
        """Return class logits for a batch of images."""
        with torch.no_grad():
            # Flatten everything except the batch dimension. A bare
            # `.squeeze()` would also drop the batch dimension for a batch of
            # one sample and break the `dim=1` normalization below.
            features = self.resnet_moco.backbone(x).flatten(start_dim=1)
            features = nn.functional.normalize(features, dim=1)
        return self.fc(features)

    # We provide a helper method to log weights in tensorboard
    # which is useful for debugging.
    def custom_histogram_weights(self):
        """Log a histogram of every named parameter for the current epoch."""
        for name, params in self.named_parameters():
            self.logger.experiment.add_histogram(
                name, params, self.current_epoch)

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss of the linear head for one batch."""
        x, y, _ = batch
        y_hat = self.forward(x)
        loss = nn.functional.cross_entropy(y_hat, y)
        self.log('train_loss_fc', loss)
        return loss

    def on_train_epoch_end(self):
        """Log parameter histograms once per training epoch."""
        self.custom_histogram_weights()

    def validation_step(self, batch, batch_idx):
        """Update the validation metrics with one batch and log them."""
        x, y, _ = batch
        probs = torch.nn.functional.softmax(self.forward(x), dim=1)
        n_samples = y.size(0)

        # Call each metric exactly once per batch: the call updates the
        # epoch-level state and returns the value for this batch only.
        # Calling it a second time would feed the same batch in twice.
        batch_acc = self.accuracy(probs, y)
        acc_list.append(batch_acc)
        self.f1(probs, y)

        # Log the metric objects themselves so Lightning computes the value
        # over the whole epoch and resets the state afterwards, rather than
        # averaging per-step values. The explicit batch_size avoids the
        # "ambiguous collection" batch-size inference warning.
        self.log('val_acc', self.accuracy,
                 on_step=False, on_epoch=True, prog_bar=True,
                 batch_size=n_samples)
        self.log('val_f1', self.f1,
                 on_step=False, on_epoch=True, prog_bar=True,
                 batch_size=n_samples)

    def configure_optimizers(self):
        """Return SGD over the linear head with a cosine-annealed learning rate."""
        optim = torch.optim.SGD(self.fc.parameters(), lr=30.)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, max_epochs)
        return [optim], [scheduler]

## Train the MoCo model





In [None]:
# use a GPU if available
gpus = 1 if torch.cuda.is_available() else 0

model = MocoModel()

# The Trainer takes 'accelerator' and 'devices' instead of 'gpus'. The
# accelerator is derived from the availability check above so the cell also
# runs on a CPU-only runtime instead of failing with a hard-coded 'gpu'.
# TQDMProgressBar sets the progress bar refresh rate.
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator='gpu' if gpus else 'cpu',
    devices=1,
    callbacks=[TQDMProgressBar(refresh_rate=10)],
)
trainer.fit(
    model,
    dataloader_train_moco
)

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | resnet_moco | MoCo       | 23.0 M | train
1 | criterion   | NTXentLoss | 0      | train
---------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
91.977    Total estimated model params size (MB)
138       Modules in train mode
0         Modules in eval mode
/usr/local/li

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


## Train the Classifier



In [None]:
# Put the pre-trained MoCo model into eval mode before reusing it as a
# frozen feature extractor.
model.eval()

classifier = Classifier(model.resnet_moco)

# Choose the accelerator at runtime so the cell also works without a GPU
# instead of failing with a hard-coded 'gpu'.
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    callbacks=[TQDMProgressBar(refresh_rate=10)],
)
# Train on the classifier loader and validate on the held-out loader.
trainer.fit(
    classifier,
    dataloader_train_classifier,
    dataloader_test
)

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type               | Params | Mode 
-----------------------------------------------------------
0 | resnet_moco | MoCo               | 23.0 M | eval 
1 | fc          | Linear             | 3.1 K  | train
2 | accuracy    | MulticlassAccuracy | 0      | train
3 | f1          | MulticlassF1Score  | 0      | train
-----------------------------------------------------------
3.1 K     Trainable params
23.0 M    Non-trainable params


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.11/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.11/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 62. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.
