In [1]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets list

rm: cannot remove '/root/.kaggle': No such file or directory
ref                                                             title                                                size  lastUpdated          downloadCount  voteCount  usabilityRating  
--------------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
victorsoeiro/netflix-tv-shows-and-movies                        Netflix TV Shows and Movies                           2MB  2022-05-15 00:01:23          10797        325  1.0              
devansodariya/student-performance-data                          Student Performance Dataset                           7KB  2022-05-26 13:55:09           6112        182  0.9705882        
mohamedharris/supermart-grocery-sales-retail-analytics-dataset  Supermart Grocery Sales - Retail Analytics Dataset  191KB  2022-06-12 16:14:44            961         46  0.88235295       

In [2]:
!kaggle datasets download andrewmvd/lung-and-colon-cancer-histopathological-images -p /content/data/ --unzip

Downloading lung-and-colon-cancer-histopathological-images.zip to /content/data
100% 1.75G/1.76G [00:10<00:00, 206MB/s]
100% 1.76G/1.76G [00:11<00:00, 171MB/s]


In [3]:
!mkdir ./LungColon

print('Copying files...')
!cp -R /content/data/lung_colon_image_set/colon_image_sets/* ./LungColon
!cp -R /content/data/lung_colon_image_set/lung_image_sets/* ./LungColon
print('All files copied successfully!')

Copying files...
All files copied successfully!


In [4]:
!pip install pytorch-lightning torchmetrics --quiet


[K     |████████████████████████████████| 585 kB 7.1 MB/s 
[K     |████████████████████████████████| 419 kB 62.6 MB/s 
[K     |████████████████████████████████| 596 kB 56.9 MB/s 
[K     |████████████████████████████████| 140 kB 71.7 MB/s 
[K     |████████████████████████████████| 1.1 MB 52.7 MB/s 
[K     |████████████████████████████████| 271 kB 57.9 MB/s 
[K     |████████████████████████████████| 144 kB 71.9 MB/s 
[K     |████████████████████████████████| 94 kB 2.0 MB/s 
[?25h

In [5]:
from pytorch_lightning import LightningDataModule ,LightningModule
import os
import torch
from  torchvision.datasets import ImageFolder
import torchvision.io as io
import torchvision.transforms as T
from functools import partial
from torch.utils.data import DataLoader
import torchmetrics
import torch.nn as nn
from math import sqrt
import torch.optim as optim
from enum import Enum
import torch.nn.functional as F
import torchvision.models.resnet as RES

from pytorch_lightning import Trainer ,LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

In [6]:
class DataModule(LightningDataModule):
    def __init__(self,data_dir):
        super().__init__()
        self.data_dir = data_dir
        self.image_read_func = partial(io.read_image,mode=io.image.ImageReadMode.RGB)
        self.batch_size = 64
        self.num_workers= 8
        self.train_transform = T.Compose([
                T.Resize(size = (256,256)),
                T.RandomRotation(degrees = (-20,+20)),
                T.CenterCrop(size=224),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])

        self.test_trasform = T.Compose([
                T.Resize(size = (224,224)),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])


    def setup(self, stage= None) -> None:
        self.train_data = ImageFolder(self.data_dir,transform=self.train_transform,)
        print(len(self.train_data))
        self.train_set, self.val_set = torch.utils.data.random_split(self.train_data, [20000,5000])

        self.val_length = len(self.val_set)

    def train_dataloader(self):
        return DataLoader(dataset=self.train_set,batch_size=self.batch_size,shuffle=True,)

    def val_dataloader(self) :
        return DataLoader(dataset=self.val_set,batch_size=self.batch_size,)
    
    

In [None]:
path= '/content/LungColon'
dm = DataModule(path)
dm.setup()

25000


In [7]:





class LightModel(LightningModule):
    def __init__(self,val_length):
        super().__init__()
        self.model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)
        self.model.fc = nn.Linear(2048,5)
        self.loss = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy()
        self.val_length = val_length
    def forward(self,x):
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx):
        #print(batch)
        x, y = batch
        y_hat = self(x)
        #loss = nn.CrossEntropyLoss(y_hat, y)
        loss = self.loss(y_hat, y)
        self.accuracy(y_hat, y)
        self.log('train_acc_step', self.accuracy)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        acc = (y_hat.argmax(-1) == y).sum().item()
        return acc

    def validation_epoch_end(self, validation_step_outputs):
        acc = 0
        for pred in validation_step_outputs:
            acc += pred
        acc = acc / self.val_length
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True) 

    def configure_optimizers(self):
        optim= torch.optim.Adam(self.parameters(), lr=0.02)
        scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=50, gamma=0.8)
        return {
            'optimizer': optim,
            'lr_scheduler': {
                'scheduler': scheduler,
                'interval': 'epoch',
            }
        }

In [8]:





logger = TensorBoardLogger("tb_logs", name="my_model")
data_dir = '/content/LungColon'


def main():
    
    dm = DataModule(data_dir)
    dm.setup()
    model = LightModel(val_length=dm.val_length)
    callback = ModelCheckpoint(filename="{epoch}-{val_acc}",
                               monitor='val_acc',
                               save_last=True,
                               mode='max')


    trainer = Trainer(gpus=1,
                      benchmark=True,
                      max_epochs=10,
                      precision=16,
                      callbacks=[callback],
                      check_val_every_n_epoch=2 , 
                     gradient_clip_val=8,
                     logger=logger)
    trainer.fit(model, datamodule=dm)



if __name__ == "__main__":
    main() 

25000


Downloading: "https://github.com/pytorch/vision/archive/v0.10.0.zip" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


  0%|          | 0.00/230M [00:00<?, ?B/s]

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: tb_logs/my_model


25000


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type             | Params
----------------------------------------------
0 | model    | ResNet           | 58.2 M
1 | loss     | CrossEntropyLoss | 0     
2 | accuracy | Accuracy         | 0     
----------------------------------------------
58.2 M    Trainable params
0         Non-trainable params
58.2 M    Total params
116.308   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [10]:
%load_ext tensorboard
%tensorboard --logdir tb_logs/my_model

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 377), started 0:00:12 ago. (Use '!kill 377' to kill it.)

<IPython.core.display.Javascript object>