In [2]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader,Dataset,random_split
import torchvision as tv
import numpy as np
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt

import glob as gb

In [3]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [24]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [25]:
!kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images


Dataset URL: https://www.kaggle.com/datasets/andrewmvd/lung-and-colon-cancer-histopathological-images
License(s): CC-BY-SA-4.0
^C


In [26]:
!unzip lung-and-colon-cancer-histopathological-images.zip


Archive:  lung-and-colon-cancer-histopathological-images.zip
replace lung_colon_image_set/colon_image_sets/colon_aca/colonca1.jpeg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [4]:
dataset_path="/content/lung_colon_image_set"
image_size=224
epochs=100


In [5]:
class CustomDataset(Dataset):
  def __init__(self,data_dir):
    self.data_dir=data_dir
    self.image_paths,self.labels,self.label_map,self.n_class=self.ReadDataset(self.data_dir)
    self.transform=tv.transforms.Compose([
        tv.transforms.Resize([image_size,image_size]),
        tv.transforms.ToTensor()
    ])
  def ReadDataset(self,data_dir):
    image_paths,labels=[],[]
    label_map={}
    idx=0
    for folder in sorted(os.listdir(dataset_path)):
      for i in sorted(os.listdir(os.path.join(dataset_path,folder))):
        label_map[idx]=i
        for path in os.listdir(dataset_path +'/'+ folder +'/'+ i):
          files=gb.glob(pathname=str(dataset_path+"/"+folder+"/"+i+"/"+path ))
          for file in files:
            image_paths.append(file)
            labels.append(idx)
        idx+=1
    n_class=len(label_map)
    return np.array(image_paths) , np.array(labels) , label_map , n_class
  def __len__(self):
    return len(self.labels)
  def __getitem__(self,index):
    img=Image.open(self.image_paths[index])
    img=self.transform(img)
    label=torch.tensor(self.labels[index])
    return img,label


In [6]:
ds=CustomDataset(dataset_path)

In [7]:
label_map = ds.label_map
N_CLASSES = ds.n_class
label_map , N_CLASSES

({0: 'colon_aca', 1: 'colon_n', 2: 'lung_aca', 3: 'lung_n', 4: 'lung_scc'}, 5)

In [8]:
train_size = int(0.8 * ds.__len__())
val_size = int(0.2 * ds.__len__())


In [9]:
train_set,val_set=random_split(ds,[train_size,val_size])

In [10]:
torch.manual_seed(123)
train_ds = DataLoader(
    train_set ,
    batch_size = 64 ,
    shuffle = True ,
    pin_memory = True ,
    num_workers=2
)

torch.manual_seed(42)
val_ds = DataLoader(
    val_set ,
    batch_size = 64 ,
    shuffle = False ,
    pin_memory = True ,
    num_workers=2
)

In [11]:
len(train_ds),len(val_ds)

(313, 79)

In [12]:
from torch.nn.modules.linear import Linear
model =nn.Sequential(
    nn.Conv2d(3,32,3),
     nn.BatchNorm2d(32),
    nn.ReLU(),

    nn.MaxPool2d((2,2)) ,
    nn.Conv2d(32,64,3),
    nn.BatchNorm2d(64),
    nn.ReLU(),

    nn.MaxPool2d((2,2)) ,
    nn.Conv2d(64,128,3),
      nn.BatchNorm2d(128),
    nn.ReLU(),

    nn.MaxPool2d((2,2)) ,
     nn.Conv2d(128,256,3),
        nn.BatchNorm2d(256),
    nn.ReLU(),

     nn.MaxPool2d((2,2)) ,
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),

    nn.Linear(256,N_CLASSES)


)

In [13]:
model=model.to(device)


In [14]:
!pip install torchinfo




In [15]:
from torchinfo import summary

In [16]:
summary(model,input_size=(64,3,image_size,image_size))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [64, 5]                   --
├─Conv2d: 1-1                            [64, 32, 222, 222]        896
├─BatchNorm2d: 1-2                       [64, 32, 222, 222]        64
├─ReLU: 1-3                              [64, 32, 222, 222]        --
├─MaxPool2d: 1-4                         [64, 32, 111, 111]        --
├─Conv2d: 1-5                            [64, 64, 109, 109]        18,496
├─BatchNorm2d: 1-6                       [64, 64, 109, 109]        128
├─ReLU: 1-7                              [64, 64, 109, 109]        --
├─MaxPool2d: 1-8                         [64, 64, 54, 54]          --
├─Conv2d: 1-9                            [64, 128, 52, 52]         73,856
├─BatchNorm2d: 1-10                      [64, 128, 52, 52]         256
├─ReLU: 1-11                             [64, 128, 52, 52]         --
├─MaxPool2d: 1-12                        [64, 128, 26, 26]         --
├─Co

In [17]:
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.003)

In [19]:
from tqdm import tqdm

for epoch in range(10):
    total_loss = 0
    model.train()
    for X, y in tqdm(train_ds, desc=f"Epoch {epoch+1}"):
        X, y = X.to(device), y.to(device)
        optim.zero_grad()
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optim.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(train_ds)
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}")


Epoch 1: 100%|██████████| 313/313 [02:24<00:00,  2.16it/s]


Epoch 1/100, Training Loss: 0.2501


Epoch 2: 100%|██████████| 313/313 [02:21<00:00,  2.21it/s]


Epoch 2/100, Training Loss: 0.1665


Epoch 3: 100%|██████████| 313/313 [02:21<00:00,  2.21it/s]


Epoch 3/100, Training Loss: 0.1331


Epoch 4: 100%|██████████| 313/313 [02:19<00:00,  2.24it/s]


Epoch 4/100, Training Loss: 0.0978


Epoch 5: 100%|██████████| 313/313 [02:21<00:00,  2.21it/s]


Epoch 5/100, Training Loss: 0.0784


Epoch 6: 100%|██████████| 313/313 [02:20<00:00,  2.22it/s]


Epoch 6/100, Training Loss: 0.0671


Epoch 7: 100%|██████████| 313/313 [02:23<00:00,  2.18it/s]


Epoch 7/100, Training Loss: 0.0587


Epoch 8: 100%|██████████| 313/313 [02:21<00:00,  2.21it/s]


Epoch 8/100, Training Loss: 0.0497


Epoch 9: 100%|██████████| 313/313 [02:22<00:00,  2.19it/s]


Epoch 9/100, Training Loss: 0.0468


Epoch 10: 100%|██████████| 313/313 [02:23<00:00,  2.19it/s]

Epoch 10/100, Training Loss: 0.0385





In [20]:
model.eval()


Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): ReLU()
  (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilatio

In [22]:

val_loss = 0
correct = 0
with torch.no_grad():
        for X_val, y_val in val_ds:
            X_val, y_val = X_val.to(device), y_val.to(device)
            y_pred = model(X_val)
            val_loss += loss_fn(y_pred, y_val).item()
            correct += (y_pred.argmax(1) == y_val).sum().item()
        val_loss /= len(val_ds)
        val_acc = correct / len(val_set)
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

Validation Loss: 0.0813, Validation Accuracy: 0.9652
