In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm

import matplotlib.pyplot as plt # For data viz
import pandas as pd
import numpy as np
import sys
from tqdm.notebook import tqdm

print('System Version:', sys.version)
print('PyTorch version', torch.__version__)
print('Torchvision version', torchvision.__version__)
print('Numpy version', np.__version__)
print('Pandas version', pd.__version__)


  from .autonotebook import tqdm as notebook_tqdm


System Version: 3.13.9 (tags/v3.13.9:8183fa5, Oct 14 2025, 14:09:13) [MSC v.1944 64 bit (AMD64)]
PyTorch version 2.8.0+cpu
Torchvision version 0.23.0+cpu
Numpy version 2.3.3
Pandas version 2.3.3


In [8]:
class ColonCancerDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = ImageFolder(data_dir, transform=transform)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
    
    @property
    def classes(self):
        return self.data.classes

In [None]:
transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor()
])
dataset = ColonCancerDataset(data_dir='./colon_output/train', transform=transform)

In [17]:
len(dataset)

10000

In [18]:
image, label = dataset[6000]
print(label)
image

1


tensor([[[0.9216, 0.9216, 0.9373,  ..., 0.8941, 0.8863, 0.8706],
         [0.9255, 0.9216, 0.9333,  ..., 0.9059, 0.8902, 0.8706],
         [0.9333, 0.9255, 0.9294,  ..., 0.9216, 0.8980, 0.8784],
         ...,
         [0.9647, 0.9569, 0.9490,  ..., 0.7255, 0.7294, 0.7294],
         [0.9647, 0.9569, 0.9451,  ..., 0.7333, 0.7333, 0.7333],
         [0.9647, 0.9569, 0.9412,  ..., 0.7451, 0.7451, 0.7451]],

        [[0.8235, 0.8235, 0.8196,  ..., 0.9059, 0.9059, 0.9020],
         [0.8275, 0.8235, 0.8157,  ..., 0.9059, 0.9098, 0.9020],
         [0.8353, 0.8275, 0.8118,  ..., 0.9137, 0.9098, 0.8980],
         ...,
         [0.8902, 0.8824, 0.8706,  ..., 0.5922, 0.5961, 0.5961],
         [0.8902, 0.8824, 0.8667,  ..., 0.6000, 0.6078, 0.6118],
         [0.8902, 0.8824, 0.8627,  ..., 0.6118, 0.6196, 0.6235]],

        [[0.9098, 0.9098, 0.9216,  ..., 0.9255, 0.9216, 0.9098],
         [0.9137, 0.9098, 0.9176,  ..., 0.9373, 0.9255, 0.9098],
         [0.9216, 0.9137, 0.9137,  ..., 0.9569, 0.9373, 0.

In [19]:
target_to_class = {v: k for k, v in ImageFolder('./colon_output/train').class_to_idx.items()}
print(target_to_class)

{0: 'colon_aca', 1: 'colon_n'}


In [20]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
for image, label in dataset:
    break

for images, labels in dataloader:
    break


In [21]:
class ColonCancerClassifer(nn.Module):
    def __init__(self, num_classes=3):
        super(ColonCancerClassifer, self).__init__()
        self.base_model = timm.create_model('efficientnet_b0', pretrained=True)
        self.features = nn.Sequential(*list(self.base_model.children())[:-1])

        enet_out_size = 1280
        # Make a classifier
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(enet_out_size, num_classes)
        )
    
    def forward(self, x):
        x = self.features(x)
        output = self.classifier(x)
        return output

In [23]:
model = ColonCancerClassifer(num_classes=3)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [38]:
train_folder = './colon_output/train'
test_folder = './colon_output/test'
validation_folder = './colon_output/validate' 

train_dataset = ColonCancerDataset(data_dir=train_folder, transform=transform)
test_dataset = ColonCancerDataset(data_dir=test_folder, transform=transform)
validation_dataset = ColonCancerDataset(data_dir=validation_folder, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=False)

In [37]:
num_epochs = 5
train_losses, val_losses = [], []

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = ColonCancerClassifer(num_classes=53)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc='Training loop'):
        # Move inputs and labels to the device
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * labels.size(0)
    train_loss = running_loss / len(train_loader.dataset)
    train_losses.append(train_loss)
    
    # Validation phase
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(validation_loader, desc='Validation loop'):
            # Move inputs and labels to the device
            images, labels = images.to(device), labels.to(device)
         
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * labels.size(0)
    val_loss = running_loss / len(validation_loader.dataset)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss}, Validation loss: {val_loss}")

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html