# Getting our data from kaggle

In [37]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("mohamedhanyyy/chest-ctscan-images")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/mohamedhanyyy/chest-ctscan-images/versions/1


In [38]:
import shutil
import os

In [39]:
source_path = "/root/.cache/kagglehub/datasets/mohamedhanyyy/chest-ctscan-images/versions/1"
destination_path = "/content/extracted_dataset"

In [40]:
# Create the destination directory if it doesn't exist
os.makedirs(destination_path, exist_ok=True)

# Copy the files from the source to the destination
shutil.copytree(source_path, destination_path, dirs_exist_ok=True)

'/content/extracted_dataset'

In [41]:
img_path ="/content/extracted_dataset/Data"

In [42]:
train_dir = img_path + "/train"
test_dir = img_path + "/test"

In [43]:
import torch
from torch import nn
import torch
import torchvision
import numpy as np
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


import timm
device = "cuda" if torch.cuda.is_available() else "cpu"


model = torchvision.models.vit_b_16(weights=torchvision.models.ViT_B_16_Weights.IMAGENET1K_V1).to(device)
print(model)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [44]:
weights = torchvision.models.ViT_B_16_Weights.IMAGENET1K_V1
auto= weights.transforms()

In [45]:
from torchvision import datasets

# Train datasets
train_data = datasets.ImageFolder(root=train_dir, transform=auto)


# Test datasets
test_data = datasets.ImageFolder(root=test_dir, transform=auto)


In [46]:
len(train_data)  , len(test_data)

(613, 315)

In [47]:
!pip install torchinfo



In [48]:
from torchinfo import summary
summary(model=model,
          input_size=(1, 3, 224, 224),
          col_names=["input_size", "output_size", "num_params", "trainable"],
          col_width=20,
          row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [1, 3, 224, 224]     [1, 1000]            768                  True
├─Conv2d (conv_proj)                                         [1, 3, 224, 224]     [1, 768, 14, 14]     590,592              True
├─Encoder (encoder)                                          [1, 197, 768]        [1, 197, 768]        151,296              True
│    └─Dropout (dropout)                                     [1, 197, 768]        [1, 197, 768]        --                   --
│    └─Sequential (layers)                                   [1, 197, 768]        [1, 197, 768]        --                   True
│    │    └─EncoderBlock (encoder_layer_0)                   [1, 197, 768]        [1, 197, 768]        7,087,872            True
│    │    └─EncoderBlock (encoder_layer_1)                   [1, 197, 768]        [1, 197, 768

# **Freezing layers and Adding layer for fine tunings**

In [49]:
models = [model]
for i, model in enumerate(models):
    # Handle models with and without the 'features' attribute
    if hasattr(model, 'features'):
        for param in model.features.parameters():
            param.requires_grad = False
    else:
        # You might need to adjust this based on the specific model architecture
        for param in model.parameters():
            param.requires_grad = False

In [50]:
# For Vision Transformer (ViT)
num_ftrs = model.heads.head.in_features
model.heads.head = torch.nn.Linear(num_ftrs, 4)



# Now move  models to the device
model.to(device)


VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [51]:
summary(model=model,
          input_size=(1, 3, 224, 224),
          col_names=["input_size", "output_size", "num_params", "trainable"],
          col_width=20,
          row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [1, 3, 224, 224]     [1, 4]               768                  Partial
├─Conv2d (conv_proj)                                         [1, 3, 224, 224]     [1, 768, 14, 14]     (590,592)            False
├─Encoder (encoder)                                          [1, 197, 768]        [1, 197, 768]        151,296              False
│    └─Dropout (dropout)                                     [1, 197, 768]        [1, 197, 768]        --                   --
│    └─Sequential (layers)                                   [1, 197, 768]        [1, 197, 768]        --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [1, 197, 768]        [1, 197, 768]        (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [1, 197, 768]        [1, 1

In [52]:
# Train datasets
BATCH_SIZE = 32

train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Test datasets
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [53]:
class_names = train_data.classes
class_names

['adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib',
 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa',
 'normal',
 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa']

# **Training of all the models**

In [54]:
# Create train_step()
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer:torch.optim.Optimizer,
               device=device):
  # Put the model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0

  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
    # Send data to the target device
    X, y = X.to(device), y.to(device)

    # 1. Forward pass
    y_pred = model(X) # output model logits

    # 2. Calculate the loss
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    # Calculate accuracy metric
    y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
    train_acc += (y_pred_class==y).sum().item()/len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

In [55]:
# Create a test step
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device=device):
  # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0,  0

  # Turn on inference mode
  with torch.inference_mode():
    # Loop through DataLoader batches
    for batch, (X, y) in enumerate(dataloader):
      # Send data to the target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      test_pred_logits = model(X)

      # 2. Calculate the loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()

      # Calculate the accuracy
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  # Adjust metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

In [56]:
from tqdm.auto import tqdm
import torch
import torch.nn as nn # Import the torch.nn module

# 1. Create a train function that takes in various model parameters + optimizer + dataloaders + loss function
def train(model: torch.nn.Module,
          train_dataloader,
          test_dataloader,
          optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5,
          device=device):

  # 2. Create empty results dictionary
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []}

  # 3. Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model=model,
                                       dataloader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer,
                                       device=device)
    test_loss, test_acc = test_step(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)

    # 4. Print out what's happening
    print(f"Epoch: {epoch} | Train loss: {train_loss:.4f} | Train acc: {train_acc:.4f} | Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")

    # 5. Update results dictionary
    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)

  # 6. Return the filled results at the end of the epochs
  return results

In [57]:
# Compile the model with Torch 2.0
model = torch.compile(model)


In [58]:
NUM_EPOCHS = 15

# Start the timer
from timeit import default_timer as timer
start_time = timer()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

model_1_results = train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch: 0 | Train loss: 1.1822 | Train acc: 0.4403 | Test loss: 1.0116 | Test acc: 0.5218
Epoch: 1 | Train loss: 0.8571 | Train acc: 0.6494 | Test loss: 0.8895 | Test acc: 0.5814
Epoch: 2 | Train loss: 0.7483 | Train acc: 0.7159 | Test loss: 0.8658 | Test acc: 0.5911
Epoch: 3 | Train loss: 0.6830 | Train acc: 0.7312 | Test loss: 0.8317 | Test acc: 0.6064
Epoch: 4 | Train loss: 0.6399 | Train acc: 0.7644 | Test loss: 0.8087 | Test acc: 0.6505
Epoch: 5 | Train loss: 0.6069 | Train acc: 0.7762 | Test loss: 0.7981 | Test acc: 0.6226
Epoch: 6 | Train loss: 0.5532 | Train acc: 0.8088 | Test loss: 0.7840 | Test acc: 0.6755
Epoch: 7 | Train loss: 0.5193 | Train acc: 0.8266 | Test loss: 0.7598 | Test acc: 0.6681
Epoch: 8 | Train loss: 0.4879 | Train acc: 0.8547 | Test loss: 0.7552 | Test acc: 0.6712
Epoch: 9 | Train loss: 0.4895 | Train acc: 0.8463 | Test loss: 0.7515 | Test acc: 0.6698
Epoch: 10 | Train loss: 0.4628 | Train acc: 0.8525 | Test loss: 0.7789 | Test acc: 0.6681
Epoch: 11 | Train lo

In [59]:
from PIL import Image
transform=auto

# Load the image
def predict_single_image(model, image_path, class_names, device="cuda" if torch.cuda.is_available() else "cpu"):
    model.eval()
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)  # Add batch dimension

    # Move image to device
    image = image.to(device)
    model = model.to(device)

    # Perform inference
    with torch.no_grad():
        outputs = model(image)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)  # Get probabilities
        predicted_class = torch.argmax(probabilities, dim=1).item()  # Get class index

    return class_names[predicted_class], probabilities[0][predicted_class].item()

# Example usage
image_path ="/content/extracted_dataset/Data/valid/normal/4 - Copy (2).png"

predicted_class, confidence = predict_single_image(model, image_path, class_names)
print(f"Predicted Class: {predicted_class}, Confidence: {confidence:.4f}")


Predicted Class: normal, Confidence: 0.9813
