<a href="https://colab.research.google.com/github/Srinathi117/Pancreatic-Cancer_app/blob/main/Pancreatic_prediction_GNN_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
import zipfile, os

# Ask the user for the two archives (image dataset + tabular CSV) in one dialog.
print("‚¨ÜÔ∏è Upload TWO ZIP FILES (1. Image Dataset ZIP, 2. CSV ZIP)")
uploaded = files.upload()  # Select BOTH zip files at the SAME time

# Target folders: one for the image dataset, one for the CSV/Excel data.
DATASET_DIR = "/content/dataset"
CSV_DIR = "/content/pc_excel"
os.makedirs(DATASET_DIR, exist_ok=True)
os.makedirs(CSV_DIR, exist_ok=True)

print("\nProcessing uploaded zip files...\n")

extracted_count = 0
for filename in uploaded.keys():
    filepath = f"/content/{filename}"

    # Guard against uploads that are not ZIP archives (or that are not at the
    # expected path): ZipFile() would otherwise raise and abort the loop,
    # leaving the remaining uploads unprocessed.
    if not zipfile.is_zipfile(filepath):
        print(f"Skipping '{filename}': no valid ZIP archive found at {filepath}")
        continue

    # Route by file name: archives whose name mentions "csv" or "excel" go to
    # the CSV folder, everything else is treated as the image dataset.
    if "csv" in filename.lower() or "excel" in filename.lower():
        extract_path = CSV_DIR
    else:
        extract_path = DATASET_DIR

    # Unzip into the chosen folder.
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    extracted_count += 1

    print(f"üìÇ '{filename}' extracted to: {extract_path}")

# Only claim success when at least one archive was actually extracted.
if extracted_count:
    print("\n‚úîÔ∏è Files extracted successfully!")
else:
    print("\nNo ZIP archives were extracted.")
print("Dataset folder contains:", os.listdir(DATASET_DIR))
print("CSV/Excel folder contains:", os.listdir(CSV_DIR))


‚¨ÜÔ∏è Upload TWO ZIP FILES (1. Image Dataset ZIP, 2. CSV ZIP)


Saving archive (5).zip to archive (5) (1).zip
Saving pc excel.zip to pc excel.zip

Processing uploaded zip files...

üìÇ 'archive (5) (1).zip' extracted to: /content/dataset
üìÇ 'pc excel.zip' extracted to: /content/pc_excel

‚úîÔ∏è Files extracted successfully!
Dataset folder contains: ['DATASET']
CSV/Excel folder contains: ['pancreatic_cancer_prediction_sample.csv']


In [None]:
# ==============================================
# PANCREATIC CANCER GNN: CANCER vs NORMAL
# ==============================================

# ---------- 0. INSTALL LIBRARIES (RUN ONCE) ----------
!pip install -q torch torchvision torchaudio
!pip install -q torch-geometric
!pip install -q scikit-learn
!pip install -q pandas

# ---------- 1. IMPORTS & BASIC SETUP ----------
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms, models

from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
import numpy as np
from collections import Counter

from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

from PIL import Image
import pandas as pd

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ---------- 2. PATHS ----------
# Expected folder structure:
# /content/dataset/DATASET/train/Cancer, Normal
# /content/dataset/DATASET/test/Cancer, Normal

# Main image dataset folder.
data_root = "/content/dataset/DATASET"
# Second (tabular) dataset.
csv_path = "/content/pc_excel/pancreatic_cancer_prediction_sample.csv"

print(f"Inside DATASET: {os.listdir(data_root)}")

# The two split folders sit directly under the dataset root.
train_root_base, test_root_base = (
    os.path.join(data_root, split_name) for split_name in ("train", "test")
)

print(f"Raw train_root_base: {train_root_base}")
print(f"Raw test_root_base: {test_root_base}")

def _visible_subdirs(path):
    """List sub-directories of ``path``, skipping hidden and "__MACOSX" folders."""
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
        and not name.startswith(".")
        and name != "__MACOSX"
    )


def find_class_root(root):
    """Return the directory under ``root`` that directly holds the class folders.

    Extracted archives sometimes carry a redundant wrapper level
    (e.g. ``train/train/<class>/...``). The inner directory is returned when
    ``root`` has exactly one visible sub-directory AND that sub-directory
    itself contains sub-directories; otherwise ``root`` is returned unchanged.
    At most one wrapper level is removed.

    Hidden folders (names starting with ".") and "__MACOSX" are ignored when
    counting, so stray metadata folders do not prevent unwrapping. A lone
    sub-directory without sub-directories of its own is treated as a class
    folder, not a wrapper, so a single-class layout is left alone.

    Args:
        root: Path of the directory to inspect.

    Returns:
        Either ``root`` or the path of its single wrapper sub-directory.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``root`` cannot be listed.
    """
    subs = _visible_subdirs(root)
    print(f"\nChecking root: {root}")
    print(" Subfolders:", subs)
    if len(subs) == 1:
        candidate = os.path.join(root, subs[0])
        # A wrapper holds further folders (the classes); a class folder does not.
        if _visible_subdirs(candidate):
            print(" -> Only one subfolder, using inner folder as class root:", candidate)
            return candidate
    print(" -> Using this as class root")
    return root

# Resolve the folder that directly contains the class sub-folders for each split.
train_root, test_root = map(find_class_root, (train_root_base, test_root_base))

print(f"\nFinal train_root: {train_root}")
print(f"Final test_root: {test_root}")

# Show what will be interpreted as class folders.
print(f"\nTrain classes folders: {os.listdir(train_root)}")
print(f"Test classes folders: {os.listdir(test_root)}")

# ---------- 2.1 LOAD CSV (SECOND DATASET) ----------
# Load the tabular dataset when the file is present; otherwise keep None.
csv_df = pd.read_csv(csv_path) if os.path.exists(csv_path) else None

if csv_df is None:
    print(f"\nNo CSV found at: {csv_path}")
else:
    print(f"\nLoaded CSV: {csv_path}")
    print(f"CSV shape: {csv_df.shape}")
    print(csv_df.head())

# ---------- 3. IMAGE TRANSFORMS ----------
# Square side length every image is resized to.
img_size = 224

# Preprocessing pipeline: resize -> tensor -> per-channel normalisation.
transform = transforms.Compose(
    [
        transforms.Resize((img_size,) * 2),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# ---------- 4. DATASETS ----------
# Both splits are read from disk with the same preprocessing pipeline.
train_dataset, test_dataset = (
    datasets.ImageFolder(root=split_root, transform=transform)
    for split_root in (train_root, test_root)
)

print(f"\nTrain classes: {train_dataset.classes}")
print(f"Test classes: {test_dataset.classes}")

# The two splits must agree on the class list, since they are merged below.
assert train_dataset.classes == test_dataset.classes, "Train/Test classes mismatch!"

class_names = train_dataset.classes
num_classes = len(class_names)

print("\nClass index mapping:")
for class_idx, class_name in enumerate(class_names):
    print(f"{class_idx} -> {class_name}")
print(f"Number of classes: {num_classes}")

# A class counts as "cancer" when its folder name contains one of these
# keywords (case-insensitive substring match).
_CANCER_KEYWORDS = ("cancer", "tumor")
CANCER_CLASS_INDICES = [
    class_idx
    for class_idx, class_name in enumerate(class_names)
    if any(keyword in class_name.lower() for keyword in _CANCER_KEYWORDS)
]
if CANCER_CLASS_INDICES:
    print(f"\nCancer class indices: {CANCER_CLASS_INDICES}")
else:
    print("\n‚ö†Ô∏è Warning: No class name contains 'Cancer' or 'Tumor'.")

# Train and test images are concatenated so one graph can be built over all of them.
full_dataset = ConcatDataset([train_dataset, test_dataset])
print(f"\nTotal images (train + test): {len(full_dataset)}")

# ---------- 5. FEATURE EXTRACTION (RESNET18 BACKBONE) ----------
# Unshuffled pass over every image so feature rows follow dataset order.
loader = DataLoader(full_dataset, batch_size=64, shuffle=False, num_workers=2)

# ImageNet-pretrained ResNet18 whose final classifier is replaced by an
# identity layer, so the network outputs feature vectors instead of logits.
backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
backbone.fc = nn.Identity()
backbone = backbone.to(device).eval()

feature_batches = []
label_batches = []

# No gradients are needed: the backbone is only used as a fixed extractor.
with torch.no_grad():
    for batch_imgs, batch_labels in loader:
        batch_feats = backbone(batch_imgs.to(device))   # [B, feat_dim]
        feature_batches.append(batch_feats.cpu())
        label_batches.append(batch_labels)

X = torch.cat(feature_batches, dim=0)   # [N, feat_dim]
y = torch.cat(label_batches, dim=0)     # [N]

print(f"\nFeature matrix shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# ---------- 6. BUILD k-NN GRAPH ----------
# Number of neighbours linked to each node.
k = 8
features_np = X.numpy()

# Query k + 1 neighbours because the first column is skipped below as the
# node itself.
nbrs = NearestNeighbors(n_neighbors=k + 1, metric='euclidean').fit(features_np)
distances, indices = nbrs.kneighbors(features_np)

N = indices.shape[0]

# Vectorised edge construction, emitting pairs in the order
# [i, j], [j, i] for every node i and each of its neighbours j.
source_nodes = np.repeat(np.arange(N), indices.shape[1] - 1)
neighbour_nodes = indices[:, 1:].reshape(-1)
forward_pairs = np.stack([source_nodes, neighbour_nodes], axis=1)    # [i, j]
backward_pairs = np.stack([neighbour_nodes, source_nodes], axis=1)   # [j, i]
edge_pairs = np.stack([forward_pairs, backward_pairs], axis=1).reshape(-1, 2)

edge_index = torch.tensor(edge_pairs, dtype=torch.long).t().contiguous()
print(f"edge_index shape: {edge_index.shape}")  # [2, num_edges]

# ---------- 7. TRAIN/VAL/TEST SPLIT ----------
def _index_mask(size, members):
    """Return a boolean tensor of length ``size`` that is True at ``members``."""
    mask = torch.zeros(size, dtype=torch.bool)
    mask[members] = True
    return mask


idx = np.arange(N)
labels_np = y.numpy()

# Hold out 20% of all nodes as the test set, stratified by label.
idx_train_val, idx_test = train_test_split(
    idx,
    test_size=0.2,
    stratify=labels_np,
    random_state=42,
)

# Take 20% of the remainder as validation => 64% train, 16% val, 20% test.
idx_train, idx_val = train_test_split(
    idx_train_val,
    test_size=0.2,
    stratify=labels_np[idx_train_val],
    random_state=42,
)

train_mask = _index_mask(N, idx_train)
val_mask = _index_mask(N, idx_val)
test_mask = _index_mask(N, idx_test)

split_summary = (
    f"\nTrain nodes: {train_mask.sum().item()}, "
    f"Val nodes: {val_mask.sum().item()}, "
    f"Test nodes: {test_mask.sum().item()}"
)
print(split_summary)

# ---------- 8. GRAPH DATA OBJECT ----------
# Node features, connectivity and labels form the core graph object ...
graph_data = Data(x=X, edge_index=edge_index, y=y)

# ... and the three boolean node masks are attached as extra attributes.
graph_data.train_mask = train_mask
graph_data.val_mask = val_mask
graph_data.test_mask = test_mask

# Move every tensor of the graph to the compute device.
graph_data = graph_data.to(device)

# ---------- 9. DEFINE GNN MODEL ----------
class PancreaticGCN(nn.Module):
    """Two-layer graph convolutional network producing per-node class scores.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output size of the first graph convolution.
        num_classes: Output size of the second graph convolution.
        dropout: Dropout probability applied between the two layers.
    """

    def __init__(self, in_channels, hidden_channels, num_classes, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return unnormalised class scores for every node.

        Pipeline: conv1 -> ReLU -> dropout (active only in training mode) -> conv2.
        """
        hidden = self.conv1(x, edge_index).relu()
        hidden = nn.functional.dropout(
            hidden, p=self.dropout, training=self.training
        )
        return self.conv2(hidden, edge_index)

# Seed PyTorch's RNG before the layers are created so that weight
# initialisation and dropout are repeatable from run to run (the data split
# already uses a fixed random_state; without this the training itself was not
# reproducible).
SEED = 42
torch.manual_seed(SEED)

# Input width of the GNN = width of the node feature matrix.
in_channels = graph_data.x.size(1)
hidden_channels = 128

model = PancreaticGCN(
    in_channels=in_channels,
    hidden_channels=hidden_channels,
    num_classes=num_classes,
    dropout=0.5
).to(device)

# Adam with L2 weight decay; cross-entropy over the per-node class scores.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

print("\nGNN model:\n", model)

# ---------- 10. TRAIN & EVALUATE ----------
def train_step():
    """Run one full-graph optimisation step and return the training loss.

    The forward pass covers every node, but the loss is computed only on the
    nodes selected by ``graph_data.train_mask``.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(graph_data.x, graph_data.edge_index)
    train_nodes = graph_data.train_mask
    step_loss = criterion(logits[train_nodes], graph_data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

@torch.no_grad()
def accuracy(mask):
    """Return the fraction of nodes selected by ``mask`` that are classified correctly.

    The model is switched to eval mode and run over the whole graph; only the
    masked nodes are scored. An empty mask yields 0.0.
    """
    model.eval()
    predictions = model(graph_data.x, graph_data.edge_index).argmax(dim=1)

    n_selected = int(mask.sum())
    if not n_selected:
        return 0.0

    n_correct = (predictions[mask] == graph_data.y[mask]).sum().item()
    return n_correct / n_selected

# Number of full-graph training steps; increase if needed.
epochs = 50

for epoch in range(1, epochs + 1):
    loss = train_step()
    train_acc = accuracy(graph_data.train_mask)
    val_acc = accuracy(graph_data.val_mask)

    # Report on the first epoch and on every fifth one.
    if epoch != 1 and epoch % 5 != 0:
        continue
    progress_line = (
        f"Epoch {epoch:03d} | Loss: {loss:.4f} | "
        f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}"
    )
    print(progress_line)

# Score the held-out test nodes once training has finished.
test_acc = accuracy(graph_data.test_mask)
print(f"\n‚úÖ Final Test Accuracy: {test_acc:.4f}")

# Label distribution on test
# Predict a class for every node with the trained model (no gradients needed).
model.eval()
with torch.no_grad():
    pred = model(graph_data.x, graph_data.edge_index).argmax(dim=1)

# Keep only the test nodes and move them to NumPy for counting.
test_nodes = graph_data.test_mask
true_labels = graph_data.y[test_nodes].cpu().numpy()
pred_labels = pred[test_nodes].cpu().numpy()

print(f"\nTrue label counts (test): {Counter(true_labels)}")
print(f"Pred label counts (test): {Counter(pred_labels)}")

# ---------- 11. SINGLE IMAGE PREDICTION (CANCER vs NORMAL) ----------
from google.colab import files as colab_files

def predict_pancreatic_image(img_path):
    """Predict the class of a single image with the trained backbone + GNN.

    The image is turned into a feature vector by the CNN backbone and then
    passed through the GNN as a one-node graph whose only edge is a
    self-loop. This is an approximation: the node has no neighbours, unlike
    the nodes of the graph the GNN was trained on.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        A tuple ``(pred_idx, pred_class, conf)``: the predicted class index,
        its name from ``class_names``, and the softmax probability of that
        class as a float.
    """
    model.eval()
    backbone.eval()

    # Open the file in a context manager so its handle is released
    # deterministically once the pixel data has been converted.
    with Image.open(img_path) as img:
        img_t = transform(img.convert("RGB")).unsqueeze(0).to(device)   # [1, 3, H, W]

    with torch.no_grad():
        # CNN feature
        feat = backbone(img_t)                      # [1, feat_dim]

        # Self-loop graph for the single node
        edge_index_new = torch.tensor([[0], [0]], dtype=torch.long).to(device)

        out = model(feat, edge_index_new)           # [1, num_classes]
        probs = torch.softmax(out, dim=1)[0]        # [num_classes]
        pred_idx = int(torch.argmax(probs))
        pred_class = class_names[pred_idx]
        conf = float(probs[pred_idx])

    return pred_idx, pred_class, conf

def cancer_or_not(pred_idx):
    """Return the user-facing verdict sentence for a predicted class index.

    The index is reported as cancer when it appears in ``CANCER_CLASS_INDICES``;
    any other index is reported as normal.
    """
    is_cancer = pred_idx in CANCER_CLASS_INDICES
    return (
        "‚ö†Ô∏è The model predicts: CANCER."
        if is_cancer
        else "‚úÖ The model predicts: NO CANCER (normal)."
    )

print("\nReady for single-image prediction. Upload CT/MRI image.")

uploaded = colab_files.upload()   # one or more image files

# Classify each uploaded file and print a small report for it.
for filename in uploaded:
    img_path = f"/content/{filename}"

    pred_idx, pred_class, conf = predict_pancreatic_image(img_path)
    result_text = cancer_or_not(pred_idx)

    report_lines = [
        "\n===================================",
        f"Image: {filename}",
        f"Predicted class label: {pred_class} (index {pred_idx})",
        f"Confidence: {conf:.4f}",
        result_text,
        "===================================",
    ]
    print("\n".join(report_lines))


[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m63.7/63.7 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.3/1.3 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hUsing device: cpu
Inside DATASET: ['test', 'train']
Raw train_root_base: /content/dataset/DATASET/train
Raw test_root_base: /content/dataset/DATASET/test

Checking root: /content/dataset/DATASET/train
 Subfolders: ['train']
 -> Only one subfolder, using inner folder as class root: /content/dataset/DATASET/train/train

Checking root: /content/dataset/DATASET/test
 Subfolders: ['test']
 -> Only one subfolder, using inner folder as class root: /content/dataset/DATASET/test/test

Final train_root: /content/dataset/DATASET/train/train
Final test_root: /content/dataset/DATASET/test/test

Train cl

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 44.7M/44.7M [00:00<00:00, 70.0MB/s]



Feature matrix shape: torch.Size([1411, 512])
Labels shape: torch.Size([1411])
edge_index shape: torch.Size([2, 22576])

Train nodes: 902, Val nodes: 226, Test nodes: 283

GNN model:
 PancreaticGCN(
  (conv1): GCNConv(512, 128)
  (conv2): GCNConv(128, 2)
)
Epoch 001 | Loss: 1.0112 | Train Acc: 0.5211 | Val Acc: 0.5221
Epoch 005 | Loss: 0.2552 | Train Acc: 0.9534 | Val Acc: 0.9071
Epoch 010 | Loss: 0.1082 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 015 | Loss: 0.0757 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 020 | Loss: 0.0595 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 025 | Loss: 0.0411 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 030 | Loss: 0.0404 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 035 | Loss: 0.0377 | Train Acc: 0.9878 | Val Acc: 0.9912
Epoch 040 | Loss: 0.0273 | Train Acc: 0.9889 | Val Acc: 0.9779
Epoch 045 | Loss: 0.0304 | Train Acc: 0.9878 | Val Acc: 0.9912
Epoch 050 | Loss: 0.0268 | Train Acc: 0.9878 | Val Acc: 0.9912

‚úÖ Final Test Accuracy: 0.9823

True label coun

Saving 1-005.jpg to 1-005.jpg

Image: 1-005.jpg
Predicted class label: normal (index 0)
Confidence: 0.9980
‚úÖ The model predicts: NO CANCER (normal).


In [None]:
# ===== SAVE TRAINED MODEL (CNN + GNN) =====
import torch

save_path = "/content/pancreas_gnn.pth"

# One file holds the CNN backbone weights, the GNN weights and the class names.
checkpoint = {
    "backbone_state_dict": backbone.state_dict(),
    "gcn_state_dict": model.state_dict(),
    "class_names": class_names,
}
torch.save(checkpoint, save_path)

print(f"Model saved to: {save_path}")


Model saved to: /content/pancreas_gnn.pth


In [None]:
from google.colab import files
# Download the checkpoint file (same path the save cell wrote to) via Colab's file helper.
files.download("/content/pancreas_gnn.pth")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>