In [None]:
# ============================
# Tumor Image Classification using GNN + Superpixels
# ============================

# --- STEP 0: Install required libraries (stable versions for Colab) ---
import sys
# True when the `google.colab` module is already loaded in this kernel. Outside
# Colab the installs below are skipped, so the packages must already be present.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    # The statements below are IPython shell escapes (`!`), not plain Python.
    # Keep their order: each later install is pinned against what came before.
    # 1) Force-reinstall pinned numpy/pandas builds.
    !pip -q install --upgrade --force-reinstall "numpy==1.24.4" "pandas==2.0.3"
    # 2) PyTorch 2.4.0 stack from the CUDA 12.1 (cu121) wheel index.
    !pip -q install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121
    # 3) PyG compiled extensions from the wheel page for torch-2.4.0+cu121,
    #    i.e. the same torch/CUDA combination installed in step 2.
    !pip -q install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.4.0+cu121.html
    # 4) PyTorch Geometric itself, pinned.
    !pip -q install torch_geometric==2.5.3
    # 5) Remaining image/plotting/ML utilities (unpinned).
    !pip -q install scikit-image opencv-python matplotlib tqdm reportlab scikit-learn

import os, zipfile, random
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from skimage.segmentation import slic
from skimage.color import rgb2lab
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

# ============================
# STEP 1: Get Dataset (Choose one method)
# ============================

## METHOD A: Mount Google Drive (if your file is stored there)
#from google.colab import drive
#drive.mount('/content/drive')
#zip_path = "/content/drive/MyDrive/archive (4).zip"

## METHOD B: Manual Upload (if file is on your computer)
from google.colab import files
uploaded = files.upload()   # select archive (4).zip

# `uploaded` maps each saved filename to its bytes. Use the name that was
# actually written to disk instead of assuming "archive (4).zip": Colab renames
# duplicates (e.g. "archive (4) (1).zip") and the archive may be called
# something else entirely. If nothing was uploaded (dialog cancelled), fall
# back to the original default location so a previously uploaded copy still works.
uploaded_zip_names = [name for name in uploaded if name.lower().endswith(".zip")]
if uploaded_zip_names:
    zip_path = os.path.abspath(uploaded_zip_names[0])
else:
    zip_path = "/content/archive (4).zip"

# Extract dataset (skipped when the target directory already exists, so
# re-running the cell does not unpack the archive again)
extract_dir = "/content/tumor_dataset"
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

print("✅ Dataset extracted to:", extract_dir)

In [None]:
# STEP 2: Convert Image -> Graph using Superpixels
# ============================
def image_to_graph(img_path, label, num_segments=75):
    """Convert one image file into a superpixel adjacency graph.

    The image is read with OpenCV, converted to RGB, resized to 128x128 and
    split into SLIC superpixels. Each superpixel becomes a node whose feature
    vector is the mean CIELAB colour of its pixels; two nodes are connected
    (in both directions) when their superpixels touch along a 4-neighbourhood
    pixel border.

    Args:
        img_path: Path of the image file to load.
        label: Integer class label stored on the graph as ``y``.
        num_segments: Approximate number of superpixels requested from SLIC.

    Returns:
        A ``torch_geometric.data.Data`` object with ``x`` (num_nodes x 3, float),
        ``edge_index`` (2 x num_edges, long) and ``y`` (1, long), or ``None``
        if the image cannot be read or processing fails.
    """
    try:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals unreadable/missing files by returning None.
            return None
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (128, 128))

        # Superpixels
        segments = slic(img, n_segments=num_segments, compactness=10, sigma=1, start_label=0)
        num_nodes = int(segments.max()) + 1

        # Node features: mean LAB color. Label ids with no pixels keep the
        # all-zero row they were initialised with.
        lab_img = rgb2lab(img)
        node_features = np.zeros((num_nodes, 3), dtype=np.float32)
        for node_id in range(num_nodes):
            mask = segments == node_id
            if mask.any():
                node_features[node_id] = lab_img[mask].mean(axis=0)
        x = torch.from_numpy(node_features)

        # Edges: adjacency of superpixels. Compare every pixel with the pixel
        # below it and the pixel to its right; wherever the labels differ the
        # two superpixels share a border. This is the vectorised equivalent of
        # visiting the 4 neighbours of every pixel.
        upper, lower = segments[:-1, :], segments[1:, :]
        left, right = segments[:, :-1], segments[:, 1:]
        vertical_border = upper != lower
        horizontal_border = left != right
        src = np.concatenate([upper[vertical_border], left[horizontal_border]])
        dst = np.concatenate([lower[vertical_border], right[horizontal_border]])

        if src.size == 0:
            # A single superpixel has no neighbours; keep the (2, 0) shape that
            # graph layers and batching expect instead of a 1-D empty tensor.
            edge_index = torch.empty((2, 0), dtype=torch.long)
        else:
            # Store both directions, then drop the duplicates produced by long borders.
            pairs = np.stack(
                [np.concatenate([src, dst]), np.concatenate([dst, src])],
                axis=1,
            )
            pairs = np.unique(pairs, axis=0)
            edge_index = torch.tensor(pairs, dtype=torch.long).t().contiguous()

        y = torch.tensor([label], dtype=torch.long)
        return Data(x=x, edge_index=edge_index, y=y)
    except Exception as exc:
        # Best-effort: one bad image must not abort the whole dataset build,
        # but report why it was skipped. Unlike a bare `except:`, this lets
        # KeyboardInterrupt/SystemExit propagate so the cell can be stopped.
        print(f"⚠️ Skipping {img_path}: {exc!r}")
        return None

In [None]:
# STEP 3: Load dataset into Graphs
# ============================
labels_map = {"tumor": 1, "normal": 0}
IMAGE_EXTENSIONS = (".jpg", ".png", ".jpeg")


def label_for_directory(dir_path, base_dir):
    """Return the class label for images stored in ``dir_path``.

    Only the part of the path *below* ``base_dir`` is inspected. The extraction
    directory itself is named "tumor_dataset", so matching against the full
    path would label every image as tumor.

    NOTE(review): any sub-folder whose name contains "tumor" counts as the
    tumor class, which would include names such as "no_tumor" - confirm
    against the actual folder layout of the archive.
    """
    relative_dir = os.path.relpath(dir_path, base_dir).lower()
    return labels_map["tumor"] if "tumor" in relative_dir else labels_map["normal"]


# Collect the image paths first. Sorting makes the graph order (and therefore
# the seeded train/test split downstream) independent of filesystem walk order.
# Extensions are matched case-insensitively so files like "scan.JPG" are kept.
image_paths = sorted(
    os.path.join(dir_path, file_name)
    for dir_path, _subdirs, file_names in os.walk(extract_dir)
    for file_name in file_names
    if file_name.lower().endswith(IMAGE_EXTENSIONS)
)

graph_list = []
for image_path in tqdm(image_paths, desc="Building graphs"):
    label = label_for_directory(os.path.dirname(image_path), extract_dir)
    g = image_to_graph(image_path, label)
    if g is not None:  # unreadable / failed images are skipped
        graph_list.append(g)

print("✅ Total Graphs Created:", len(graph_list))

# Sanity check: a dataset with only one class present means the folder names
# did not match the labelling rule above.
num_tumor = sum(int(g.y.item()) == labels_map["tumor"] for g in graph_list)
print("   tumor graphs:", num_tumor, "| normal graphs:", len(graph_list) - num_tumor)

In [None]:
# STEP 4: Train/Test Split
# ============================
# Hold out 20% of the graphs for evaluation; the fixed random_state keeps the
# partition identical between runs for the same graph_list order.
train_graphs, test_graphs = train_test_split(
    graph_list,
    test_size=0.2,
    random_state=42,
)

# Mini-batches of 8 graphs. Only the training loader reshuffles each epoch.
train_loader, test_loader = (
    DataLoader(split_graphs, batch_size=8, shuffle=reshuffle)
    for split_graphs, reshuffle in ((train_graphs, True), (test_graphs, False))
)

In [None]:
# STEP 5: Define GNN Model
# ============================
class TumorGNN(torch.nn.Module):
    """Graph-level classifier: two GCN layers, mean pooling, linear head.

    Args:
        in_channels: Size of each node feature vector.
        hidden_channels: Width of both graph-convolution layers.
        out_channels: Number of output scores per graph.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Return one row of unnormalised class scores per graph in the batch.

        ``batch`` assigns every node to its graph so node embeddings can be
        averaged per graph before the linear layer.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.conv2(hidden, edge_index))
        graph_embedding = global_mean_pool(hidden, batch)
        return self.lin(graph_embedding)


In [None]:
# STEP 6: Train Model
# ============================
# Seed torch's global RNG before the model is built so that weight
# initialisation and the training loader's shuffle order repeat from run to
# run (the train/test split is already seeded with the same value).
# NOTE(review): some GPU kernels are non-deterministic, so results on CUDA may
# still differ slightly between runs.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 3 input channels = the three mean-LAB values per node; 2 outputs = tumor / normal.
model = TumorGNN(in_channels=3, hidden_channels=64, out_channels=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``device`` and
    ``train_loader``.

    Returns:
        The mean cross-entropy loss per graph for the epoch. Each batch loss is
        weighted by its number of graphs so a smaller final batch is not
        over-represented, matching the per-graph accounting in ``test``.
        Returns NaN if the loader yields no graphs.
    """
    model.train()
    total_loss = 0.0
    total_graphs = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = F.cross_entropy(out, data.y)
        loss.backward()
        optimizer.step()
        # cross_entropy returns the batch mean; scale back to a per-graph sum.
        total_loss += loss.item() * data.num_graphs
        total_graphs += data.num_graphs
    if total_graphs == 0:
        return float("nan")
    return total_loss / total_graphs

def test(loader):
    """Return the fraction of graphs in ``loader`` that are classified correctly.

    Uses the module-level ``model`` and ``device``. The model is switched to
    eval mode and gradients are disabled while scoring.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for graph_batch in loader:
            graph_batch = graph_batch.to(device)
            logits = model(graph_batch.x, graph_batch.edge_index, graph_batch.batch)
            # Predicted class = index of the highest score for each graph.
            hits = logits.argmax(dim=1) == graph_batch.y
            n_correct += int(hits.sum())
            n_seen += graph_batch.num_graphs
    return n_correct / n_seen

# Train for 10 epochs, reporting loss and accuracy on both splits after each one.
for epoch in range(1, 11):
    loss = train()
    # Score the training split first, then the held-out split.
    train_acc, test_acc = (
        test(split_loader) for split_loader in (train_loader, test_loader)
    )
    print(f"Epoch {epoch}, Loss {loss:.4f}, Train Acc {train_acc:.4f}, Test Acc {test_acc:.4f}")


In [None]:
# STEP 7: Final Predictions
# ============================
# Score the held-out split once more with the final weights and report it.
final_test_accuracy = test(test_loader)
print("\n✅ Final Model Test Accuracy:", final_test_accuracy)