In [1]:
# Install necessary packages
!pip install torch torch-geometric scikit-image torchvision matplotlib

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m61.4/63.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp (from torch-geometric)
  Downloading aiohttp-3.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch-geometric)
  Downloading aiohappyeyeballs-2.4.3-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch-geometric)
  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch-geometric)
  Downloading frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x

In [2]:
import os
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import SAGEConv
from torchvision import transforms
from skimage.segmentation import slic
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from google.colab import drive

In [3]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Paths to data folders
carcinoma_pos_folder = '/content/drive/MyDrive/Colab Notebooks/OddSemester2024/MiniProject/Data/lung_scc'
carcinoma_neg_folder = '/content/drive/MyDrive/Colab Notebooks/OddSemester2024/MiniProject/Data/lung_n'

In [5]:
# Preprocessing and graph creation functions
def preprocess_image(image_path, target_size=(256, 256)):
    image = io.imread(image_path)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor()
    ])
    return transform(image).permute(1, 2, 0).numpy()

In [6]:
def image_to_graph(image_path, n_segments=200):
    image = preprocess_image(image_path, (256, 256))
    superpixels = slic(image, n_segments=n_segments, compactness=10)
    segments = np.unique(superpixels)

    # Create feature matrix
    features = np.array([np.mean(image[superpixels == seg], axis=0) for seg in segments])
    num_segments = len(segments)
    adjacency_matrix = np.zeros((num_segments, num_segments))

    # Adding edges for neighboring superpixels
    height, width = superpixels.shape[:2]
    for y in range(height):
        for x in range(width):
            segment = superpixels[y, x]
            neighbors = superpixels[max(y - 1, 0):min(y + 2, height), max(x - 1, 0):min(x + 2, width)]
            for neighbor in np.unique(neighbors):
                if segment < num_segments and neighbor < num_segments:
                    adjacency_matrix[segment, neighbor] = 1
                    adjacency_matrix[neighbor, segment] = 1

    # Convert adjacency matrix into edge list format
    edge_index = np.array(np.nonzero(adjacency_matrix))

    # Validate edge indices to avoid out-of-bounds errors
    max_index = features.shape[0] - 1
    edge_index = edge_index[:, (edge_index[0] <= max_index) & (edge_index[1] <= max_index)]

    return features, edge_index

In [7]:
# Dataset creation function
def create_dataset(folder, label, num_images=50):
    dataset = []
    images = os.listdir(folder)[:num_images]
    for image_name in images:
        image_path = os.path.join(folder, image_name)
        try:
            features, edge_index = image_to_graph(image_path)
            x = torch.tensor(features, dtype=torch.float)
            edge_index = torch.tensor(edge_index, dtype=torch.long)
            y = torch.tensor([label], dtype=torch.long)
            data = Data(x=x, edge_index=edge_index, y=y)
            dataset.append(data)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
    return dataset


In [8]:
# Load datasets and split into training and testing
pos_data = create_dataset(carcinoma_pos_folder, label=1, num_images=50)
neg_data = create_dataset(carcinoma_neg_folder, label=0, num_images=50)
dataset = pos_data + neg_data

In [9]:
# Split into train and test sets (80/20 split)
train_size = int(0.8 * len(dataset))
train_data, test_data = dataset[:train_size], dataset[train_size:]
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)



In [10]:
from torch_geometric.nn import GCNConv
# Define the GCN model
class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(3, 16)
        self.conv2 = GCNConv(16, 2)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(pyg_nn.global_mean_pool(x, batch), dim=1)

model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [11]:
# Training function
def train():
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        output = model(batch)
        loss = F.nll_loss(output, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

In [12]:
# Evaluation function
def evaluate(loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in loader:
            output = model(batch)
            pred = output.argmax(dim=1)
            correct += pred.eq(batch.y).sum().item()
            total += batch.y.size(0)
    return correct / total

In [13]:
import os
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges
from torchvision import transforms
from skimage.segmentation import slic
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import torch_geometric.nn as pyg_nn

In [14]:
# Train the model
for epoch in range(10):
    train_loss = train()
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}")

Epoch 1, Train Loss: 0.7272
Epoch 2, Train Loss: 0.6415
Epoch 3, Train Loss: 0.6264
Epoch 4, Train Loss: 0.6050
Epoch 5, Train Loss: 0.5843
Epoch 6, Train Loss: 0.5676
Epoch 7, Train Loss: 0.5467
Epoch 8, Train Loss: 0.5242
Epoch 9, Train Loss: 0.5030
Epoch 10, Train Loss: 0.4776


In [15]:
# Evaluate on test set
accuracy = evaluate(test_loader)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 80.00%


In [19]:
def predict(loader):
    model.eval()  # Set the model to evaluation mode
    predictions = []
    with torch.no_grad():
        for batch in loader:
            output = model(batch)  # Get model output
            pred = output.argmax(dim=1)  # Get the predicted class (0 or 1)
            predictions.extend(pred.cpu().numpy())  # Collect predictions

    return predictions

# Use the predict function on the test loader
test_predictions = predict(test_loader)

# Print predictions
print("Predictions on test data:", test_predictions)
print(test_predictions)
print(test_predictions.count(0))
print(test_predictions.count(1))

Predictions on test data: [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
16
4
