In [None]:
# ------------------- Install Required Packages -------------------
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install torch-geometric
!pip install pandas numpy scikit-learn networkx openpyxl pennylane matplotlib seaborn

In [None]:
# ------------------- Import Libraries -------------------
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import pennylane as qml


In [None]:
# ------------------- Load Dataset from Google Drive -------------------
from google.colab import drive
drive.mount('/content/drive')

# Single place to edit if the dataset folder moves.
DATA_DIR = '/content/drive/MyDrive/TCGA-PANCAN-HiSeq-801x20531'
labels_file = f'{DATA_DIR}/labels.csv'
data_file   = f'{DATA_DIR}/data.csv'

# index_col=0: the first CSV column becomes the row index of each frame.
labels_df   = pd.read_csv(labels_file, index_col=0)
features_df = pd.read_csv(data_file, index_col=0)

# The preprocessing cell flattens labels_df positionally, so row i of labels_df
# must describe row i of features_df. Align on the index instead of trusting
# file order, and fail loudly if a feature row has no label.
missing_ids = features_df.index.difference(labels_df.index)
if len(missing_ids) > 0:
    raise ValueError(
        f"{len(missing_ids)} feature rows have no label, e.g. {list(missing_ids[:5])}"
    )
labels_df = labels_df.loc[features_df.index]

print("Features shape:", features_df.shape)
print("Labels shape:", labels_df.shape)


In [None]:
# ------------------- Preprocess Data -------------------
# LabelEncoder, StandardScaler, train_test_split, np and torch come from the
# import cell; only the names that cell does not provide are imported here.
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

SEED = 42  # shared by PCA and the train/test split so a re-run rebuilds the same graph

# Encode labels: class strings -> consecutive integer ids (le.classes_ maps back).
le = LabelEncoder()
y = le.fit_transform(labels_df.values.ravel())
y_tensor = torch.tensor(y, dtype=torch.long)

# Standardize features.
# NOTE(review): the scaler and PCA are fit on every sample, including the ones
# later held out for testing (no labels are used here) — confirm this
# transductive setup is intended.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features_df)

# PCA for dimensionality reduction. n_components may not exceed either the
# number of samples or the number of features, so clamp against both.
n_components = min(200, *X_scaled.shape)
pca = PCA(n_components=n_components, random_state=SEED)
X_reduced = pca.fit_transform(X_scaled)
X_tensor = torch.tensor(X_reduced, dtype=torch.float)

# Build graph using k-nearest neighbors under cosine similarity.
num_nodes = X_reduced.shape[0]
k = min(10, num_nodes - 1)            # a node cannot have more neighbours than there are other nodes
sim = cosine_similarity(X_reduced)
# Exclude each node from its own neighbour list explicitly instead of assuming
# that its self-similarity always sorts last (ties/duplicates break that).
np.fill_diagonal(sim, -np.inf)
# argsort is ascending, so the last k columns are the k most similar nodes.
neighbor_idx = np.argsort(sim, axis=1)[:, num_nodes - k:]
# One row per edge: (source node, neighbour node).
edges = np.column_stack([np.repeat(np.arange(num_nodes), k), neighbor_idx.ravel()])
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
# NOTE(review): edges are directed (node -> its neighbours) and the kNN relation
# is not symmetric; symmetrise the edge list if an undirected graph is intended.

# PyG Data object
data = Data(x=X_tensor, edge_index=edge_index, y=y_tensor)
print(data)

# Train-test split over node indices, stratified by class.
train_idx, test_idx = train_test_split(
    np.arange(len(y)), test_size=0.2, stratify=y, random_state=SEED
)

In [None]:
# ------------------- Quantum Layer -------------------
n_qubits = 8
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def quantum_circuit(inputs, weights):
    for i in range(n_qubits):
        qml.RY(inputs[i], wires=i)
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

weight_shapes = {"weights": (3, n_qubits)}
qlayer = qml.qnn.TorchLayer(quantum_circuit, weight_shapes)


In [None]:
# ------------------- Quantum GNN Model -------------------
class QuantumGNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim, n_qubits, qlayer):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim1)
        self.conv2 = GCNConv(hidden_dim1, hidden_dim2)
        self.fc_reduce = torch.nn.Linear(hidden_dim2, n_qubits)
        self.q_layer = qlayer
        self.conv_out = GCNConv(n_qubits, output_dim)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = self.fc_reduce(x)

        # Apply quantum layer per node
        x_quantum = []
        for i in range(x.shape[0]):
            x_quantum.append(self.q_layer(x[i]))
        x = torch.stack(x_quantum)

        x = F.dropout(x, p=0.3, training=self.training)
        x = self.conv_out(x, edge_index)
        return x

# Layer widths: node features -> 128 -> 64 -> n_qubits -> one logit per distinct label.
input_dim = X_tensor.size(1)
hidden_dim1, hidden_dim2 = 128, 64
output_dim = np.unique(y).size

model = QuantumGNN(
    input_dim=input_dim,
    hidden_dim1=hidden_dim1,
    hidden_dim2=hidden_dim2,
    output_dim=output_dim,
    n_qubits=n_qubits,
    qlayer=qlayer,
)


In [None]:
# ------------------- Training -------------------
import time
from datetime import datetime

STAMP_FORMAT = "%Y-%m-%d %H:%M:%S"  # wall-clock format used in the per-epoch log line

epochs = 170
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

for epoch in range(epochs):
    epoch_start = time.time()
    timestamp_start = datetime.now().strftime(STAMP_FORMAT)

    # One full-graph step: forward over every node, loss on the training nodes only.
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    train_logits, train_targets = out[train_idx], y_tensor[train_idx]
    loss = criterion(train_logits, train_targets)
    loss.backward()
    optimizer.step()

    epoch_end = time.time()
    timestamp_end = datetime.now().strftime(STAMP_FORMAT)

    print(
        f"[Start: {timestamp_start} | End: {timestamp_end}] Epoch {epoch+1}/{epochs}, "
        f"Loss: {loss.item():.4f}, Time: {epoch_end - epoch_start:.2f}s"
    )


In [None]:
# ------------------- Evaluation -------------------
# Forward pass over the whole graph in eval mode; metrics use the test nodes only.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
preds = out.argmax(dim=1)

y_true = y_tensor[test_idx].numpy()
y_pred = preds[test_idx].numpy()

test_acc = accuracy_score(y_true, y_pred)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")

# Basic Confusion Matrix (numeric): axes show the encoded integer class ids.
cm = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title("Confusion Matrix (Numeric)")
plt.show()


In [None]:
# ------------------- Evaluation (class-labelled confusion matrix) -------------------
# accuracy_score, confusion_matrix, plt, sns and np are provided by the import cell.

# Evaluate model: forward pass over the whole graph, metrics on the test nodes only.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
    preds = out.argmax(dim=1)

y_true = y_tensor[test_idx].numpy()
y_pred = preds[test_idx].numpy()

test_acc = accuracy_score(y_true, y_pred)
print(f"Final Test Accuracy: {test_acc:.4f}")

# Confusion Matrix with the original class names on both axes.
# Passing `labels` fixes the row/column order to the encoder's ids, so the
# matrix always has one row/column per class and lines up with the tick labels
# even if some class never occurs among the test targets or predictions.
class_names = le.classes_
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(
    cm, annot=True, fmt='d', cmap='Blues',
    xticklabels=class_names, yticklabels=class_names, ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title("Confusion Matrix")
plt.show()



In [None]:
# t-SNE Visualization
from sklearn.manifold import TSNE

# Recompute the activations that feed the final GCN layer: the two ReLU'd GCN
# layers, the linear projection, then the quantum layer applied node by node.
model.eval()
with torch.no_grad():
    x = F.relu(model.conv1(data.x, data.edge_index))
    x = F.relu(model.conv2(x, data.edge_index))
    x = model.fc_reduce(x)

    # Iterating a 2-D tensor yields one row (one node) at a time.
    x_quantum = [model.q_layer(node_vec) for node_vec in x]
    quantum_embeddings = torch.stack(x_quantum).numpy()  # one row per node

# Project the embeddings to 2-D for plotting.
tsne = TSNE(n_components=2, random_state=42)
emb_2d = tsne.fit_transform(quantum_embeddings)

# One scatter series per encoded class id so each gets its own colour and legend entry.
fig, ax = plt.subplots(figsize=(8, 6))
for label in np.unique(y):
    idx = np.flatnonzero(y == label)
    ax.scatter(emb_2d[idx, 0], emb_2d[idx, 1], label=f'Class {label}', alpha=0.7)
ax.set_xlabel('t-SNE Component 1')
ax.set_ylabel('t-SNE Component 2')
ax.set_title('t-SNE Visualization of Quantum GNN Node Embeddings')
ax.legend()
plt.show()
