In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import pennylane as qml
from pennylane import numpy as npqml

In [None]:
# Input table of pre-selected features; the error message below names the
# script expected to generate it.
processed_csv = "../Datasets/Processed/gaia_features_20.csv"
if not os.path.exists(processed_csv):
    raise FileNotFoundError(f"{processed_csv} not found. Run gaia_feature_selection.py first.")

df = pd.read_csv(processed_csv)
print("Loaded dataset shape:", df.shape)

# Target vector is the "label" column; every other column is a feature.
y = df["label"].values
X = df.drop(columns=["label"]).values

# Hold out 20% for testing; stratify so both splits keep the label ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardise with statistics learned from the training split only, then
# apply the same transform to the test split (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Torch tensors: float32 features, int64 class labels.
X_train_t, X_test_t = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train_scaled, X_test_scaled)
)
y_train_t, y_test_t = (
    torch.tensor(split, dtype=torch.long)
    for split in (y_train, y_test)
)

In [None]:
# Circuit hyperparameters.
# n_qubits: number of wires. angle_embedding() uses one feature per wire and
#           truncates any extra features, so only the first n_qubits feature
#           columns reach the circuit.
# n_layers: first dimension of the trainable "weights" array handed to
#           BasicEntanglerLayers.
n_qubits = 4  # can increase depending on feature encoding
n_layers = 2

# PennyLane device named "default.qubit" with one wire per qubit.
dev = qml.device("default.qubit", wires=n_qubits)

def angle_embedding(x, wires):
    """Encode features as RY rotation angles, one feature per wire.

    The feature axis is the LAST axis of ``x``. Any leading axes (for example
    a batch dimension) are preserved and passed to ``qml.RY`` as-is, so a
    ``(batch, n_features)`` input yields one batched angle per wire instead of
    being truncated along the batch axis.

    Features are zero-padded or truncated so that exactly ``len(wires)``
    angles are used. Torch tensors are kept as torch tensors, so inputs that
    require grad are accepted and stay attached to the autograd graph; any
    other input is converted with ``np.asarray``.

    Args:
        x: Feature values of shape ``(n_features,)`` or ``(..., n_features)``
            (torch tensor, NumPy array or nested sequence).
        wires: Sized iterable of wire labels; wire ``i`` receives feature ``i``.
    """
    wires = list(wires)
    n_wires = len(wires)

    if torch.is_tensor(x):
        x = torch.atleast_1d(x)
        n_features = x.shape[-1]
        if n_features < n_wires:
            # F.pad's (left, right) pair applies to the last dimension only.
            x = torch.nn.functional.pad(x, (0, n_wires - n_features))
        elif n_features > n_wires:
            x = x[..., :n_wires]
    else:
        x = np.atleast_1d(np.asarray(x))
        n_features = x.shape[-1]
        if n_features < n_wires:
            # Pad only the trailing (feature) axis; leave leading axes alone.
            pad_width = [(0, 0)] * (x.ndim - 1) + [(0, n_wires - n_features)]
            x = np.pad(x, pad_width)
        elif n_features > n_wires:
            x = x[..., :n_wires]

    for i, wire in enumerate(wires):
        # x[..., i] is a scalar for 1-D input and a per-sample vector for
        # batched input.
        qml.RY(x[..., i], wires=wire)

@qml.qnode(dev, interface="torch")
def circuit(inputs, weights):
    """Variational circuit: RY feature encoding, entangling layers, Z read-out.

    Args:
        inputs: Feature values forwarded to ``angle_embedding``.
        weights: Trainable parameters passed to ``qml.BasicEntanglerLayers``.

    Returns:
        A list with the PauliZ expectation value of each of the ``n_qubits``
        wires, in wire order.
    """
    qubit_wires = range(n_qubits)
    angle_embedding(inputs, wires=qubit_wires)
    qml.BasicEntanglerLayers(weights, wires=qubit_wires)
    return [qml.expval(qml.PauliZ(wire)) for wire in qubit_wires]

In [None]:
# Seed torch's global RNG before the first trainable parameters are created so
# that the randomly initialised quantum weights (and the nn.Linear head that is
# built further down) are reproducible on Restart & Run All. The value matches
# the random_state already used for the train/test split.
torch.manual_seed(42)

# TorchLayer needs the shape of every trainable QNode argument; "weights" must
# match the name of circuit()'s second parameter.
weight_shapes = {"weights": (n_layers, n_qubits)}
qlayer = qml.qnn.TorchLayer(circuit, weight_shapes)

class VQCClassifier(nn.Module):
    """Hybrid classifier: a quantum layer followed by a linear read-out.

    The quantum part is the module-level ``qlayer`` object; its per-sample
    output of ``n_qubits`` values is mapped to 2 class logits by ``fc``.
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict key prefixes ("qlayer.", "fc.").
        self.qlayer = qlayer
        self.fc = nn.Linear(in_features=n_qubits, out_features=2)  # 2 classes: nearby vs distant

    def forward(self, x):
        """Return the raw class logits for the input batch ``x``."""
        quantum_features = self.qlayer(x)
        logits = self.fc(quantum_features)
        return logits

# Instantiate the hybrid model; it wraps the module-level `qlayer` plus a
# freshly initialised linear head.
model = VQCClassifier()
# Adam over every parameter the model exposes via model.parameters().
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# CrossEntropyLoss takes raw logits and integer class indices, matching the
# un-normalised output of model.fc and the torch.long label tensors.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Full-batch training: every epoch pushes the entire training tensor through
# the model in a single forward pass (no mini-batching / DataLoader).
n_epochs = 50
for epoch in range(n_epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X_train_t)
    loss = loss_fn(outputs, y_train_t)
    loss.backward()
    optimizer.step()
    # Report the loss on every 5th epoch, starting with epoch 0.
    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

In [None]:
# Inference only: no autograd graph is needed for the test-set forward pass.
with torch.no_grad():
    y_pred_t = model(X_test_t)
    # Predicted class = index of the largest logit in each row.
    y_pred = y_pred_t.argmax(dim=1).numpy()

acc, cm = accuracy_score(y_test, y_pred), confusion_matrix(y_test, y_pred)

print(f"\nTest Accuracy: {acc:.4f}")
print("Confusion Matrix:", cm, sep="\n")

In [None]:
# Make sure the output directory exists before anything is written into it.
os.makedirs("../Results", exist_ok=True)

# Predictions: ground truth next to the predicted class, one row per test sample.
pred_df = pd.DataFrame({"y_true": y_test, "y_pred": y_pred})
pred_df.to_csv("../Results/vqc_predictions.csv", index=False)
print("Predictions saved to Results/vqc_predictions.csv")

# Trained parameters: only the state_dict is stored, not the module object.
trained_state = model.state_dict()
torch.save(trained_state, "../Results/vqc_model.pt")
print("Model weights saved to Results/vqc_model.pt")