In [6]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
from qiskit.circuit.library import ZZFeatureMap
from qiskit import QuantumCircuit
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms.classifiers import QSVC
from sklearn.metrics import accuracy_score
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"


# -------------------------------
# 1. Load your dataset
# -------------------------------
df = pd.read_csv(r"D:\fastapi_infosys\edu_policies\datasets\education_policies.csv")

# Build the single lower-cased text field that is later embedded:
# "<title>. <full_text>. Stakeholders: <stakeholders>".
# Missing cells are mapped to "" before concatenation; a bare astype(str)
# would render them as the literal string "nan" and feed that token to the
# embedding model as if it were policy text.
text_parts = {
    col: df[col].astype(str).where(df[col].notna(), "")
    for col in ("title", "full_text", "stakeholders")
}
df["text_for_nlp"] = (
    text_parts["title"] + ". " +
    text_parts["full_text"] + ". Stakeholders: " +
    text_parts["stakeholders"]
).str.lower()

# Binary target: 1 when status is exactly "Implemented", 0 for every other
# value (the comparison is case- and whitespace-sensitive, and a missing
# status also yields 0).
df["label"] = (df["status"] == "Implemented").astype(int)


# -------------------------------
# 2. Sentence embeddings
# -------------------------------
# Target vector: the 0/1 label column as a NumPy array.
y = df["label"].to_numpy()

# Feature matrix: encode every row's combined text with the
# 'all-MiniLM-L6-v2' sentence-transformer model.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
policy_texts = df["text_for_nlp"].tolist()
X = embed_model.encode(policy_texts)

# -------------------------------
# 3. Train/test split
# -------------------------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -------------------------------
# 4. Scale + PCA Dimensionality Reduction
# -------------------------------
# Fit the scaler on the training rows only, then apply the same fitted
# transform to both splits so the test split never influences the statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Project down to a handful of components; this count becomes the feature
# dimension of the quantum feature map below.
n_qubits = 6
pca = PCA(n_components=n_qubits)
X_train_reduced = pca.fit_transform(X_train_scaled)
X_test_reduced = pca.transform(X_test_scaled)

# -------------------------------
# 5. Quantum Kernel setup
# -------------------------------
# Take the feature dimension from the reduced training matrix itself
# (its column count) rather than from the constant used to configure PCA.
_, n_qubits = X_train_reduced.shape

# Single-repetition ZZ feature map over those features, wrapped in a
# fidelity-based quantum kernel.
feature_map = ZZFeatureMap(reps=1, feature_dimension=n_qubits)
quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

# -------------------------------
# 6. QSVC training
# -------------------------------
# Classifier that uses the quantum kernel configured above.
qsvc = QSVC(quantum_kernel=quantum_kernel)

# Announce the start before fitting on the PCA-reduced training split.
print(" Training QSVC...")
qsvc.fit(X=X_train_reduced, y=y_train)

# -------------------------------
# 7. Evaluate
# -------------------------------
# Predict on the held-out split and report plain accuracy to 3 decimals.
y_pred = qsvc.predict(X_test_reduced)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"QSVC Accuracy: {acc:.3f}")

# -------------------------------
# 8. Save model + kernel matrix
# -------------------------------
# Persist the fitted classifier first.
joblib.dump(qsvc, "quantum_policy_qsvc_model.pkl")

# Kernel matrix evaluated on the reduced training features, stored alongside
# the model.
train_kernel_matrix = quantum_kernel.evaluate(X_train_reduced)
joblib.dump(train_kernel_matrix, "quantum_policy_kernel_matrix.pkl")

# Fitted preprocessing objects (scaler, then PCA), each in its own file.
for artifact, filename in (
    (scaler, "policy_scaler.pkl"),
    (pca, "policy_pca.pkl"),
):
    joblib.dump(artifact, filename)

print(" Quantum model, kernel matrix, and scaler saved!")
# Echo the unrounded accuracy as the final line of output.
print(acc)

 Training QSVC...
QSVC Accuracy: 0.770
 Quantum model, kernel matrix, and scaler saved!
0.77
