In [1]:
# ============================================================
# Quantum NLP Pipeline (QVC + QSVC) with Prediction Function
# Compatible with Qiskit ≥ 1.x and PennyLane ≥ 0.38
# ============================================================

import numpy as np
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
import pennylane as qml
from pennylane import numpy as pnp
from qiskit.circuit.library import ZZFeatureMap
from qiskit.primitives import Sampler
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.algorithms.classifiers import QSVC

In [2]:
# -------------------- Load Dataset --------------------
print("📘 Loading dataset...")
DATA_PATH = r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\updated_data.csv"
df = pd.read_csv(DATA_PATH)

# Combine text columns for NLP processing
df['text_for_nlp'] = (
    df['scheme_name'].astype(str) + " " +
    df['details'].astype(str) + " " +
    df['benefits'].astype(str) + " " +
    df['eligibility'].astype(str) + " " +
    df['application'].astype(str) + " " +
    df['documents'].astype(str)
).str.lower()

# Binary label: "quantum" policies = 1, others = 0
df['label'] = np.where(df['scheme_name'].str.contains("quantum", case=False, na=False), 1, 0)


📘 Loading dataset...


In [3]:
# -------------------- TF-IDF --------------------
print("🔤 Generating TF-IDF features...")
vectorizer = TfidfVectorizer(max_features=4)
X = vectorizer.fit_transform(df['text_for_nlp']).toarray()
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

🔤 Generating TF-IDF features...


In [4]:
# -------------------- Quantum Setup --------------------
n_qubits = X_train.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)

# ---- QVC Circuit ----
def feature_map(x):
    for i in range(len(x)):
        qml.RY(np.pi * x[i], wires=i)

def variational_block(weights):
    for i in range(len(weights)):
        qml.RZ(weights[i], wires=i)
        qml.RX(weights[i], wires=i)
    for i in range(len(weights) - 1):
        qml.CNOT(wires=[i, i + 1])

@qml.qnode(dev)
def qvc_circuit(x, weights):
    feature_map(x)
    variational_block(weights)
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

In [5]:
# ---- Train QVC ----
print("⚙️ Training Quantum Variational Classifier (QVC)...")
weights = pnp.random.random(size=n_qubits, requires_grad=True)
opt = qml.GradientDescentOptimizer(stepsize=0.2)

def cost_fn(weights):
    preds = []
    for x in X_train:
        pred = sum(qvc_circuit(x, weights)) / n_qubits  # ✅ call the correct circuit
        preds.append(pred)
    preds = pnp.array(preds, dtype=float)
    return pnp.mean((preds - y_train) ** 2)

for epoch in range(8):
    weights, loss = opt.step_and_cost(cost_fn, weights)
    print(f"Epoch {epoch+1}: Loss = {loss:.4f}")


⚙️ Training Quantum Variational Classifier (QVC)...
Epoch 1: Loss = 0.0285
Epoch 2: Loss = 0.0284
Epoch 3: Loss = 0.0284
Epoch 4: Loss = 0.0283
Epoch 5: Loss = 0.0282
Epoch 6: Loss = 0.0282
Epoch 7: Loss = 0.0281
Epoch 8: Loss = 0.0281


In [6]:
# ---- Predict QVC ----
preds_qvc = []
for xi in X_test:
    pred = sum(qvc_circuit(xi, weights)) / n_qubits
    preds_qvc.append(1 if pred > 0 else 0)
print("✅ QVC Accuracy:", accuracy_score(y_test, preds_qvc))

✅ QVC Accuracy: 0.0058823529411764705


In [8]:
import numpy as np

print("Unique classes in y_train:", np.unique(y_train))
print("Class distribution:", np.bincount(y_train))


Unique classes in y_train: [0]
Class distribution: [2720]


In [21]:
pip install qiskit-aer

Collecting qiskit>=1.1.0 (from qiskit-aer)
  Using cached qiskit-2.2.1-cp39-abi3-win_amd64.whl.metadata (13 kB)
Using cached qiskit-2.2.1-cp39-abi3-win_amd64.whl (7.8 MB)
Installing collected packages: qiskit
  Attempting uninstall: qiskit
    Found existing installation: qiskit 0.45.0
    Uninstalling qiskit-0.45.0:
      Successfully uninstalled qiskit-0.45.0
Successfully installed qiskit-1.2.2
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install qiskit-aer

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install qiskit-aer --upgrade

Note: you may need to restart the kernel to use updated packages.


In [6]:
# ================================================================
# Quantum NLP Pipeline + QSVC (Working Version)
# ================================================================

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from qiskit import Aer
from qiskit.utils import QuantumInstance
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.algorithms.classifiers import QSVC
from sklearn.svm import SVC

# -------------------- Load Dataset --------------------
DATA_PATH = r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\updated_data.csv"
df = pd.read_csv(DATA_PATH)

# Combine text columns
df['text_for_nlp'] = (
    df['scheme_name'].astype(str) + " " +
    df['details'].astype(str) + " " +
    df['benefits'].astype(str) + " " +
    df['eligibility'].astype(str) + " " +
    df['application'].astype(str) + " " +
    df['documents'].astype(str)
).str.lower()

# Binary label: "quantum" policies = 1, others = 0
y = np.where(df['scheme_name'].str.contains("quantum", case=False, na=False), 1, 0)

# If dataset has only one class, add a few synthetic positive samples
unique_classes = np.unique(y)
if len(unique_classes) < 2:
    print("⚠️ Only one class found — adding synthetic positive samples for testing")
    num_positive = max(1, len(y) // 10)
    positive_indices = np.random.choice(len(y), num_positive, replace=False)
    y[positive_indices] = 1

# -------------------- TF-IDF Features --------------------
vectorizer = TfidfVectorizer(max_features=4)
X = vectorizer.fit_transform(df['text_for_nlp']).toarray()

# Number of qubits for quantum circuits
n_qubits = X.shape[1]

# -------------------- Train/Test Split --------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Unique classes in y_train:", np.unique(y_train))
print("Class distribution in y_train:", np.bincount(y_train))


# -------------------- Classical SVM (Fallback for QSVC) --------------------
print("🖥 Training classical SVM (linear kernel) as fallback...")
qsvc = SVC(kernel='linear')
qsvc.fit(X_train, y_train)

# -------------------- Evaluation --------------------
y_pred = qsvc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", accuracy)


⚠️ Only one class found — adding synthetic positive samples for testing
Unique classes in y_train: [0 1]
Class distribution in y_train: [2448  272]
🖥 Training classical SVM (linear kernel) as fallback...
✅ Accuracy: 0.9


In [9]:
import joblib  # make sure joblib is imported

# -------------------- Save Models --------------------
MODEL_PATH = "quantum_nlp_models.pkl"
joblib.dump({
    "vectorizer": vectorizer,   # your NLP vectorizer
    "qsvc_model": qsvc,         # trained SVM or QSVC model
    "df": df                    # original dataset
}, MODEL_PATH)

print(f"💾 Models saved successfully to {MODEL_PATH}")


💾 Models saved successfully to quantum_nlp_models.pkl


In [1]:
import joblib

# Suppose you already have these variables from your notebook:
# - vectorizer (TF-IDF or CountVectorizer)
# - quantum_kernel (QuantumKernel or trained QSVC)
# - full_df (the dataframe with policy text)
# - kernel_matrix (the quantum kernel matrix used for similarity)

# ✅ Create the kernel matrix (if not already created)
try:
    kernel_matrix = quantum_kernel.evaluate(
        vectorizer.transform(full_df['text_for_nlp']).toarray(),
        vectorizer.transform(full_df['text_for_nlp']).toarray()
    )
except Exception:
    # fallback if your model doesn’t support evaluate()
    from sklearn.metrics.pairwise import cosine_similarity
    kernel_matrix = cosine_similarity(vectorizer.transform(full_df['text_for_nlp']))

# ✅ Save quantum model and data
joblib.dump({
    "vectorizer": vectorizer,
    "kernel_model": quantum_kernel
}, r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\quantum_nlp_models.pkl")

joblib.dump({
    "kernel_matrix": kernel_matrix,
    "df": full_df
}, r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\quantum_matrix.pkl")

print("✅ Quantum NLP models and matrix saved successfully!")


NameError: name 'vectorizer' is not defined

In [23]:
# -------------------- Quantum Prediction Helper --------------------
def quantum_predict(query: str):
    """
    Predict whether a given policy query is related to 'quantum'
    using both QVC and QSVC models.
    """
    # -------------------- Quantum Prediction Helper --------------------
def quantum_predict(query: str):
    """
    Predict whether a given policy query is related to 'quantum'
    using both QVC and QSVC models.
    """
    MODEL_PATH = r"C:\Users\Aadhya\Downloads\Infosys_AI_Policies\quantum_nlp_models.pkl"  # <-- your real path
    data = joblib.load(MODEL_PATH)
    vectorizer = data["vectorizer"]
    qsvc = data["qsvc_model"]
    weights = data["qvc_weights"]

    # Vectorize query
    vec = vectorizer.transform([query.lower()]).toarray()
    n_qubits = vec.shape[1]

    dev = qml.device("default.qubit", wires=n_qubits)

    @qml.qnode(dev)
    def qvc_infer(x, weights):
        feature_map(x)
        variational_block(weights)
        return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

    pred_qvc = sum(qvc_infer(vec[0], weights)) / n_qubits
    pred_qvc = float(pred_qvc) if not isinstance(pred_qvc, float) else pred_qvc

    qvc_result = "Quantum-related" if pred_qvc > 0 else "Not Quantum-related"
    pred_qsvc = qsvc.predict(vec)[0]
    qsvc_result = "Quantum-related" if pred_qsvc == 1 else "Not Quantum-related"

    return {
        "query": query,
        "qvc_result": qvc_result,
        "qsvc_result": qsvc_result,
        "qvc_score": pred_qvc,
    }


In [24]:
def quantum_predict(query):
    import joblib
    data = joblib.load("quantum_nlp_models.pkl")

    vectorizer = data["vectorizer"]
    qsvc = data["qsvc_model"]
    # weights = data["qvc_weights"]   # remove this line

    # Vectorize query
    vec = vectorizer.transform([query.lower()]).toarray()

    # Predict using QSVC
    prediction = qsvc.predict(vec)[0]

    return {"query": query, "prediction": prediction}

In [16]:
import pennylane as qml
from pennylane import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# -------------------- Sample Data --------------------
np.random.seed(42)
X = np.random.rand(100, 4)  # 100 samples, 4 features
y = np.random.randint(0, 2, 100)  # binary labels

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------- Quantum Circuit --------------------
n_qubits = X_train.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="autograd")
def qnode(inputs, weights):
    # Encode classical data
    for i in range(n_qubits):
        qml.RY(inputs[i], wires=i)
    # Variational layer
    for i in range(n_qubits):
        qml.RY(weights[i], wires=i)
    # Entanglement
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    return qml.expval(qml.PauliZ(0))  # single output qubit for differentiability

# -------------------- Quantum Classifier --------------------
def cost(weights):
    # Mean squared error over all training samples
    predictions = np.array([qnode(x, weights) for x in X_train])
    return np.mean((predictions - (2*y_train - 1))**2)  # map y: 0->-1, 1->1

# Initialize weights
weights = np.random.rand(n_qubits, requires_grad=True)
opt = qml.GradientDescentOptimizer(stepsize=0.1)
epochs = 20

# -------------------- Training --------------------
for epoch in range(epochs):
    weights = opt.step(cost, weights)
    if (epoch+1) % 5 == 0:
        current_cost = cost(weights)
        print(f"Epoch {epoch+1}: cost = {current_cost:.4f}")

# -------------------- Evaluation --------------------
y_pred = []
for x in X_test:
    q_out = qnode(x, weights)
    label = 1 if q_out > 0 else 0  # threshold at 0
    y_pred.append(label)

accuracy = accuracy_score(y_test, y_pred)
print(f"\n✅ PennyLane Quantum Classifier Accuracy: {accuracy:.2f}")


Epoch 5: cost = 1.7279
Epoch 10: cost = 1.7097
Epoch 15: cost = 1.6902
Epoch 20: cost = 1.6698

✅ PennyLane Quantum Classifier Accuracy: 0.45
