# Quantum Machine Learning (QML) — Full end-to-end workflow

Goal: Use a variational quantum circuit (a small QML model) to classify Iris flower species.
The notebook follows the standard ML workflow and adds the quantum-specific steps: data encoding, circuit design, and hybrid quantum-classical training.

# Problem statement

Use a quantum machine learning model (variational quantum circuit) to classify Iris flowers (Setosa vs Versicolor). For demonstration we will convert the multiclass Iris dataset to a binary problem (class 0 vs class 1) so training on a small QPU/simulator is straightforward.

# Import basic libraries

In [None]:
# Classical libs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Scikit-learn for classical preprocessing and evaluation
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# PennyLane for quantum circuits
import pennylane as qml
from pennylane import numpy as pnp

# Load data

In [None]:
# Load Iris as a pandas DataFrame. With as_frame=True, `iris.frame` already
# holds the four feature columns plus the `target` column, so no separate
# assignment of the target is needed.
iris = load_iris(as_frame=True)
data = iris.frame.copy()
data.head()

# Domain analysis

The Iris dataset records sepal length, sepal width, petal length, petal width for three species. For QML demonstration we will convert this into a binary classification problem: Setosa (target 0) vs Versicolor (target 1). This keeps the circuit and training simple while illustrating QML steps. In real applications you can extend to multiclass.

# Basic checks

In [None]:
# Quick structural overview of the loaded DataFrame.
print("Shape:", data.shape)
print("Columns:", data.columns.tolist())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
data.info()
print(data.describe().T)
print("Missing values:\n", data.isnull().sum())

# Exploratory data analysis

In [None]:
# Pairwise scatter plots restricted to the two classes used in the binary task.
feature_cols = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
binary_rows = data['target'].isin([0, 1])
sns.pairplot(data.loc[binary_rows, feature_cols + ['target']], hue='target')
plt.show()

# Correlation heatmap of the first four columns (computed over all rows of `data`).
feature_corr = data.iloc[:, :4].corr()
sns.heatmap(feature_corr, annot=True)
plt.show()

Insights: Petal length and petal width separate Setosa from Versicolor very clearly, which makes them good features for this classification task.

# Feature engineering

In [None]:
# Filter the dataset to classes 0 and 1.

# PCA (applied in the preprocessing step below) reduces the features to match the available qubits (e.g., 2 features → 2 qubits).
# Quantum circuits with few qubits are easier to simulate.

In [None]:
# Keep only the rows labelled 0 or 1 to obtain a binary problem.
binary_mask = data['target'].isin([0, 1])
df = data[binary_mask].copy()

# The first four columns are used as features; `target` holds the labels.
X = df.iloc[:, :4].values
y = df['target'].values

# Labels are already 0/1, so no extra encoding is applied; show them as a check.
np.unique(y)

# Data preprocessing

In [None]:
# Scale features to [0, 1] (helpful for angle/amplitude embedding).

# Use PCA to reduce to 2 features (2 qubits). Note: PCA centers the data, so the 2 resulting features are no longer confined to [0, 1].

In [None]:
# Split first so that the scaler and PCA are fitted on training rows only.
# Fitting them on the full dataset would leak test-set statistics into the
# features the model is trained on. The split arguments are unchanged, so the
# same rows end up in the train and test sets as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

scaler = MinMaxScaler()
# Reduce to 2 components for two qubits
pca = PCA(n_components=2, random_state=42)

# Fit on the training rows, then apply the fitted transforms to the test rows.
X_train = pca.fit_transform(scaler.fit_transform(X_train_raw))
X_test = pca.transform(scaler.transform(X_test_raw))

# Full-dataset versions kept under their original names (e.g. for plotting);
# they reuse the transforms fitted on the training rows.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

### Quantum-specific step — choose encoding and device

#### We will use angle encoding (map each feature to a rotation angle) and a small variational ansatz with entanglement, simulated on PennyLane's `default.qubit` device.

In [None]:
# Number of qubits (wires) in the circuit; also passed as the device wire count.
n_qubits = 2
# PennyLane "default.qubit" simulator device on which the QNode is executed.
dev = qml.device("default.qubit", wires=n_qubits)

# Model building — define variational circuit and classifier

In [None]:
# An AngleEmbedding to encode classical features into rotation angles on qubits.

# A variational layer (parametrized rotations + entangling gates).

# Measurement returns expectation of PauliZ on first qubit; map expectation to class label.

In [None]:
# Number of variational layers; used as the first dimension of the weight
# tensor passed to StronglyEntanglingLayers.
n_layers = 3

def variational_ansatz(weights, x):
    """Apply the data-encoding and trainable gates of the classifier circuit.

    Args:
        weights: Parameters handed to ``qml.StronglyEntanglingLayers``.
        x: Classical features, embedded as Y-rotation angles.
    """
    qubit_wires = range(n_qubits)
    # Data encoding: each feature becomes a Y-rotation angle on a qubit.
    qml.AngleEmbedding(features=x, wires=qubit_wires, rotation='Y')
    # Trainable part: parametrized rotations plus entangling gates.
    qml.StronglyEntanglingLayers(weights=weights, wires=qubit_wires)

@qml.qnode(dev, interface='autograd')
def qnode(weights, x):
    """Run the classifier circuit and return the PauliZ expectation on wire 0."""
    variational_ansatz(weights, x)
    # Read out the first qubit only.
    readout = qml.PauliZ(0)
    return qml.expval(readout)

In [None]:
# We need initial weights:

In [None]:
# Seed the RNG so the initial weights are reproducible, in line with the fixed
# random_state=42 used for PCA and the train/test split.
pnp.random.seed(42)

# shape for StronglyEntanglingLayers: (n_layers, n_wires, 3)
weights_shape = (n_layers, n_qubits, 3)
# Small random starting values, marked trainable for the optimizer.
weights = pnp.random.normal(scale=0.1, size=weights_shape, requires_grad=True)

#### Define helper to map expectation to class probability/label:

* The expectation value ⟨Z⟩ lies in [-1, 1]. Map it to a class-1 probability via (1 - ⟨Z⟩)/2, or alternatively apply a sigmoid to the expectation value.

In [None]:
def exp_to_prob(expval):
    """Convert an expectation value into a class-1 probability.

    An expectation of +1 maps to probability 0 and -1 maps to probability 1.
    """
    gap_from_plus_one = 1 - expval
    return gap_from_plus_one / 2.0

def predict_label(weights, x):
    """Return the predicted class (0 or 1) for a single sample.

    The circuit expectation is converted to a class-1 probability and
    thresholded at 0.5 (a probability of exactly 0.5 yields class 1).
    """
    prob1 = exp_to_prob(qnode(weights, x))
    if prob1 >= 0.5:
        return 1
    return 0

# Training — hybrid quantum-classical training

We define a mean squared error loss between the class-1 probabilities (obtained from the measured expectation values) and the 0/1 labels, and optimize the circuit weights with PennyLane's Adam optimizer.

In [None]:
def cost(weights, X, Y):
    """Mean squared error between class-1 probabilities and the labels.

    Args:
        weights: Circuit parameters being optimized.
        X: Iterable of samples; the circuit is evaluated once per sample.
        Y: Labels, converted to floats before comparison.
    """
    probs = pnp.array([exp_to_prob(qnode(weights, sample)) for sample in X])
    targets = pnp.array(Y, dtype=float)
    residuals = probs - targets
    return pnp.mean(residuals**2)

opt = qml.AdamOptimizer(stepsize=0.1)
epochs = 40
batch_size = 8

# Train a copy so the initial weights stay available for comparison.
weights_opt = weights.copy()

# Dedicated seeded generator so the mini-batch order is reproducible, in line
# with the fixed random_state=42 used for PCA and the train/test split.
shuffle_rng = np.random.default_rng(42)
n_train = len(X_train)

for epoch in range(1, epochs + 1):
    # Mini-batch SGD: reshuffle the training set at the start of every epoch.
    perm = shuffle_rng.permutation(n_train)
    X_shuffled = X_train[perm]
    Y_shuffled = y_train[perm]
    for i in range(0, n_train, batch_size):
        X_batch = X_shuffled[i:i+batch_size]
        Y_batch = Y_shuffled[i:i+batch_size]
        # The lambda is invoked inside opt.step right away, so it always sees
        # the current batch.
        weights_opt = opt.step(lambda w: cost(w, X_batch, Y_batch), weights_opt)

    # The full-training-set loss costs one circuit run per sample, so it is
    # evaluated only on the epochs that are actually logged.
    if epoch % 5 == 0 or epoch == 1:
        train_loss = cost(weights_opt, X_train, y_train)
        print(f"Epoch {epoch:2d} — Train loss: {train_loss:.4f}")

#### Predictions

In [None]:
# Classify every test sample with the trained weights and collect the labels
# in a NumPy array.
y_pred = np.array([predict_label(weights_opt, sample) for sample in X_test])

#### Evaluation

In [None]:
# Headline metrics on the test set.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
report = classification_report(y_test, y_pred)
print("Classification report:\n", report)
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)

# Confusion matrix as an annotated heatmap.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

Interpretation: Compare accuracy with a classical baseline (e.g., logistic regression) to see whether the QML approach performs comparably on this small dataset.

In [None]:
# Classical baseline: logistic regression on the same train/test features.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_clf = clf.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_clf)
print("Classical logistic regression accuracy:", baseline_accuracy)

### Conclusion

* The pipeline shows a full ML workflow adapted for QML:

    * standard data steps (EDA, preprocessing, splitting)

    * quantum-specific steps (encoding → quantum circuit → hybrid training)

* For small toy datasets like Iris, QML can match classical baselines in some experiments, but the main value of QML will appear for tasks or models where quantum advantages apply.

* Important hyperparameters: number of qubits, encoding method, circuit depth (number of layers), optimizer and training epochs.

In [None]:
# Full script (compact)

# Below is a single script that bundles the above steps — copy-paste and run if PennyLane and scikit-learn are installed.

In [None]:
# qml_iris_example.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import pennylane as qml
from pennylane import numpy as pnp

# Load data
iris = load_iris(as_frame=True)
data = iris.frame.copy()
data['target'] = iris.target

# Binary classification: class 0 vs 1
df = data[data['target'].isin([0,1])].copy()
X = df.iloc[:, :4].values
y = df['target'].values

# Split before fitting any transform: fitting the scaler and PCA on the full
# dataset would leak test-set statistics into the training features. The split
# arguments are unchanged, so the same rows land in each set as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Preprocess: fit on the training rows only, then transform the test rows
scaler = MinMaxScaler()
pca = PCA(n_components=2, random_state=42)
X_train = pca.fit_transform(scaler.fit_transform(X_train_raw))
X_test = pca.transform(scaler.transform(X_test_raw))

# Full-dataset versions kept under their original names; they reuse the
# transforms fitted on the training rows.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

# Quantum setup
n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)
n_layers = 3

def variational_ansatz(weights, x):
    """Queue the embedding of ``x`` followed by the trainable entangling layers."""
    wires = range(n_qubits)
    # Features enter the circuit as Y-rotation angles.
    qml.AngleEmbedding(features=x, wires=wires, rotation='Y')
    qml.StronglyEntanglingLayers(weights=weights, wires=wires)

@qml.qnode(dev, interface='autograd')
def qnode(weights, x):
    """Evaluate the circuit for one sample; returns the PauliZ expectation on wire 0."""
    variational_ansatz(weights, x)
    observable = qml.PauliZ(0)
    return qml.expval(observable)

# Seed the RNG so the initial weights are reproducible, in line with the fixed
# random_state=42 used for PCA and the train/test split.
pnp.random.seed(42)
# One (n_qubits, 3) block of rotation angles per layer.
weights_shape = (n_layers, n_qubits, 3)
weights = pnp.random.normal(scale=0.1, size=weights_shape, requires_grad=True)

def exp_to_prob(expval):
    """Map an expectation value to a class-1 probability: +1 -> 0, -1 -> 1."""
    numerator = 1 - expval
    return numerator / 2.0

def predict_label(weights, x):
    """Predict 0 or 1 for one sample by thresholding the class-1 probability at 0.5."""
    prob1 = exp_to_prob(qnode(weights, x))
    if prob1 >= 0.5:
        return 1
    return 0

def cost(weights, X, Y):
    """Mean squared error between per-sample class-1 probabilities and labels ``Y``."""
    labels = pnp.array(Y, dtype=float)
    # One circuit evaluation per sample.
    probabilities = pnp.array([exp_to_prob(qnode(weights, row)) for row in X])
    squared_errors = (probabilities - labels)**2
    return pnp.mean(squared_errors)

opt = qml.AdamOptimizer(stepsize=0.1)
epochs = 40
batch_size = 8
# Train a copy so the initial weights stay untouched.
weights_opt = weights.copy()

# Dedicated seeded generator so the mini-batch order is reproducible, in line
# with the fixed random_state=42 used for PCA and the train/test split.
shuffle_rng = np.random.default_rng(42)
n_train = len(X_train)

for epoch in range(1, epochs + 1):
    # Reshuffle the training set at the start of every epoch.
    perm = shuffle_rng.permutation(n_train)
    X_shuffled = X_train[perm]
    Y_shuffled = y_train[perm]
    for i in range(0, n_train, batch_size):
        X_batch = X_shuffled[i:i+batch_size]
        Y_batch = Y_shuffled[i:i+batch_size]
        # The lambda is invoked inside opt.step right away, so it always sees
        # the current batch.
        weights_opt = opt.step(lambda w: cost(w, X_batch, Y_batch), weights_opt)

    # Log the full-training-set loss on the first epoch and every fifth one.
    if epoch % 5 == 0 or epoch == 1:
        print(f"Epoch {epoch:2d} — Train loss: {cost(weights_opt, X_train, y_train):.4f}")

# Evaluate the trained circuit on the held-out test set
y_pred = []
for sample in X_test:
    y_pred.append(predict_label(weights_opt, sample))
qml_accuracy = accuracy_score(y_test, y_pred)
print("QML Accuracy:", qml_accuracy)
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d')
plt.show()

# Classical baseline: logistic regression on the same features
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_cl = clf.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_cl)
print("Classical logistic regression accuracy:", baseline_accuracy)