# In [None]:
import os
import json
import math
import random
from copy import deepcopy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# %%
# Quantum helpers

def pure_density(vec):
    """Return the outer product |v><v| of a state vector.

    ``vec`` is converted to a complex column vector (any input shape is
    flattened into a single column) and multiplied by its conjugate
    transpose, giving a square complex matrix.
    """
    ket = np.array(vec, dtype=complex).reshape((-1, 1))
    bra = ket.conj().T
    return ket @ bra

def depolarizing_kraus(p):
    """Build the four single-qubit Kraus operators for noise strength ``p``.

    The returned list is ``[k0, k1, k2, k3]`` where ``k0`` is the identity
    scaled by ``sqrt(1 - 3p/4)`` and ``k1..k3`` are the Pauli X, Y, Z
    matrices scaled by ``sqrt(p/4)``. A negative value under either square
    root is clamped to zero before the root is taken.
    """
    identity = np.eye(2, dtype=complex)
    pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)
    pauli_y = np.array([[0, -1j], [1j, 0]], dtype=complex)
    pauli_z = np.array([[1, 0], [0, -1]], dtype=complex)

    keep_amplitude = np.sqrt(max(0.0, 1 - 3*p/4))
    error_amplitude = np.sqrt(max(0.0, p/4))

    operators = [keep_amplitude * identity]
    operators.extend(error_amplitude * pauli for pauli in (pauli_x, pauli_y, pauli_z))
    return operators

def apply_kraus(rho, kraus_list):
    """Apply a channel given by Kraus operators to the matrix ``rho``.

    Accumulates ``K @ rho @ K^dagger`` over every operator in
    ``kraus_list``, averages the result with its conjugate transpose so it
    is Hermitian, and finally divides by the trace when the trace magnitude
    exceeds ``1e-12`` (otherwise the matrix is returned un-normalised).
    """
    evolved = np.zeros_like(rho, dtype=complex)
    for operator in kraus_list:
        evolved += operator @ rho @ operator.conj().T

    # Remove any anti-Hermitian part left by floating-point round-off.
    evolved = (evolved + evolved.conj().T) / 2.0

    trace = np.trace(evolved)
    if not np.abs(trace) > 1e-12:
        # Trace is (numerically) zero: dividing would blow up, so skip it.
        return evolved
    return evolved / trace

def purity_of(rho):
    """Return ``trace(rho @ rho)`` as a Python float.

    The trace is passed through ``np.real_if_close`` so that a complex
    value with a negligible imaginary part is converted to a real number
    before the ``float`` conversion.
    """
    trace_of_square = np.trace(rho @ rho)
    real_trace = np.real_if_close(trace_of_square)
    return float(real_trace)

def von_neumann_entropy(rho):
    """Return the von Neumann entropy ``-sum(l * log2(l))`` of ``rho`` in bits.

    The eigenvalues ``l`` are obtained with ``np.linalg.eigvalsh``, which
    treats ``rho`` as Hermitian. Eigenvalues at or below ``1e-12``
    (including small negative values from round-off) are dropped, following
    the convention ``0 * log(0) = 0``. A pure state therefore yields exactly
    ``0.0`` rather than a small positive residue.

    Parameters
    ----------
    rho : array_like
        Square Hermitian matrix (a density matrix).

    Returns
    -------
    float
        Entropy in bits.
    """
    eigenvalues = np.linalg.eigvalsh(rho)
    # Zero (or numerically negative) eigenvalues contribute nothing.
    support = eigenvalues[eigenvalues > 1e-12]
    if support.size == 0:
        return 0.0
    entropy = -np.sum(support * np.log2(support))
    # Adding 0.0 turns a possible "-0.0" into "0.0" without altering others.
    return float(entropy) + 0.0

# %%
# Models

class PFA:
    """Two-state probabilistic automaton driven by per-symbol matrices.

    The automaton starts in the distribution ``[1.0, 0.0]`` and accepts on
    state index ``0``. Each symbol may have a transition matrix registered
    through ``add_transition``.
    """

    def __init__(self):
        self.initial = np.array([1.0,0.0])
        self.transitions = {}
        self.accepting = {0}

    def add_transition(self, symbol, matrix):
        """Register ``matrix`` (stored as a float array) for ``symbol``."""
        self.transitions[symbol] = np.array(matrix, dtype=float)

    def recognize(self, s):
        """Return the acceptance probability after reading the string ``s``.

        For every character the registered matrix (if any) is applied to
        the current distribution; negative entries are then clipped to zero
        and the vector is rescaled to sum to one whenever its sum is
        positive. The result is the total mass on the accepting indices.
        """
        state = self.initial.copy()
        for symbol in s:
            matrix = self.transitions.get(symbol)
            if matrix is not None:
                state = matrix @ state
            # Clipping and renormalising happen for unknown symbols too.
            state = np.clip(state, 0, None)
            total = state.sum()
            if total > 0:
                state = state / total
        return float(sum(state[idx] for idx in self.accepting))

class OQFA:
    """Single-qubit open quantum automaton with per-symbol noisy unitaries.

    The automaton starts in the pure state built from ``[1, 0]`` and, by
    default, accepts on basis index ``0``. Each symbol maps to a
    ``(U, depolarizing_p)`` pair: the unitary is applied first, followed by
    a depolarizing channel when ``depolarizing_p`` is positive.
    """

    def __init__(self):
        self.rho0 = pure_density(np.array([1.0,0.0], dtype=complex))
        # symbol -> (unitary as complex array, depolarizing strength)
        self.transitions = {}
        # Basis indices whose populations count as "accept".
        self.accepting = {0}

    def add_transition(self, symbol, U, depolarizing_p=0.0):
        """Register unitary ``U`` and noise strength for ``symbol``."""
        self.transitions[symbol] = (np.array(U, dtype=complex), float(depolarizing_p))

    def recognize(self, s, return_rho=False):
        """Evolve the initial state over ``s`` and return the acceptance value.

        Characters without a registered transition leave the state
        unchanged. The acceptance value is the sum of the real parts of the
        diagonal entries of the final density matrix at the indices in
        ``self.accepting`` (previously the accepting set was ignored and
        index 0 was always used; the default ``{0}`` gives the same result).

        Parameters
        ----------
        s : iterable of symbols
            Input word.
        return_rho : bool, optional
            When true, also return the final density matrix.

        Returns
        -------
        float or (float, ndarray)
            Acceptance value, optionally paired with the final state.
        """
        rho = self.rho0.copy()
        for ch in s:
            if ch not in self.transitions:
                continue
            U, p = self.transitions[ch]
            rho = U @ rho @ U.conj().T
            if p > 0:
                rho = apply_kraus(rho, depolarizing_kraus(p))
        # Honour the configured accepting set, mirroring PFA.recognize.
        p_accept = float(sum(rho[i, i].real for i in self.accepting))
        if return_rho:
            return p_accept, rho
        return p_accept

# %%
# Dataset

# Seed phrases for each intent label; every label has four example commands.
intents = {
    'lights_on': [
        "turn on the lights",
        "lights on",
        "switch lights on",
        "turn lights on",
    ],
    'lights_off': [
        "turn off the lights",
        "lights off",
        "switch lights off",
        "turn lights off",
    ],
    'play_music': [
        "play music",
        "play some music",
        "start music",
        "play songs",
    ],
    'stop_music': [
        "stop music",
        "pause music",
        "stop the music",
        "pause songs",
    ],
    'call_contact': [
        "call mom",
        "call dad",
        "call Alice",
        "make a call to Bob",
    ],
    'get_weather': [
        "what's the weather",
        "weather update",
        "what is the weather today",
        "tell me the weather",
    ],
}

import re

def tokenize_command(cmd):
    """Lower-case ``cmd``, strip punctuation, and split on whitespace.

    Every character that is neither a word character nor whitespace is
    removed (so ``"what's"`` becomes ``"whats"``); the remaining text is
    split into a list of tokens.
    """
    without_punctuation = re.sub(r"[^\w\s]", "", cmd.lower())
    return without_punctuation.split()

# Build the augmented dataset as (token list, label) pairs.
#
# A dedicated, fixed-seed generator is used for the shuffled variants so the
# dataset (and therefore every downstream split and metric, which already
# use random_state=0) is identical on each run. The module-level ``random``
# state is left untouched.
_augment_rng = random.Random(0)

all_rows = []
for label, phrases in intents.items():
    for ph in phrases:
        toks = tokenize_command(ph)
        # Original phrase.
        all_rows.append((toks, label))
        # Variant with the last token dropped.
        if len(toks) > 1:
            all_rows.append((toks[:-1], label))
        # Variant whose first token is replaced (not prefixed) by 'please'.
        if toks:
            t2 = toks.copy()
            t2[0] = 'please'
            all_rows.append((t2, label))
        # Variant with the token order shuffled.
        if len(toks) > 2:
            t3 = toks.copy()
            _augment_rng.shuffle(t3)
            all_rows.append((t3, label))

from collections import Counter
# Token frequencies over every row of the augmented dataset.
counter = Counter(tok for toks, _ in all_rows for tok in toks)

# Keep the 20 most frequent tokens; each gets its own letter, and one extra
# letter is reserved for everything else ('<UNK>').
vocab = [token for token, _count in counter.most_common(20)]
symbols = list('abcdefghijklmnopqrstuvwxyz')[:len(vocab)+1]
token_to_symbol = dict(zip(vocab, symbols))
token_to_symbol['<UNK>'] = symbols[len(vocab)]

def tokens_to_string(toks):
    """Encode a token list as a string with one symbol per token.

    Each token is looked up in ``token_to_symbol``; tokens that are not in
    the table are encoded with the ``'<UNK>'`` symbol.
    """
    def symbol_for(token):
        return token_to_symbol.get(token, token_to_symbol['<UNK>'])

    return ''.join(map(symbol_for, toks))

# One record per augmented example: the symbol string fed to the automata,
# the intent label, and the human-readable token text.
rows = []
for toks, label in all_rows:
    rows.append({
        'sequence': tokens_to_string(toks),
        'label': label,
        'tokens': ' '.join(toks),
    })

# Shuffle the rows deterministically and renumber the index from zero.
df = pd.DataFrame(rows)
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
df.head(20)

# %%
# Splits

# 60 % train, then the held-out 40 % is halved into validation and test.
# Both splits are stratified on the intent label and use a fixed seed.
train_df, temp_df = train_test_split(
    df,
    test_size=0.4,
    random_state=0,
    stratify=df['label'],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=0,
    stratify=temp_df['label'],
)

print(
    'Train size:', len(train_df),
    'Val size:', len(val_df),
    'Test size:', len(test_df),
)

# %%
# Builders

def build_baseline_pfa_for_vocab(df):
    """Return a ``PFA`` with the same mixing matrix for every symbol.

    The alphabet is the set of characters appearing anywhere in
    ``df['sequence']``; each symbol is given the transition matrix
    ``[[0.6, 0.4], [0.4, 0.6]]``.
    """
    baseline = PFA()
    all_text = ''.join(df['sequence'].tolist())
    for symbol in set(all_text):
        baseline.add_transition(symbol, [[0.6,0.4],[0.4,0.6]])
    return baseline

def build_param_oqfa_for_vocab(theta, depolarizing_p, df):
    """Return an ``OQFA`` whose every symbol applies the same noisy rotation.

    The unitary is the 2x2 rotation by angle ``theta``; it is registered,
    together with ``depolarizing_p``, for each distinct character found in
    ``df['sequence']``.
    """
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    rotation = np.array([[cos_t, -sin_t],
                         [sin_t,  cos_t]], dtype=complex)

    automaton = OQFA()
    all_text = ''.join(df['sequence'].tolist())
    for symbol in set(all_text):
        automaton.add_transition(symbol, rotation, depolarizing_p=depolarizing_p)
    return automaton

# %%
# One-vs-rest training

# One-vs-rest setup: label 1 for the target intent, 0 for every other intent.
target_intent = 'play_music'
train_labels = (train_df['label']==target_intent).astype(int).to_numpy()
val_labels = (val_df['label']==target_intent).astype(int).to_numpy()
test_labels = (test_df['label']==target_intent).astype(int).to_numpy()

train_seqs = train_df['sequence'].tolist()
val_seqs = val_df['sequence'].tolist()
test_seqs = test_df['sequence'].tolist()

# Hyper-parameter grid: rotation angle, depolarizing strength, and the
# acceptance threshold that turns a score into a binary prediction.
theta_candidates = np.linspace(-math.pi/2, math.pi/2, 9)
noise_candidates = [0.0, 0.02, 0.05, 0.08]
threshold_candidates = [0.3, 0.4, 0.5, 0.6, 0.7]

records = []
best = None

for p in noise_candidates:
    for th in theta_candidates:
        oq = build_param_oqfa_for_vocab(th, p, df)
        # Only validation scores drive model selection, so the training set
        # is not scored here.
        acc_val = np.array([oq.recognize(s) for s in val_seqs])

        # AUC depends on the scores only, not on the threshold, so it is
        # computed once per (p, theta). roc_auc_score raises ValueError when
        # it cannot be defined (e.g. a single class in val_labels).
        try:
            auc = roc_auc_score(val_labels, acc_val)
        except ValueError:
            auc = None

        for thr in threshold_candidates:
            preds_val = (acc_val >= thr).astype(int)
            f1 = f1_score(val_labels, preds_val, zero_division=0)
            acc = accuracy_score(val_labels, preds_val)
            records.append({'p':p, 'theta':th, 'threshold':thr,
                            'val_acc':acc, 'val_f1':f1, 'val_auc':auc})
            # Strict '>' keeps the first configuration among F1 ties.
            if best is None or f1 > best['score']:
                best = {'score':f1, 'p':p, 'theta':th,
                        'threshold':thr, 'model':deepcopy(oq)}

results_df = pd.DataFrame(records).sort_values(['val_f1'], ascending=False)
results_df.head(20)

# %%
# Evaluation

# Unpack the winning configuration; everything except the model object is
# kept as plain parameters.
best_model = best['model']
best_params = {key: value for key, value in best.items() if key != 'model'}

# Score the test set and threshold with the selected cut-off.
thr = best_params['threshold']
test_accepts = np.array([best_model.recognize(seq) for seq in test_seqs])
preds_test = (test_accepts >= thr).astype(int)

print('Best params:', best_params)
print('Test accuracy:', accuracy_score(test_labels, preds_test))
for metric_title, metric_fn in (
    ('Test precision:', precision_score),
    ('Test recall:', recall_score),
    ('Test f1:', f1_score),
):
    print(metric_title, metric_fn(test_labels, preds_test, zero_division=0))

# %%
# Plots

# Figure 1: test acceptance scores, one histogram per binary class.
plt.figure(figsize=(6,4))
for class_id, class_name in ((0, 'other'), (1, target_intent)):
    plt.hist(test_accepts[test_labels==class_id], bins=15, alpha=0.6, label=class_name)
plt.xlabel('OQFA acceptance')
plt.ylabel('Count')
plt.legend()
plt.title(f'Acceptance distribution for {target_intent}')
plt.show()

# Figure 2: validation F1 averaged over all grid rows sharing a noise level.
acc_vs_noise = results_df.groupby('p')['val_f1'].mean().reset_index()
plt.figure(figsize=(6,4))
plt.plot(acc_vs_noise['p'], acc_vs_noise['val_f1'], marker='o')
plt.xlabel('Depolarizing noise p')
plt.ylabel('Mean validation F1')
plt.title('Validation F1 vs noise')
plt.show()

# Figure 3: mean purity of the final state over the test sequences, using
# the selected rotation angle at each candidate noise level.
pur_rows = []
for p in noise_candidates:
    oq_tmp = build_param_oqfa_for_vocab(best_params['theta'], p, df)
    purities = [
        purity_of(oq_tmp.recognize(seq, return_rho=True)[1])
        for seq in test_seqs
    ]
    pur_rows.append({'p':p, 'purity':float(np.mean(purities))})

df_pur = pd.DataFrame(pur_rows)
plt.figure(figsize=(6,4))
plt.plot(df_pur['p'], df_pur['purity'], marker='o')
plt.xlabel('Depolarizing noise p')
plt.ylabel('Mean purity')
plt.title('Purity vs noise')
plt.show()

# %%
# Save results

# Output directory for every artefact written below (created if missing).
outdir = './oqfa_intent_outputs'
os.makedirs(outdir, exist_ok=True)

# Full grid-search table.
grid_csv_path = os.path.join(outdir, 'oqfa_intent_grid_search.csv')
results_df.to_csv(grid_csv_path, index=False)

# Per-example test scores and predictions.
test_preds_path = os.path.join(outdir, 'oqfa_intent_test_preds.csv')
test_preds_frame = pd.DataFrame({
    'sequence':test_seqs,
    'acceptance':test_accepts,
    'label':test_df['label'].tolist(),
    'pred':preds_test
})
test_preds_frame.to_csv(test_preds_path, index=False)

# Selected hyper-parameters plus test metrics as plain floats.
test_metrics = {
    'accuracy':float(accuracy_score(test_labels, preds_test)),
    'precision':float(precision_score(test_labels, preds_test, zero_division=0)),
    'recall':float(recall_score(test_labels, preds_test, zero_division=0)),
    'f1':float(f1_score(test_labels, preds_test, zero_division=0))
}
summary = {
    'best_params':best_params,
    'test_metrics':test_metrics
}

summary_path = os.path.join(outdir, 'oqfa_intent_summary.json')
with open(summary_path, 'w') as f:
    json.dump(summary, f, indent=2)

print('Saved outputs to', outdir)
