# In [None]:
import pennylane as qml
import pennylane.numpy as np
import numpy as onp
from pennylane.optimize import AdamOptimizer
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import os

# Seed NumPy's global RNG so the dataset/batch shuffles are reproducible
onp.random.seed(0)

# Core configuration
WINDOW_SIZE = 2    # Number of past moves used as input
N_ROUNDS = 200     # Moves generated per simulated player
STRATEGIES = [
    'random',
    'rock_biased',
    'cycle_RPS',
    'copycat',
    'frequency_based',
]
SYMBOLS = ['R', 'P', 'S']
# Symbol -> integer index lookup (position of the symbol in SYMBOLS)
SYM2I = dict(zip(SYMBOLS, range(len(SYMBOLS))))

# Single-qubit mixed-state simulator (supports noise)
dev = qml.device("default.mixed", wires=1)

# ==============================
#  Quantum QFA Evaluation
# ==============================
def apply_euler_unitary(params):
    """Queues the single-qubit Euler rotation U(α,β,γ) on wire 0.

    ``params`` must unpack into exactly three angles; the gates are applied
    in the order RZ(α), RY(β), RZ(γ).
    """
    first_z, middle_y, last_z = params
    gate_sequence = ((qml.RZ, first_z), (qml.RY, middle_y), (qml.RZ, last_z))
    for gate, angle in gate_sequence:
        gate(angle, wires=0)

@qml.qnode(dev, interface="autograd")
def oqfa_accept_prob(flat_params, window_symbol_indices, p_noise=0.0):
    """
    Computes acceptance probability of the Quantum Finite Automaton (QFA)
    after processing a window of move-symbols.

    Args:
        flat_params: 9 values, reshaped to (3, 3) — one row of Euler angles
            (alpha, beta, gamma) per input symbol.
        window_symbol_indices: Iterable of symbol indices used to select a
            row of the reshaped parameters (so each must be 0, 1 or 2).
        p_noise: Depolarizing probability applied after every symbol's
            unitary; no channel is inserted when it is not positive.

    Returns:
        Expectation value of the projector onto |1⟩ on wire 0.
    """
    params = np.reshape(flat_params, (3, 3))

    # Sequentially apply the symbol-conditioned unitaries in window order
    for idx in window_symbol_indices:
        apply_euler_unitary(params[idx])
        if p_noise > 0.0:
            qml.DepolarizingChannel(p_noise, wires=0)

    # Acceptance = ⟨1|ρ|1⟩.
    # For a single wire the basis-state label has length 1, so [1] selects |1⟩.
    # A length-2 state such as [1, 0] is a state vector (i.e. |0⟩) in PennyLane
    # releases with state-vector projectors, which would measure ⟨0|ρ|0⟩ instead.
    return qml.expval(qml.Projector([1], wires=0))

# ==============================
#  Data Generation
# ==============================
def simulate_player(strategy, n_rounds=N_ROUNDS, seed=None):
    """Simulates a player move sequence according to a given strategy.

    Args:
        strategy: One of 'random', 'rock_biased', 'cycle_RPS', 'copycat'
            or 'frequency_based'.
        n_rounds: Number of moves to generate.
        seed: Seed for a private ``RandomState``; ``None`` gives an
            unseeded generator.

    Returns:
        List of ``n_rounds`` moves, each one of the entries of SYMBOLS.

    Raises:
        ValueError: If ``strategy`` is not one of the names above.
    """
    # RandomState(None) is the unseeded generator, so no branching is needed
    rng = onp.random.RandomState(seed)

    if strategy == 'random':
        return [rng.choice(SYMBOLS) for _ in range(n_rounds)]

    if strategy == 'rock_biased':
        probs = [0.6, 0.2, 0.2]
        return [rng.choice(SYMBOLS, p=probs) for _ in range(n_rounds)]

    if strategy == 'cycle_RPS':
        seq = ['R', 'P', 'S']
        return [seq[i % 3] for i in range(n_rounds)]

    if strategy == 'copycat':
        # Repeat last move; sometimes (5%) switch randomly.
        # The very first move is always a fresh draw (no rand() consumed),
        # and nothing is emitted when n_rounds is 0.
        moves = []
        prev = None
        for i in range(n_rounds):
            if i == 0 or rng.rand() < 0.05:
                prev = rng.choice(SYMBOLS)
            moves.append(prev)
        return moves

    if strategy == 'frequency_based':
        # Biased to repeat the most frequent move so far: after 5 warm-up
        # rounds, play the current majority move with probability 0.8.
        moves = []
        counts = dict.fromkeys(SYMBOLS, 0)  # running tally, avoids recounting
        for i in range(n_rounds):
            if i < 5:
                move = rng.choice(SYMBOLS)
            elif rng.rand() < 0.8:
                # max() returns the first maximal symbol in SYMBOLS order,
                # which is the same tie-break as a stable descending sort
                move = max(SYMBOLS, key=counts.__getitem__)
            else:
                move = rng.choice(SYMBOLS)
            counts[move] += 1
            moves.append(move)
        return moves

    raise ValueError(f"Unknown strategy: {strategy!r}")

def build_windows_int(moves, window_size=WINDOW_SIZE):
    """
    Slides a window of ``window_size`` moves over the sequence.

    Returns ``(X, y)`` where each entry of X is the window's symbols mapped
    to integer indices via SYM2I, and the matching entry of y is the move
    that immediately follows the window (kept as a symbol, not an index).
    """
    target_positions = range(window_size, len(moves))
    X = [
        [SYM2I[sym] for sym in moves[pos - window_size:pos]]
        for pos in target_positions
    ]
    y = [moves[pos] for pos in target_positions]
    return X, y

# ==============================
#  Model Functions
# ==============================
def init_params_random(seed=None):
    """Draws random Euler angles for the QFA (3 classes × 3 symbols × 3 rotations).

    For every (class, symbol) pair the angles are sampled in the order
    alpha ∈ [-π, π), beta ∈ [0, π), gamma ∈ [-π, π) from a RandomState
    seeded with ``seed``. The result is returned as a trainable array.
    """
    rng = onp.random.RandomState(seed)
    pi = onp.pi
    angles = onp.zeros((3, 3, 3))
    # ndindex walks (class, symbol) pairs in row-major order
    for cls, sym in onp.ndindex(3, 3):
        alpha = rng.uniform(-pi, pi)
        beta = rng.uniform(0.0, pi)
        gamma = rng.uniform(-pi, pi)
        angles[cls, sym] = (alpha, beta, gamma)
    return np.array(angles, requires_grad=True)

def flat_params_class(params_c):
    """Returns the parameters of one class as a 1-D array for the QNode."""
    flattened = np.reshape(params_c, -1)
    return flattened

def acceptance_scores(params, window, p_noise=0.0):
    """Evaluates QFA acceptance scores for all 3 output classes.

    Args:
        params: Parameters indexed by class first; ``params[c]`` is flattened
            and passed to the QNode for class ``c``.
        window: Sequence of integer symbol indices for one input window.
        p_noise: Depolarizing probability forwarded to the QNode.

    Returns:
        Array of the 3 acceptance values, in class order.
    """
    # The window is input data, not a trainable quantity: build it once
    # (it does not depend on the class) and mark it as non-differentiable.
    w = np.array(window, requires_grad=False)
    scores = [
        oqfa_accept_prob(flat_params_class(params[c]), w, p_noise=p_noise)
        for c in range(3)
    ]
    return np.array(scores)

def softmax(v):
    """Returns the softmax of ``v``, shifting by its maximum before exponentiating."""
    shifted = v - np.max(v)
    weights = np.exp(shifted)
    normaliser = np.sum(weights)
    return weights / normaliser

def cross_entropy_loss(params, Xb, yb, p_noise=0.0):
    """
    Mean cross-entropy of the QFA predictions over a batch.

    For every (window, label) pair the three acceptance scores are turned
    into probabilities with softmax; the negative log of the probability
    assigned to the true label (clipped to [1e-9, 1]) is accumulated and
    the total is divided by the batch size.
    """
    total_nll = 0.0
    for window, label in zip(Xb, yb):
        class_probs = softmax(acceptance_scores(params, window, p_noise))
        target_prob = class_probs[SYMBOLS.index(label)]
        total_nll = total_nll - np.log(np.clip(target_prob, 1e-9, 1.0))
    return total_nll / len(Xb)

# ==============================
#  Prediction
# ==============================
def predict(params, Xb, p_noise=0.0):
    """Predicts one symbol (R/P/S) per window: the class with the highest acceptance score."""

    def best_symbol(window):
        scores = acceptance_scores(params, window, p_noise)
        return SYMBOLS[int(onp.argmax(scores))]

    return [best_symbol(window) for window in Xb]

# ==============================
#  Training Loop
# ==============================
def train(params, X_train, y_train, X_val, y_val,
          n_epochs=10, batch_size=256, lr=0.001, p_noise_train=0.02):
    """
    Fits the QFA parameters with Adam on the mini-batch cross-entropy loss.

    Each epoch reshuffles the training indices with NumPy's global RNG and
    takes one optimizer step per mini-batch. Depolarizing noise of strength
    ``p_noise_train`` is used both in the loss and in the validation
    predictions. Validation accuracy is printed every 5th epoch.

    Returns the updated parameters.
    """
    optimizer = AdamOptimizer(stepsize=lr)
    n_samples = len(X_train)

    for ep in range(1, n_epochs + 1):
        order = onp.arange(n_samples)
        onp.random.shuffle(order)

        # One Adam step per mini-batch
        for start in range(0, n_samples, batch_size):
            chosen = order[start:start + batch_size]
            X_batch = [X_train[k] for k in chosen]
            y_batch = [y_train[k] for k in chosen]
            params = optimizer.step(
                lambda p: cross_entropy_loss(p, X_batch, y_batch, p_noise=p_noise_train),
                params,
            )

        # Only report on every 5th epoch
        if ep % 5 != 0:
            continue
        val_preds = predict(params, X_val, p_noise=p_noise_train)
        acc = accuracy_score(y_val, val_preds)
        print(f"Epoch {ep}/{n_epochs} — val_acc={acc:.3f}")

    return params

# ==============================
#  Main Execution
# ==============================
def main():
    """Runs the full experiment: simulate data, train, test, and plot a noise sweep."""
    # Six simulated players (seeds 1..6) per strategy
    moves_all = [
        simulate_player(strat, n_rounds=N_ROUNDS, seed=seed + 1)
        for strat in STRATEGIES
        for seed in range(6)
    ]

    # Pool the sliding windows of every player into one dataset
    X, y = [], []
    for moves in moves_all:
        windows, labels = build_windows_int(moves)
        X += windows
        y += labels

    # Shuffle windows and labels with the same permutation
    order = onp.arange(len(X))
    onp.random.shuffle(order)
    X = [X[k] for k in order]
    y = [y[k] for k in order]

    # 60% train / 20% validation / remainder test
    n = len(X)
    train_end = int(0.6 * n)
    val_end = train_end + int(0.2 * n)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    params0 = init_params_random(42)

    # Train QFA model
    trained = train(params0, X_train, y_train, X_val, y_val,
                    n_epochs=40, batch_size=512, lr=0.06)

    # Test accuracy with depolarizing noise p = 0.02
    preds_test = predict(trained, X_test, p_noise=0.02)
    acc = accuracy_score(y_test, preds_test)
    print("Test accuracy:", acc)

    # Accuracy as a function of depolarizing noise strength
    sweep = onp.linspace(0.0, 0.2, 21)
    accs = [
        accuracy_score(y_test, predict(trained, X_test, p_noise=p))
        for p in sweep
    ]

    plt.plot(sweep, accs, marker='o')
    plt.xlabel("Depolarizing noise p")
    plt.ylabel("Accuracy")
    plt.title("Noise Sweep")
    plt.grid(True)
    plt.show()

# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
