# In [None]:
# === LLM + PSO Optimization System (Credit Card + MLPClassifier) ===

import os
import re
import json
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from openai import OpenAI
from dotenv import load_dotenv
from pyswarms.single.global_best import GlobalBestPSO

# --- Step 1: Setup ---
# Read the API key from the local env file and build the OpenAI client used
# by the GPT query step.
load_dotenv("OPENAI_API_KEY.env")
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# --- Step 2: Load and Preprocess Dataset ---
# header=1: the column names are taken from row index 1 of the sheet.
df = pd.read_excel("default of credit card clients.xls", header=1)
X = df.drop(columns=["ID", "default payment next month"]).values
y = df["default payment next month"].values

# Split the raw features first: 20% test, then 25% of the remainder as
# validation (60/20/20 overall). The split indices depend only on the number
# of rows and random_state, so they match a split of the scaled matrix.
X_trainval_raw, X_test_raw, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X_trainval_raw, y_trainval, test_size=0.25, random_state=42
)

# Fit the scaler on the training split only, so that validation and test
# statistics never leak into preprocessing, then reuse it for every split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_trainval = scaler.transform(X_trainval_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that code relying on the full scaled matrix keeps working.
X_scaled = scaler.transform(X)

# --- Step 3: Define Objective Function ---
def mlp_objective(params):
    """Train an MLP with the given hyperparameters and return the negated validation AUC.

    Args:
        params: Sequence of four numbers ``(hidden, lr, dropout, l2)``.
            ``hidden`` is truncated to an int and used as the width of the
            single hidden layer, ``lr`` is passed as ``learning_rate_init``
            and ``l2`` as ``alpha``. ``dropout`` is unpacked but not passed
            to the classifier.

    Returns:
        ``-AUC`` measured on ``(X_val, y_val)`` so that a minimizer maximizes
        AUC, or ``1.0`` when fitting or scoring raises.
    """
    hidden, lr, _dropout, l2 = params
    hidden = int(hidden)
    clf = MLPClassifier(hidden_layer_sizes=(hidden,), learning_rate_init=lr, alpha=l2,
                        early_stopping=True, max_iter=300, random_state=42)
    try:
        clf.fit(X_train, y_train)
        probas = clf.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, probas)
    except Exception as exc:
        # Catch Exception rather than everything: a bare ``except`` would also
        # swallow KeyboardInterrupt and make a long search impossible to abort.
        print("⚠️ [Objective] Evaluation failed for", params, "-", exc)
        # -AUC lies in [-1, 0], so 1.0 is worse than any successful evaluation.
        return 1.0
    return -auc

# --- Step 4: PSO Execution Function ---
def run_pso(bounds, num_particles=10, max_iter=5):
    """Run one global-best PSO search over the MLP hyperparameters.

    Args:
        bounds: Pair ``(lower, upper)`` of equal-length sequences giving the
            per-dimension search limits, in the order expected by
            ``mlp_objective``.
        num_particles: Number of particles in the swarm.
        max_iter: Number of PSO iterations.

    Returns:
        Tuple ``(best_pos, best_cost, logs)`` where ``best_cost`` is the
        lowest ``-AUC`` reported by the optimizer, ``best_pos`` the matching
        position, and ``logs`` a list of ``(position, auc)`` pairs for the
        last recorded swarm positions.
    """
    print("🔧 [Executor Agent] Running PSO with bounds:", bounds)

    # pyswarms documents bounds as a tuple of arrays of shape (dimensions,).
    lower = np.asarray(bounds[0], dtype=float)
    upper = np.asarray(bounds[1], dtype=float)

    # Every objective call trains a network, so remember each result and
    # reuse it when the final swarm is turned into logs below.
    evaluated = {}

    def evaluate(particle):
        key = tuple(float(v) for v in particle)
        if key not in evaluated:
            evaluated[key] = mlp_objective(particle)
        return evaluated[key]

    def fitness_fn(X):
        return np.array([evaluate(p) for p in X])

    optimizer = GlobalBestPSO(n_particles=num_particles, dimensions=lower.size,
                              options={'c1': 1.5, 'c2': 1.5, 'w': 0.5}, bounds=(lower, upper))
    best_cost, best_pos = optimizer.optimize(fitness_fn, iters=max_iter)

    all_particles = optimizer.pos_history[-1]
    # The objective returns -AUC; flip the sign back for the logs.
    logs = [(p, -evaluate(p)) for p in all_particles]
    print("✅ [Executor Agent] Best AUC this round:", -best_cost)
    return best_pos, best_cost, logs

# --- Step 5: GPT Suggestion ---
def query_gpt_with_logs(logs, round_id):
    """Ask GPT-4o for new search bounds based on the best entries of ``logs``.

    The five highest-AUC entries are formatted into the prompt, the raw reply
    is printed and appended to ``gpt_trace_round.txt``, and the JSON that
    follows "Final Answer:" is parsed, clamped and validated.

    Args:
        logs: Iterable of ``(hyperparameters, auc)`` pairs.
        round_id: Zero-based round index; only used to label the trace entry.

    Returns:
        Dict mapping each of ``hidden``, ``lr``, ``dropout`` and ``l2`` to a
        ``{"min": ..., "max": ...}`` dict. When the reply cannot be turned
        into valid ranges, a fixed default set of ranges is returned instead.
    """
    print("🧠 [Creator Agent] Analyzing logs and generating new search bounds...")
    top5 = sorted(logs, key=lambda x: -x[1])[:5]
    log_str = "\n".join([f"{i+1}. HP={p} → AUC={a:.4f}" for i, (p, a) in enumerate(top5)])

    prompt = f"""
IMPORTANT: In this round, your goal is to not only maximize AUC, but also reduce fluctuation across rounds.
Try to narrow the search ranges if the top configurations are consistent.
Avoid wide repetitive ranges like [32,128] unless necessary.

You are a task creation AI expert in machine learning that is required to optimize the model’s hyperparameter settings to accomplish the final objective.

Model Info: MLPClassifier
Dataset Info: UCI Credit Default Dataset (23 numeric features, binary classification)
Hyperparameters:
- hidden: number of neurons
- lr: learning rate
- dropout: dropout rate
- l2: L2 regularization

Here are the top 5 historical training logs:
{log_str}

Use the following format:
Objective: your goal
Thought: your reasoning
Action: Final Answer
Final Answer: JSON with min/max ranges for each hyperparameter
"""

    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[{"role": "user", "content": prompt}]
    )

    # The message content may be None; treat that as an empty reply so the
    # fallback below is used instead of raising AttributeError.
    text = (response.choices[0].message.content or "").strip()
    print("📤 [Creator Agent] Full response:\n", text)
    with open("gpt_trace_round.txt", "a", encoding="utf-8") as f:
        f.write(f"\n==== Round {round_id+1} GPT Response ====\n")
        f.write(text + "\n")

    try:
        return _parse_bounds_suggestion(text)
    except (ValueError, KeyError, TypeError) as e:
        # json.JSONDecodeError is a ValueError; KeyError/TypeError cover
        # replies whose JSON does not have the expected structure.
        print("⚠️ Parsing failed:", e)
        return {
            "hidden": {"min": 64, "max": 128},
            "lr": {"min": 0.0001, "max": 0.01},
            "dropout": {"min": 0.0, "max": 0.5},
            "l2": {"min": 0.0001, "max": 0.01}
        }


def _parse_bounds_suggestion(text):
    """Extract, clamp and validate the JSON ranges that follow "Final Answer:" in *text*.

    Raises:
        ValueError: If no JSON object follows the marker, the JSON is
            malformed, or a range is empty or not usable.
        KeyError: If a hyperparameter or its ``min``/``max`` entry is missing.
        TypeError: If the JSON does not have the expected nesting or types.
    """
    # Lazily skip whatever sits between the marker and the first "{" (for
    # example a ```json fence), then capture greedily up to the last "}".
    match = re.search(r"Final Answer:.*?(\{.*\})", text, re.DOTALL)
    if not match:
        raise ValueError("No Final Answer block found.")
    suggestion = json.loads(match.group(1))

    # ✅ Clamp unstable ranges to improve convergence
    suggestion["dropout"]["min"] = max(0.05, suggestion["dropout"]["min"])
    suggestion["dropout"]["max"] = min(0.5, suggestion["dropout"]["max"])
    suggestion["l2"]["min"] = max(1e-5, suggestion["l2"]["min"])
    suggestion["l2"]["max"] = min(0.01, suggestion["l2"]["max"])

    # Validate every range here so a bad reply triggers the caller's fallback
    # rather than failing later when the bounds are handed to the optimizer.
    for name in ("hidden", "lr", "dropout", "l2"):
        low = float(suggestion[name]["min"])
        high = float(suggestion[name]["max"])
        if not low < high:
            raise ValueError(f"Invalid range for {name}: min={low}, max={high}")
    if float(suggestion["hidden"]["min"]) < 1:
        raise ValueError("hidden min must be at least 1.")
    if float(suggestion["lr"]["min"]) <= 0:
        raise ValueError("lr min must be positive.")
    return suggestion

# --- Step 6: Baseline Comparison (no GPT) ---
# PSO-only reference: each trial performs 10 independent PSO runs over the
# same fixed bounds and keeps the best AUC of every run.
baseline_n_trials = 5
baseline_bounds = ([32, 0.0001, 0.0, 0.00001], [128, 0.01, 0.5, 0.1])

# run_pso returns (best_pos, best_cost, logs); best_cost is -AUC.
baseline_auc_all = np.array([
    [-run_pso(baseline_bounds)[1] for _ in range(10)]
    for _ in range(baseline_n_trials)
])

# Rows are trials, columns are rounds: aggregate over trials.
baseline_auc_mean = baseline_auc_all.mean(axis=0)
baseline_auc_std = baseline_auc_all.std(axis=0)

# --- Step 7: Main Loop ---
def run_llm_pso_trials(n_trials=5, n_rounds=10):
    """Run repeated trials of PSO whose bounds are refined by GPT after every round.

    Each trial starts from the same initial bounds. After every PSO round the
    logs are sent to ``query_gpt_with_logs`` and the returned ranges become
    the bounds of the next round.

    Side effects:
        Resets and fills the module-level lists ``bound_records``,
        ``log_records`` and ``search_bounds_traj`` (one entry per trial and
        round), and stores in the module-level ``best_params`` the position
        with the highest AUC seen in any round of any trial.

    Args:
        n_trials: Number of independent trials.
        n_rounds: Number of PSO + GPT rounds per trial.

    Returns:
        Array of shape ``(n_trials, n_rounds)`` holding the best AUC of each
        round.
    """
    global bound_records, log_records, search_bounds_traj, best_params
    bound_records = []
    log_records = []
    search_bounds_traj = []

    keys = ['hidden', 'lr', 'dropout', 'l2']
    all_trials = []
    lowest_cost = np.inf  # costs are -AUC, so lower is better

    for trial in range(n_trials):
        print(f"\n🧪 Running LLM+PSO Trial {trial+1}...")
        trial_auc = []
        bounds = ([32, 0.0001, 0.0, 0.00001], [128, 0.01, 0.5, 0.1])
        for round_id in range(n_rounds):
            print(f"==================== Round {round_id+1} ====================")
            best_pos, best_cost, logs = run_pso(bounds)
            trial_auc.append(-best_cost)

            # ✅ Keep the best parameters found so far for the final evaluation
            if best_cost < lowest_cost:
                lowest_cost = best_cost
                best_params = best_pos

            suggestion = query_gpt_with_logs(logs, round_id)
            print("💡 [Creator Agent] Suggested bounds:", suggestion)

            # record bound (the trial index tells repeated round numbers apart)
            bound_records.append({
                "trial": trial + 1,
                "round": round_id + 1,
                "hidden_min": suggestion["hidden"]["min"],
                "hidden_max": suggestion["hidden"]["max"],
                "lr_min": suggestion["lr"]["min"],
                "lr_max": suggestion["lr"]["max"],
                "dropout_min": suggestion["dropout"]["min"],
                "dropout_max": suggestion["dropout"]["max"],
                "l2_min": suggestion["l2"]["min"],
                "l2_max": suggestion["l2"]["max"]
            })

            # record log
            log_records.append({
                "trial": trial + 1,
                "round": round_id + 1,
                "auc": -best_cost,
                "hidden": int(best_pos[0]),
                "lr": best_pos[1],
                "dropout": best_pos[2],
                "l2": best_pos[3]
            })

            search_bounds_traj.append(suggestion)

            # The suggested ranges become the bounds of the next round.
            bounds = (
                [suggestion[k]['min'] for k in keys],
                [suggestion[k]['max'] for k in keys]
            )

        all_trials.append(trial_auc)

    return np.array(all_trials)

# Run the LLM-guided experiment; rows are trials, columns are rounds.
llm_auc_all = run_llm_pso_trials(n_trials=5)
llm_auc_mean = llm_auc_all.mean(axis=0)
llm_auc_std = llm_auc_all.std(axis=0)

# Refit on train+validation with the hyperparameters that
# run_llm_pso_trials stored in best_params, then score on the test split.
final_hidden, final_lr, final_dropout, final_l2 = (float(value) for value in best_params)
final_hidden = int(final_hidden)
final_clf = MLPClassifier(
    hidden_layer_sizes=(final_hidden,),
    learning_rate_init=final_lr,
    alpha=final_l2,
    early_stopping=True,
    max_iter=300,
    random_state=42,
)
final_clf.fit(X_trainval, y_trainval)
final_preds = final_clf.predict_proba(X_test)[:, 1]  # probability of class 1
final_auc = roc_auc_score(y_test, final_preds)
print(f"\n🎯 Final Test AUC: {final_auc:.4f}")

# --- Step 8: AUC Convergence Visualization ---
# One error-bar series per method: mean over trials with the standard
# deviation as the bar, plotted against rounds 1..10.
plt.figure()
for series_mean, series_std, series_style in (
    (llm_auc_mean, llm_auc_std,
     {"label": 'LLM+PSO (mean ± std)', "marker": 'o'}),
    (baseline_auc_mean, baseline_auc_std,
     {"label": 'PSO only (mean ± std)', "marker": 'x', "linestyle": '--'}),
):
    plt.errorbar(range(1, 11), series_mean, yerr=series_std, capsize=3, **series_style)
plt.xlabel("Round")
plt.ylabel("Best AUC")
plt.title("AUC Convergence: LLM+PSO vs PSO-only")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Per-trial view: one curve of best AUC per round for every LLM+PSO trial.
# (The name `all_auc` used here before is not defined anywhere in this
# script; llm_auc_all holds the per-trial, per-round AUC values.)
plt.figure()
for trial_number, trial_curve in enumerate(llm_auc_all, start=1):
    plt.plot(range(1, len(trial_curve) + 1), trial_curve, marker='o', label=f"Trial {trial_number}")
plt.xlabel("Round")
plt.ylabel("Best AUC")
plt.title("AUC Convergence across Rounds (LLM+PSO)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# --- Step 9: Save Results to CSV ---
# One row per recorded round: best position/AUC, and the bounds GPT suggested.
pd.DataFrame(log_records).to_csv("round_best_results.csv", index=False)
pd.DataFrame(bound_records).to_csv("round_search_bounds.csv", index=False)

# --- Step 10: (Removed duplicate Baseline block above)

# Summary over every (trial, round) entry of each method. llm_auc_all is used
# for the LLM+PSO line, mirroring baseline_auc_all on the next line.
print(" Summary Statistics:")
print("LLM+PSO AUC: mean = {:.4f}, std = {:.4f}".format(np.mean(llm_auc_all), np.std(llm_auc_all)))
print("Baseline PSO AUC: mean = {:.4f}, std = {:.4f}".format(np.mean(baseline_auc_all), np.std(baseline_auc_all)))

# --- Step 11: LLM-Guided Search Range Width Analysis ---
# Width (max - min) of every suggested range, one record per stored bound.
width_records = [
    {
        "round": bound["round"],
        "hidden_width": bound["hidden_max"] - bound["hidden_min"],
        "lr_width": bound["lr_max"] - bound["lr_min"],
        "dropout_width": bound["dropout_max"] - bound["dropout_min"],
        "l2_width": bound["l2_max"] - bound["l2_min"],
    }
    for bound in bound_records
]
width_df = pd.DataFrame(width_records)

# bound_records holds one entry per trial and round, so each round number
# occurs once per trial. Average per round before plotting; plotting the raw
# rows would make every line jump back to round 1 at each trial boundary.
width_by_round = width_df.groupby("round", as_index=False).mean()

plt.figure()
for width_column, series_label in (
    ("hidden_width", "hidden"),
    ("lr_width", "lr"),
    ("dropout_width", "dropout"),
    ("l2_width", "l2"),
):
    plt.plot(width_by_round["round"], width_by_round[width_column], label=series_label)
plt.xlabel("Round")
plt.ylabel("Search Range Width")
plt.title("LLM-Guided Hyperparameter Range Shrinking")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# --- Step 12: Trajectory Visualization ---
# Centre of each suggested range for hidden size (x axis) and learning rate
# (y axis), in the order the suggestions were recorded. Comprehensions and
# distinct loop names are used so the module-level label vector `y` is not
# overwritten by a scalar.
x_vals = [(s['hidden']['min'] + s['hidden']['max']) / 2 for s in search_bounds_traj]
y_vals = [(s['lr']['min'] + s['lr']['max']) / 2 for s in search_bounds_traj]

plt.figure()
plt.plot(x_vals, y_vals, marker='o')
# Number every point by its position in the sequence of suggestions.
for step, (center_hidden, center_lr) in enumerate(zip(x_vals, y_vals), start=1):
    plt.text(center_hidden, center_lr, str(step), fontsize=8)
plt.title("Trajectory of GPT-guided Search Ranges (hidden vs lr)")
plt.xlabel("Hidden Layer Size (center)")
plt.ylabel("Learning Rate (center)")
plt.grid(True)
plt.show()
