In [None]:
import numpy as np
from typing import TypedDict, List, Optional, Literal , Dict
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import pandas as pd
import faiss

class AgentState(TypedDict):
    step: int
    window_features: np.ndarray  # shape (15, 7)
    predicted_state: Optional[Literal["Good", "Moderate", "Poor"]]
    prediction_probs: Optional[List[float]]
    action: Optional[str]
    outcome: Optional[Literal["Success", "Fail", "Pending"]]
    similar_case: Optional[dict]
    log: List[str]

class SelfRAGMemory:
    def __init__(self, dim):
        self.dim = dim
        self.index = faiss.IndexFlatL2(dim)
        self.metadata = []  # No cap

    def add(self, input_window, state, action, outcome):
        vec = input_window.flatten().astype('float32')
        vec /= np.linalg.norm(vec) + 1e-8  # Normalize
        self.index.add(np.expand_dims(vec, axis=0))
        self.metadata.append((state, action, outcome))

    def retrieve(self, input_window, k=3):
        if len(self.metadata) == 0:
            return []

        query_vec = input_window.flatten().astype('float32')
        query_vec /= np.linalg.norm(query_vec) + 1e-8
        D, I = self.index.search(np.expand_dims(query_vec, axis=0), k)

        results = []
        for idx, dist in zip(I[0], D[0]):
            if idx < len(self.metadata):
                similarity = 1 / (1 + dist)
                results.append({
                    'state': self.metadata[idx][0],
                    'action': self.metadata[idx][1],
                    'outcome': self.metadata[idx][2],
                    'similarity': similarity
                })
        return results


class GlobalLogMemory:
    def __init__(self):
        self.logs = []
    
    def add_log(self, entry: str):
        self.logs.append(entry)

class LSTMClassifierNode:
    def __init__(self, model_path: str, scaler: StandardScaler, class_order: List[str]):
        self.model = load_model(model_path)
        self.scaler = scaler
        self.class_order = class_order

    def __call__(self, state: AgentState) -> AgentState:
        # Prepare window for LSTM
        window = state["window_features"].reshape(1, 15, 7)
        probs = self.model.predict(window, verbose=0)
        pred_idx = int(np.argmax(probs))
        pred_class = self.class_order[pred_idx]

        # Update state
        state["predicted_state"] = pred_class
        state["prediction_probs"] = probs[0].tolist()
        state["log"].append(
            f"[Step {state['step']}] Predicted: {pred_class} | Probs: {probs[0]}"
        )
        return state

    

In [6]:
from typing import List, Dict
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
import os

# ====== CONFIG ======
OPENROUTER_API_KEY = "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"  # <-- paste your key here
OPENROUTER_MODEL = "openai/gpt-4"  # you can change to other supported models
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
# ====================

LLM_CACHE = {}

# Create LLM object using OpenRouter endpoint
llm = ChatOpenAI(
    model=OPENROUTER_MODEL,
    temperature=0.3,
    api_key=OPENROUTER_API_KEY,
    base_url=OPENROUTER_BASE_URL
)

@tool
def select_action(current_state: str, retrieved_memories: List[Dict]) -> str:
    """
    Decides the best action based on the current state and retrieved memory,
    using LLM via OpenRouter.
    """
    prompt = ChatPromptTemplate.from_template("""
    You are an AI network optimization agent.

    Current State: {state}
    Retrieved Memory:
    {memory_items}

    Based on the above, what is the best action to improve the state?
    Respond with a short action like: "Lower Modulation to QPSK + Max FEC" or "Increase Transmit Power".
    """)

    # Format retrieved memory text
    memory_text = ""
    for i, mem in enumerate(retrieved_memories):
        memory_text += (
            f"{i+1}. State: {mem['state']}, Action: {mem['action']}, "
            f"Outcome: {mem['outcome']} (Similarity: {mem['similarity']:.2f})\n"
        )
    
    # Cache key for identical queries
    cache_key = f"{current_state}::{memory_text}"

    if cache_key in LLM_CACHE:
        return LLM_CACHE[cache_key]
    
    # Call the model
    chain = prompt | llm
    response = chain.invoke({"state": current_state, "memory_items": memory_text})
    action = response.content.strip()
    
    # Store in cache
    LLM_CACHE[cache_key] = action
    return action


In [9]:
from typing import Dict
import numpy as np
from langchain_core.tools import tool

@tool
def execute_action(current_window: np.ndarray, action: str, simulation_mode: bool = True) -> np.ndarray:
    """
    Simulates or applies the effect of an action on the signal feature window.

    Args:
        current_window: np.ndarray shape (15, 7) - the current sliding window of features
        action: str - action chosen by the LLM
        simulation_mode: bool - if True, apply effect to entire window; else only last row.

    Returns:
        np.ndarray: modified feature window after action
    """
    action_effects = {
        "Lower Modulation to QPSK + Max FEC": np.array([+0.5, -0.2, -0.3, 0.0, -0.1, +0.2, 0.0]),
        "Increase Transmit Power":             np.array([+0.7, -0.1, -0.2, 0.0, +0.2, +0.1, 0.0]),
        "Switch Frequency Band":               np.array([-0.2, +0.1, 0.0, +0.3, -0.4, -0.1, +0.2]),
        "Apply Beamforming":                   np.array([+0.6, -0.3, -0.2, 0.0, +0.1, +0.4, 0.0]),
        "Use Relay Node":                      np.array([+0.4, -0.1, -0.1, 0.0, 0.0, 0.0, 0.0]),
    }

    modified_window = current_window.copy()

    for known_action, delta in action_effects.items():
        if known_action.lower() in action.lower():
            if simulation_mode:
                # Apply delta to ALL rows (full simulated trend shift)
                modified_window = modified_window + delta
            else:
                # Apply delta only to the most recent row
                modified_window[-1, :] = modified_window[-1, :] + delta
            break

    return modified_window


In [None]:
from typing import Tuple, Dict

class OutcomeEvaluatorNode:
    """
    Re-evaluates the updated window with the LSTM and decides Success/Fail.
    """

    def __init__(self, model, class_order, delta_thresh: float = 0.10, simulation_mode: bool = True):
        self.model = model
        self.class_order = class_order
        self.delta_thresh = delta_thresh
        self.simulation_mode = simulation_mode

    def __call__(self, state: "AgentState", modified_window: np.ndarray) -> "AgentState":
        """
        Inputs:
          - state["window_features"]: (15, 7) standardized features
          - modified_window: (15, 7) after ActionExecutor
        """
        prev_probs = np.array(state["prediction_probs"], dtype=float) if state["prediction_probs"] else None
        good_idx = self.class_order.index("Good")

        # If in real-hardware mode, only replace last row in existing window
        if not self.simulation_mode:
            new_window = state["window_features"].copy()
            new_window[-1, :] = modified_window[-1, :]
        else:
            # In simulation, the full modified window is already simulated
            new_window = modified_window.copy()

        # Pass through LSTM
        new_probs = self.model.predict(new_window[np.newaxis, :, :], verbose=0)[0]
        new_pred_idx = int(np.argmax(new_probs))
        new_pred_label = self.class_order[new_pred_idx]

        # Decide outcome
        if new_pred_label == "Good":
            outcome = "Success"
            reason = "Class flipped to Good."
        else:
            if prev_probs is not None:
                gain = float(new_probs[good_idx] - prev_probs[good_idx])
                if gain >= self.delta_thresh:
                    outcome = "Success"
                    reason = f"Good prob improved by +{gain:.2f} (≥ {self.delta_thresh:.2f})."
                else:
                    outcome = "Fail"
                    reason = f"Good prob gain +{gain:.2f} (< {self.delta_thresh:.2f})."
            else:
                outcome = "Fail"
                reason = "No prior baseline to compare; still not Good."

        # Update state
        state["window_features"] = new_window
        state["predicted_state"] = new_pred_label
        state["prediction_probs"] = new_probs.tolist()
        state["outcome"] = outcome

        state["log"].append(
            f"[Step {state['step']}] OutcomeEval → NewState: {new_pred_label} | "
            f"Probs: {np.round(new_probs, 3).tolist()} | Outcome: {outcome} ({reason})"
        )
        return state


In [20]:
import numpy as np
from typing import TypedDict, List, Optional, Literal, Dict
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import faiss
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

OPENROUTER_API_KEY = "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"
#OPENROUTER_MODEL = "openai/gpt-4"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# LLM init: cheaper model + max_tokens
llm = ChatOpenAI(
    model="openai/gpt-4o-mini",   # or "qwen/qwen2.5-7b-instruct", "meta-llama/llama-3.1-8b-instruct"
    temperature=0.3,
    api_key=OPENROUTER_API_KEY,
    base_url=OPENROUTER_BASE_URL,
    max_tokens=128                 # hard cap
)

LLM_CACHE = {}

class AgentState(TypedDict):
    step: int
    window_features: np.ndarray       # shape (15, 7)
    predicted_state: Optional[Literal["Good", "Moderate", "Poor"]]
    prediction_probs: Optional[List[float]]
    action: Optional[str]
    outcome: Optional[Literal["Success", "Fail", "Pending"]]
    similar_case: Optional[List[dict]]
    log: List[str]

class SelfRAGMemory:
    """Stores numeric cases for FAISS retrieval."""
    def __init__(self, dim):
        self.dim = dim
        self.index = faiss.IndexFlatL2(dim)
        self.metadata = []  # (state, action, outcome)

    def add(self, input_window, state, action, outcome):
        vec = input_window.flatten().astype('float32')
        vec /= np.linalg.norm(vec) + 1e-8
        self.index.add(np.expand_dims(vec, axis=0))
        self.metadata.append((state, action, outcome))

    def retrieve(self, input_window, k=3):
        if len(self.metadata) == 0:
            return []
        query_vec = input_window.flatten().astype('float32')
        query_vec /= np.linalg.norm(query_vec) + 1e-8
        D, I = self.index.search(np.expand_dims(query_vec, axis=0), k)

        results = []
        for idx, dist in zip(I[0], D[0]):
            if idx < len(self.metadata):
                similarity = 1 / (1 + dist)
                results.append({
                    'state': self.metadata[idx][0],
                    'action': self.metadata[idx][1],
                    'outcome': self.metadata[idx][2],
                    'similarity': similarity
                })
        return results

class GlobalLogMemory:
    """Keeps human-readable logs of the whole run."""
    def __init__(self):
        self.logs = []
    
    def add_log(self, entry: str):
        self.logs.append(entry)

    def get_full_log(self):
        return "\n".join(self.logs)

class LSTMClassifierNode:
    def __init__(self, model_path: str, scaler: StandardScaler, class_order: List[str]):
        self.model = load_model(model_path)
        self.scaler = scaler
        self.class_order = class_order

    def __call__(self, state: AgentState, global_log: GlobalLogMemory) -> AgentState:
        window = state["window_features"].reshape(1, 15, 7)
        probs = self.model.predict(window, verbose=0)
        pred_idx = int(np.argmax(probs))
        pred_class = self.class_order[pred_idx]

        state["predicted_state"] = pred_class
        state["prediction_probs"] = probs[0].tolist()
        log_entry = f"[Step {state['step']}] Prediction: {pred_class} | Probs: {np.round(probs[0], 3)}"
        state["log"].append(log_entry)
        global_log.add_log(log_entry)
        return state

@tool
def select_action(current_state: str, retrieved_memories: List[Dict]) -> str:
    """Uses LLM to choose the best action."""
    prompt = ChatPromptTemplate.from_template("""
    You are an AI network optimization agent.

    Current State: {state}
    Retrieved Memory:
    {memory_items}

    Based on the above, suggest the best action to improve the state.
    Respond briefly, e.g., "Lower Modulation to QPSK + Max FEC".
    """)

    memory_text = "\n".join(
        f"{i+1}. State: {m['state']}, Action: {m['action']}, Outcome: {m['outcome']} (Sim: {m['similarity']:.2f})"
        for i, m in enumerate(retrieved_memories)
    )

    cache_key = f"{current_state}::{memory_text}"
    if cache_key in LLM_CACHE:
        return LLM_CACHE[cache_key]

    chain = prompt | llm
    response = chain.invoke({"state": current_state, "memory_items": memory_text})
    action = response.content.strip()
    LLM_CACHE[cache_key] = action
    return action

@tool
def execute_action(current_window: np.ndarray, action: str, simulation_mode: bool = True) -> np.ndarray:
    """Simulates the effect of an action on the feature window."""
    action_effects = {
        "Lower Modulation to QPSK + Max FEC": np.array([+0.5, -0.2, -0.3, 0.0, -0.1, +0.2, 0.0]),
        "Increase Transmit Power":             np.array([+0.7, -0.1, -0.2, 0.0, +0.2, +0.1, 0.0]),
        "Switch Frequency Band":               np.array([-0.2, +0.1, 0.0, +0.3, -0.4, -0.1, +0.2]),
        "Apply Beamforming":                   np.array([+0.6, -0.3, -0.2, 0.0, +0.1, +0.4, 0.0]),
        "Use Relay Node":                      np.array([+0.4, -0.1, -0.1, 0.0, 0.0, 0.0, 0.0]),
    }
    modified_window = current_window.copy()
    for known_action, delta in action_effects.items():
        if known_action.lower() in action.lower():
            if simulation_mode:
                modified_window += delta  # full-window modification
            else:
                modified_window[-1, :] += delta  # last-row only
            break
    return modified_window

class OutcomeEvaluatorNode:
    def __init__(self, model, class_order, delta_thresh: float = 0.10, simulation_mode: bool = True):
        self.model = model
        self.class_order = class_order
        self.delta_thresh = delta_thresh
        self.simulation_mode = simulation_mode

    def __call__(self, state: AgentState, modified_window: np.ndarray,
                 selfrag: SelfRAGMemory, global_log: GlobalLogMemory) -> AgentState:
        prev_probs = np.array(state["prediction_probs"], dtype=float)
        good_idx = self.class_order.index("Good")

        if not self.simulation_mode:
            new_window = state["window_features"].copy()
            new_window[-1, :] = modified_window[-1, :]
        else:
            new_window = modified_window.copy()

        new_probs = self.model.predict(new_window[np.newaxis, :, :], verbose=0)[0]
        new_pred_idx = int(np.argmax(new_probs))
        new_pred_label = self.class_order[new_pred_idx]

        if new_pred_label == "Good":
            outcome, reason = "Success", "Class flipped to Good."
        else:
            gain = float(new_probs[good_idx] - prev_probs[good_idx])
            if gain >= self.delta_thresh:
                outcome, reason = "Success", f"Good prob improved by +{gain:.2f}."
            else:
                outcome, reason = "Fail", f"Good prob gain +{gain:.2f} (< {self.delta_thresh})."

        state["window_features"] = new_window
        state["predicted_state"] = new_pred_label
        state["prediction_probs"] = new_probs.tolist()
        state["outcome"] = outcome

        log_entry = f"[Step {state['step']}] Outcome: {outcome} | NewState: {new_pred_label} | Probs: {np.round(new_probs, 3)} ({reason})"
        state["log"].append(log_entry)
        global_log.add_log(log_entry)

        # Store into retrieval memory
        selfrag.add(new_window, new_pred_label, state["action"], outcome)
        return state    

In [21]:

from langgraph.graph import StateGraph, END
import pandas as pd
import numpy as np


DATASET_PATH = r"C:\Users\DHARMESH M\Documents\Projects\SIG-COG\Dataset\features_dataset.csv"  # <-- your CSV
FEATURE_COLS = ['SNR_dB','BER','PacketLoss_pct','Jitter_ms','Throughput_Mbps','SNR_trend','Jitter_spike']
WINDOW_SIZE = 15
ITERATIONS = 8                 # how many windows to run
START_MODE = "random"      # "sequential" or "random"
SIMULATION_MODE = True         # full-window modification (we’re simulating)
CLASS_ORDER = ["Good", "Moderate", "Poor"]

# ---- shared memories and nodes ----
rag_memory = SelfRAGMemory(dim=WINDOW_SIZE * len(FEATURE_COLS))
global_log = GlobalLogMemory()

# LSTM node — use your real paths/objects
# NOTE: If your trained model expects scaled inputs, ensure the CSV is the scaled version
scaler = StandardScaler()  # present for interface; we’re not calling transform here
classifier_node = LSTMClassifierNode(
    model_path=r"C:\Users\DHARMESH M\Documents\Projects\SIG-COG\ML\link_state_lstm.h5",  # <-- your model path
    scaler=scaler,
    class_order=CLASS_ORDER
)
outcome_node = OutcomeEvaluatorNode(
    model=classifier_node.model,
    class_order=CLASS_ORDER,
    delta_thresh=0.10,
    simulation_mode=SIMULATION_MODE
)

# ---- action fallback (cold-start / no-memory) ----
FALLBACK_ACTIONS = [
    "Lower Modulation to QPSK + Max FEC",
    "Increase Transmit Power",
    "Switch Frequency Band",
    "Apply Beamforming",
    "Use Relay Node",
]

def fallback_decide_action(current_state: str) -> str:
    # simple heuristic before memory/LLM exist
    if current_state == "Poor":
        return "Lower Modulation to QPSK + Max FEC"
    if current_state == "Moderate":
        return "Increase Transmit Power"
    return "Keep current settings"

# ---- graph nodes (wrappers around your classes/tools) ----
def classify_node(state: AgentState) -> AgentState:
    return classifier_node(state, global_log)

def retrieve_node(state: AgentState) -> AgentState:
    if state["predicted_state"] == "Good":
        # skip retrieval entirely for Good
        state["similar_case"] = []
        state["log"].append(f"[Step {state['step']}] Good → skipping RAG/LLM.")
        global_log.add_log(state["log"][-1])
        return state
    retrieved = rag_memory.retrieve(state["window_features"], k=3)
    state["similar_case"] = retrieved
    msg = f"[Step {state['step']}] Retrieved {len(retrieved)} similar cases."
    state["log"].append(msg)
    global_log.add_log(msg)
    return state

def select_action_node(state: AgentState) -> AgentState:
    if state["predicted_state"] == "Good":
        state["action"] = "Keep current settings"
        return state

    # Cold-start or no helpful memory → use fallback (trial-and-error baseline)
    if not state["similar_case"]:
        action = fallback_decide_action(state["predicted_state"])
        state["action"] = action
        msg = f"[Step {state['step']}] No memory yet → fallback action: {action}"
        state["log"].append(msg); global_log.add_log(msg)
        return state

    # Otherwise ask LLM (still cached)
    action = select_action.invoke({
        "current_state": state["predicted_state"],
        "retrieved_memories": state["similar_case"]
    })
    state["action"] = action
    msg = f"[Step {state['step']}] LLM selected action: {action}"
    state["log"].append(msg); global_log.add_log(msg)
    return state

def execute_node(state: AgentState) -> AgentState:
    if state["predicted_state"] == "Good":
        # We won’t mutate window in Good state
        return state
    modified_window = execute_action.invoke({
        "current_window": state["window_features"],
        "action": state["action"],
        "simulation_mode": SIMULATION_MODE
    })
    state["window_features"] = modified_window
    msg = f"[Step {state['step']}] Executed action."
    state["log"].append(msg); global_log.add_log(msg)
    return state

def evaluate_node(state: AgentState) -> AgentState:
    if state["predicted_state"] == "Good":
        # Ensure state is consistent & outcome is trivially success
        state["outcome"] = "Success"
        state["log"].append(f"[Step {state['step']}] Good window → no action; marked Success.")
        global_log.add_log(state["log"][-1])
        return state

    state = outcome_node(state, state["window_features"], rag_memory, global_log)
    # Also store experience in RAG (ensures first Moderate/Poor creates memory)
    rag_memory.add(
        input_window=state["window_features"],
        state=state["predicted_state"],
        action=state["action"],
        outcome=state["outcome"]
    )
    return state

# ---- build the graph ----
graph = StateGraph(AgentState)
graph.add_node("classify", classify_node)
graph.add_node("retrieve", retrieve_node)
graph.add_node("select_action", select_action_node)
graph.add_node("execute", execute_node)
graph.add_node("evaluate", evaluate_node)

graph.add_edge("classify", "retrieve")
graph.add_edge("retrieve", "select_action")
graph.add_edge("select_action", "execute")
graph.add_edge("execute", "evaluate")

graph.set_entry_point("classify")
app = graph.compile()

# ---- runner over your dataset windows ----
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def run_agent_over_dataset(csv_path: str, iterations: int, start_mode="sequential"):
    df = pd.read_csv(csv_path)
    X_raw = df[FEATURE_COLS].values

    # ✅ Fit scaler on the whole dataset (OK for simulation)
    scaler = StandardScaler().fit(X_raw)
    X_scaled = scaler.transform(X_raw)

    # ---- pick start indices as before ----
    max_start = len(X_scaled) - WINDOW_SIZE
    if max_start < 0:
        raise ValueError("Dataset too short for given WINDOW_SIZE.")
    if start_mode == "random":
        starts = np.random.randint(0, max_start + 1, size=iterations)
    else:
        hop = max(1, max_start // max(1, iterations))
        starts = np.clip(np.arange(0, hop*iterations, hop), 0, max_start)

    for i, s in enumerate(starts, 1):
        window_scaled = X_scaled[s:s+WINDOW_SIZE]  # <-- use scaled window

        state: AgentState = {
            "step": i,
            "window_features": window_scaled,  # <-- keep scaled in state
            "predicted_state": None,
            "prediction_probs": None,
            "action": None,
            "outcome": None,
            "similar_case": None,
            "log": []
        }
        state = app.invoke(state)
        print(f"\n[ITER {i}] Pred={state['predicted_state']} | Action={state['action']} | Outcome={state['outcome']}")


    print("\n=== Global Log (tail) ===")
    print("\n".join(global_log.get_full_log().splitlines()[-min(50, len(global_log.logs)):]))



run_agent_over_dataset(DATASET_PATH, ITERATIONS, START_MODE)





[ITER 1] Pred=Good | Action=Keep current settings | Outcome=Success

[ITER 2] Pred=Moderate | Action=Increase Transmit Power | Outcome=Fail

[ITER 3] Pred=Moderate | Action="Lower Modulation to QPSK + Max FEC." | Outcome=Fail

[ITER 4] Pred=Good | Action=Keep current settings | Outcome=Success

[ITER 5] Pred=Moderate | Action=Change Frequency Band. | Outcome=Fail

[ITER 6] Pred=Good | Action=Keep current settings | Outcome=Success

[ITER 7] Pred=Good | Action=Keep current settings | Outcome=Success

[ITER 8] Pred=Good | Action=Keep current settings | Outcome=Success

=== Global Log (tail) ===
[Step 1] Prediction: Good | Probs: [1. 0. 0.]
[Step 1] Good → skipping RAG/LLM.
[Step 1] Good window → no action; marked Success.
[Step 2] Prediction: Moderate | Probs: [0.    0.998 0.002]
[Step 2] Retrieved 0 similar cases.
[Step 2] No memory yet → fallback action: Increase Transmit Power
[Step 2] Executed action.
[Step 2] Outcome: Fail | NewState: Moderate | Probs: [0. 1. 0.] (Good prob gain +0