In [None]:
# agent_router_with_memory.py

import os
import re
from typing import Any, Dict, List, Optional, TypedDict

import numpy as np

from langchain import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.schema import BaseMemory
from langchain.memory import ConversationBufferMemory, CombinedMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import Tool, AgentType, create_pandas_dataframe_agent

from deep_translator import GoogleTranslator
from sklearn.metrics.pairwise import cosine_similarity
from langgraph.graph import StateGraph, START, END

from sentence_transformers import SentenceTransformer

# ——————————————————————————————————————————————————————————————
# 1) Llama 3.2-3B local HF loading
# ——————————————————————————————————————————————————————————————

# Directory holding the local Llama checkpoint (placeholder — replace with a real path).
MODEL_PATH = "/path/to/your/llama-3.2-3b-checkpoint"
# local_files_only=True makes both loaders read exclusively from MODEL_PATH
# instead of contacting the Hugging Face Hub.
# NOTE(review): Llama 3.x checkpoints usually ship only a fast-tokenizer
# `tokenizer.json`; the sentencepiece-based LlamaTokenizer may fail to load
# them (AutoTokenizer is the usual choice) — confirm against the checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(
    MODEL_PATH, trust_remote_code=True, local_files_only=True
)
# device_map="auto" lets the loader decide weight placement across the
# available devices; torch_dtype="auto" keeps the checkpoint's own dtype.
model = LlamaForCausalLM.from_pretrained(
    MODEL_PATH, 
    device_map="auto", 
    torch_dtype="auto", 
    trust_remote_code=True,
    local_files_only=True,
)

# Text-generation pipeline shared by everything below: low-temperature
# nucleus sampling, at most 256 newly generated tokens per call.
# NOTE(review): `model` is already placed via device_map="auto" above, so the
# device_map passed here looks redundant — confirm it can be dropped.
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.9,
    device_map="auto",
)

# LangChain-compatible LLM wrapper used by the pandas agent and the chat chain.
hf_llm = HuggingFacePipeline(pipeline=text_gen)


# ——————————————————————————————————————————————————————————————
# 2) Define your 3 tools
# ——————————————————————————————————————————————————————————————

def calculator_tool(expr: str) -> str:
    """Evaluate an arithmetic expression and describe the result in a sentence.

    A leading "calculate" keyword (any casing, optionally followed by a colon)
    is dropped before evaluation, so a raw user query such as
    "Calculate 3 * 8" evaluates instead of failing with a syntax error.
    Expressions without that prefix are evaluated unchanged.

    Args:
        expr: Arithmetic expression, optionally prefixed with "calculate".

    Returns:
        "The result of '<expression>' is <value>" on success, otherwise
        "Error: <reason>" — the tool never raises.
    """
    try:
        expression = re.sub(r"(?i)^\s*calculate\b[\s:]*", "", expr)
        # SECURITY(review): eval() executes arbitrary Python. `expr` comes
        # straight from user input here; replace with a restricted arithmetic
        # parser before exposing this tool to untrusted callers.
        return f"The result of '{expression}' is {eval(expression)}"
    except Exception as e:  # best-effort tool: report the failure as text
        return f"Error: {e}"
# LangChain Tool wrapper (name, callable, description) around calculator_tool.
calculator = Tool("Calculator", calculator_tool, "Basic arithmetic")

def sentiment_analysis_tool(text: str) -> str:
    """Classify text as positive, negative or neutral using keyword lookup.

    Matching is done on whole words (case-insensitive) so that words which
    merely contain a keyword — "goodbye", "badge", "crusade" — no longer
    trigger a false positive/negative. Positive keywords take precedence
    when both kinds are present.

    Args:
        text: Free-form text to classify.

    Returns:
        "Positive 👍", "Negative 👎" or "Neutral 🤔".
    """
    positive_words = {"happy", "great", "good", "excellent"}
    negative_words = {"sad", "bad", "terrible", "awful"}

    # Tokenise into alphabetic words; punctuation and digits act as separators.
    words = set(re.findall(r"[a-z]+", text.lower()))

    if words & positive_words:
        return "Positive 👍"
    if words & negative_words:
        return "Negative 👎"
    return "Neutral 🤔"
# LangChain Tool wrapper (name, callable, description) around sentiment_analysis_tool.
sentiment = Tool("SentimentAnalysis", sentiment_analysis_tool, "Detect sentiment")

# Pandas agent: built on the same hf_llm as the chat chain.
# NOTE(review): `df` is not defined anywhere in the visible part of this
# file; it has to exist before this statement runs.
# NOTE(review): AgentType.OPENAI_FUNCTIONS presumably requires a model with
# OpenAI-style function calling; a HuggingFacePipeline LLM likely needs a
# ReAct-style agent type instead — confirm against the LangChain docs.
# allow_dangerous_code=True opts in to the agent executing code it generates.
pandas_agent = create_pandas_dataframe_agent(
    hf_llm,
    df,
    verbose=True,
    allow_dangerous_code=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)
def run_pandas(q: str) -> str:
    """Forward a natural-language question to the pandas agent.

    Args:
        q: Question about the DataFrame the agent was built with.

    Returns:
        Whatever the agent's ``run`` call produces for the question.
    """
    answer = pandas_agent.run(q)
    return answer
# LangChain Tool wrapper (name, callable, description) around run_pandas.
# Note: this binds the module-level name `pandas` to a Tool object.
pandas = Tool("PandasDataAnalysis", run_pandas, "Query Pandas DataFrame")


# ——————————————————————————————————————————————————————————————
# 3) Episodic Memory & Semantic Cache
# ——————————————————————————————————————————————————————————————

# user id -> most recent conversation lines ("User: ..." / "Bot:  ...").
_long_term: Dict[str,List[str]]    = {}
# original tool query -> answer that was returned for it.
_qa_cache:    Dict[str,str]        = {}
# original tool query -> its sentence embedding (same keys as _qa_cache).
_qa_embeds:   Dict[str,np.ndarray] = {}
# Single hard-coded user/session key used for all memory lookups in this script.
SESSION_ID = "user_123"

def update_long_term_memory(user: str, inp: str, outp: str):
    """Append one exchange to a user's long-term memory, keeping the last 10 lines.

    Args:
        user: Key identifying whose memory to update.
        inp: The user's message; skipped when falsy.
        outp: The bot's reply; skipped when falsy.
    """
    new_lines = []
    if inp:
        new_lines.append(f"User: {inp}")
    if outp:
        new_lines.append(f"Bot:  {outp}")

    history = _long_term.setdefault(user, [])
    history.extend(new_lines)
    # Trim to a rolling window of the ten most recent lines.
    _long_term[user] = history[-10:]

def get_long_term_memory(user: str) -> str:
    """Return the stored memory lines for ``user`` as one newline-separated string.

    An unknown user yields an empty string.
    """
    lines = _long_term.get(user, [])
    return "\n".join(lines)


# Sentence-embedding model used by the semantic cache. It is loaded from a
# local directory (placeholder path) with local_files_only=True so that no
# download from the Hugging Face Hub is attempted.
EMB_MODEL = SentenceTransformer(
    "/path/to/your/all-MiniLM-L6-v2", 
    local_files_only=True
)

def find_similar_cached(q: str, threshold: float = 0.85) -> Optional[str]:
    """Find the cached question that is semantically closest to ``q``.

    Every cached embedding is compared with the embedding of ``q`` by cosine
    similarity. Among the entries scoring at least ``threshold`` the
    best-scoring one wins (the earliest entry on a tie), so the result does
    not depend on insertion order when several cached questions qualify.

    Args:
        q: Incoming question text.
        threshold: Minimum cosine similarity for a cache hit.

    Returns:
        The original cached question (a key of ``_qa_embeds``), or ``None``
        when the cache is empty or nothing reaches the threshold.
    """
    if not _qa_embeds:
        # Nothing to compare against — skip the embedding call entirely.
        return None

    # Encode and reshape the query once; it is invariant across the loop.
    query_vec = EMB_MODEL.encode(q, convert_to_numpy=True).reshape(1, -1)

    best_question = None
    best_score = threshold
    for cached_question, cached_vec in _qa_embeds.items():
        score = cosine_similarity(query_vec, cached_vec.reshape(1, -1))[0][0]
        if score >= threshold and (best_question is None or score > best_score):
            best_question, best_score = cached_question, score
    return best_question


# Prefixes that mark a message as a tool request.
TOOL_KEYWORDS = ["calculate","sentiment","pandas","data"]
def is_tool_query(txt: str) -> bool:
    """Tell whether ``txt`` begins with one of TOOL_KEYWORDS.

    The comparison ignores case and surrounding whitespace. Only a leading
    keyword counts; a keyword that appears later in the text does not make
    the message a tool query.
    """
    normalized = txt.lower().strip()
    return normalized.startswith(tuple(TOOL_KEYWORDS))


# ——————————————————————————————————————————————————————————————
# 4) LangGraph router for those 3 tools
# ——————————————————————————————————————————————————————————————

class RouterState(TypedDict):
    """State dictionary passed between the nodes of the tool-routing graph."""
    # Raw user query.
    input: str
    # Name of the tool node chosen by the router node.
    decision: str
    # Result produced by the selected tool node.
    output: Any

def llm_call_router(state: RouterState) -> RouterState:
    """Pick the tool node for ``state["input"]`` using keyword rules.

    Despite the name no LLM is called: a query starting with "calculate"
    goes to the calculator, one containing "happy", "sad" or "sentiment"
    goes to sentiment analysis, and everything else goes to the pandas
    agent. The choice is written to ``state["decision"]`` and the same
    state object is returned.
    """
    text = state["input"].strip().lower()

    if text.startswith("calculate"):
        decision = "Calculator"
    elif any(cue in text for cue in ("happy","sad","sentiment")):
        decision = "SentimentAnalysis"
    else:
        decision = "PandasDataAnalysis"

    state["decision"] = decision
    return state

def run_Calculator(s):
    """Graph node: run calculator_tool on the query and store its reply in ``s["output"]``."""
    result = calculator_tool(s["input"])
    s["output"] = result
    return s
def run_Sentiment(s):
    """Graph node: run sentiment_analysis_tool on the query and store its label in ``s["output"]``."""
    label = sentiment_analysis_tool(s["input"])
    s["output"] = label
    return s
def run_Pandas(s):
    """Graph node: send the query to run_pandas and store the answer in ``s["output"]``."""
    answer = run_pandas(s["input"])
    s["output"] = answer
    return s

# Graph layout: START -> router -> one of the three tool nodes -> END.
router = StateGraph(state_schema=RouterState)
router.add_node("router", llm_call_router)
router.add_node("Calculator", run_Calculator)
router.add_node("SentimentAnalysis", run_Sentiment)
router.add_node("PandasDataAnalysis", run_Pandas)

router.add_edge(START, "router")
# The router node stores the chosen tool name in state["decision"]; the
# mapping below sends execution to the node with that same name.
router.add_conditional_edges("router", lambda s: s["decision"], {
    "Calculator":"Calculator",
    "SentimentAnalysis":"SentimentAnalysis",
    "PandasDataAnalysis":"PandasDataAnalysis"
})
# Every tool node is terminal.
for node in ("Calculator","SentimentAnalysis","PandasDataAnalysis"):
    router.add_edge(node, END)

# NOTE(review): the entry point is declared both via add_edge(START, "router")
# above and via set_entry_point here — presumably one of them is redundant; confirm.
router.set_entry_point("router")
router_workflow = router.compile()


# ——————————————————————————————————————————————————————————————
# 5) Pure‐chat Llama chain + memory
# ——————————————————————————————————————————————————————————————

# Prompt for the plain-chat path.
# NOTE(review): {chat_history} is rendered twice — once as text inside the
# second system message and once through MessagesPlaceholder. With
# return_messages=True the textual copy is presumably the str() of a message
# list — confirm whether that duplication is intended.
prompt = ChatPromptTemplate.from_messages([
    ("system","You are a helpful AI assistant."),
    ("system","Chat history:\n{chat_history}"),
    ("system","Long-term memory:\n{long_term_memory}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human","{input}"),
])

# Per-conversation buffer that supplies the `chat_history` prompt variable.
short_term = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="input",
    return_messages=True
)
# CombinedMemory is a pydantic model, so the sub-memories must be passed via
# the `memories=` keyword; a positional list is rejected at construction time.
# NOTE(review): LongTermChatMemory is neither defined nor imported in the
# visible part of this file; it presumably supplies {long_term_memory} and
# must be provided before this statement runs — confirm.
long_term = CombinedMemory(
    memories=[short_term, LongTermChatMemory(session_id=SESSION_ID)]
)

# Chain used by converse() whenever a message is not a tool query.
chat_chain = LLMChain(
    prompt=prompt,
    llm=hf_llm,
    memory=long_term
)


# ——————————————————————————————————————————————————————————————
# 6) Unified converse()
# ——————————————————————————————————————————————————————————————

def converse(user_input: str) -> str:
    """Answer one user message via memory recall, a tool, or plain chat.

    Order of precedence:
      1. A message containing "remember" returns the stored long-term memory.
      2. A tool query (see is_tool_query) is answered from the semantic cache
         when a similar question exists; otherwise the routing graph runs and
         the answer is cached and recorded in long-term memory.
      3. Anything else goes to the chat chain and is recorded in long-term
         memory.

    Args:
        user_input: Raw message typed by the user.

    Returns:
        The reply for the message.
    """
    if "remember" in user_input.lower().strip():
        return "Memory:\n" + get_long_term_memory(SESSION_ID)

    if not is_tool_query(user_input):
        # Plain conversation: no tool involved.
        reply = chat_chain.run(user_input)
        update_long_term_memory(SESSION_ID, user_input, reply)
        return reply

    # Tool query: try the semantic cache before running the graph.
    cached_question = find_similar_cached(user_input)
    if cached_question:
        return _qa_cache[cached_question]

    final_state = router_workflow.invoke({"input": user_input})
    reply = final_state["output"]
    _qa_cache[user_input] = reply
    _qa_embeds[user_input] = EMB_MODEL.encode(user_input, convert_to_numpy=True)
    update_long_term_memory(SESSION_ID, user_input, reply)
    return reply


# ——————————————————————————————————————————————————————————————
# 7) Smoke‐test
# ——————————————————————————————————————————————————————————————

if __name__ == "__main__":
    # One sample message per feature; each reply is printed after an arrow.
    smoke_queries = [
        "Calculate 3 * 8",
        "How am I feeling? Sentiment 'I am sad'",
        "Show me summary stats on the DataFrame",
        "Do you remember our previous requests?",
    ]
    for q in smoke_queries:
        reply = converse(q)
        print("→", reply)


In [None]:
import re, warnings
from typing import TypedDict, Any
from transformers import pipeline, logging as tf_logging
from langgraph.graph import StateGraph, START, END

# — Silence warnings —
# Suppresses every Python warning for the rest of the session and limits
# transformers' own logging to errors. Convenient for a demo, but it also
# hides deprecation and generation-config warnings.
warnings.filterwarnings("ignore")
tf_logging.set_verbosity_error()

# ——— State schema ———
class State(TypedDict):
    """State dictionary passed between the nodes of the routing graph."""
    # Raw user message.
    input: str
    # Route chosen by the router: "calculator", "chat", "short_term" or "long_term".
    decision: str
    # Result produced by the node that handled the message.
    output: Any

# ——— In‑memory stores ———
# Outputs of the calculator and chat nodes, oldest first (process lifetime only).
short_term_memory: list[str] = []
# Facts the user asked the assistant to remember (process lifetime only).
long_term_memory:  list[str] = []

# ——— LLM wrapper ———
def generate_chat_template(model, tokenizer, sys_pmt: str, prompt: str,
                           temperature=0.0, top_p=0.9,
                           max_new_tokens=100, do_sample=False) -> str:
    """Run one system+user exchange through the model and return only the reply.

    Args:
        model: Causal LM handed to the text-generation pipeline.
        tokenizer: Tokenizer providing the chat template.
        sys_pmt: System prompt.
        prompt: User message.
        temperature: Sampling temperature; forwarded only when ``do_sample``.
        top_p: Nucleus-sampling cutoff; forwarded only when ``do_sample``.
        max_new_tokens: Upper bound on generated tokens.
        do_sample: Sample when True, decode greedily when False.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    msgs = [
        {"role": "system", "content": sys_pmt},
        {"role": "user",   "content": prompt},
    ]

    # A model loaded with device_map=... is already placed by accelerate
    # (it carries `hf_device_map`) and must not be given an explicit device;
    # otherwise keep the original behavior of running on GPU 0.
    pipe_kwargs = {} if getattr(model, "hf_device_map", None) else {"device": 0}
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                    **pipe_kwargs)

    formatted = pipe.tokenizer.apply_chat_template(msgs,
                                                   tokenize=False,
                                                   add_generation_prompt=True)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        # Let the pipeline drop the prompt itself instead of splitting the
        # full text on `formatted`, which breaks if the echo is not verbatim.
        "return_full_text": False,
    }
    if do_sample:
        # temperature/top_p only apply to sampling; in greedy mode they are
        # ignored at best and temperature=0.0 is not a valid sampling value.
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = top_p

    out = pipe(formatted, **gen_kwargs)
    return out[0]["generated_text"].strip()

# ——— Tools ———
def calculator(state: State) -> State:
    """Graph node: evaluate the arithmetic in ``state["input"]``.

    A leading "calculate" (any casing) is removed, the remainder is
    evaluated, and the value — or the string "Error" on any failure — is
    stored in ``state["output"]`` and appended, as text, to short_term_memory.
    """
    expression = re.sub(r'(?i)^calculate\s*', '', state["input"])
    expression = expression.strip()

    # SECURITY(review): removing __builtins__ does not make eval() a sandbox;
    # treat this as unsafe for untrusted input.
    # NOTE(review): the router also sends bare expressions containing '^'
    # here, which Python evaluates as bitwise XOR — confirm that is intended.
    restricted_globals = {"__builtins__":None}
    try:
        value = eval(expression, restricted_globals, {})
    except Exception:
        value = "Error"

    state["output"] = value
    short_term_memory.append(str(value))
    return state

def chat(state: State) -> State:
    """Graph node: answer ``state["input"]`` with the LLM.

    The reply is generated with sampling at temperature 0.7, stored in
    ``state["output"]`` and appended to short_term_memory.
    """
    answer = generate_chat_template(
        model,
        tokenizer,
        sys_pmt="You are a concise assistant.",
        prompt=state["input"],
        temperature=0.7,
        do_sample=True,
    )
    short_term_memory.append(answer)
    state["output"] = answer
    return state

def short_term(state: State) -> State:
    """Graph node: report the five most recent short-term memory entries.

    The entries are joined with newlines, oldest first, into ``state["output"]``.
    """
    state["output"] = "\n".join(short_term_memory[-5:])
    return state

def long_term(state: State) -> State:
    """Graph node: store a fact in long-term memory, or recall everything stored.

    Accepted store commands (case-insensitive), where X is the fact:
        "Remember X", "Remember: X",
        "Remember the following X", "Remember the following: X"
    A store command appends X to long_term_memory and leaves
    ``state["output"]`` empty. Any other input (e.g. "What is my name?")
    returns all stored facts joined by newlines.
    """
    txt = state["input"].strip()
    # The colon is optional so that plain "Remember X" is stored too; the
    # word boundary keeps words like "remembering" from counting as a command.
    m = re.match(
        r'(?i)^remember\b(?:\s+the\s+following)?\s*:?\s*(.+)',
        txt,
    )
    if m:
        fact = m.group(1).strip()
        long_term_memory.append(fact)
        state["output"] = ""        # silent ack
    else:
        state["output"] = "\n".join(long_term_memory)
    return state

# ——— Router ———
def llm_call_router(state: State) -> State:
    """Decide which node handles ``state["input"]`` and record it in ``state["decision"]``.

    Rules are tried in order:
      1. starts with "remember", or matches "what is my name" -> "long_term"
      2. starts with "show recent" / "what did we do recently" -> "short_term"
      3. starts with "calculate", or consists solely of digits, dots,
         whitespace, + - * / ^ and parentheses              -> "calculator"
      4. otherwise the LLM is asked to classify the message; "calculator" is
         chosen when its answer contains that word, else "chat".
    """
    txt = state["input"].strip()
    low = txt.lower()

    if low.startswith("remember") or re.match(r'(?i)^what is my name\??', txt):
        # explicit memory commands
        decision = "long_term"
    elif low.startswith(("show recent", "what did we do recently")):
        decision = "short_term"
    elif low.startswith("calculate") or re.fullmatch(r"[\d\.\s\+\-\*\/\^\(\)]+", txt):
        # explicit calculator request or a bare arithmetic expression
        decision = "calculator"
    else:
        # fallback: ask llama (greedy decoding)
        label = generate_chat_template(
            model,
            tokenizer,
            sys_pmt="Classify this as exactly 'calculator' or 'chat'.",
            prompt=txt,
            temperature=0.0,
            top_p=0.9,
            do_sample=False,
        ).lower()
        decision = "calculator" if "calculator" in label else "chat"

    state["decision"] = decision
    return state

def route_decision(state: State) -> str:
    """Translate ``state["decision"]`` into the label of the graph node to run.

    Raises:
        KeyError: if the decision is not one of the four known routes.
    """
    node_for_decision = dict(
        calculator="CALC",
        chat="CHAT",
        short_term="SHORT",
        long_term="LONG",
    )
    decision = state["decision"]
    return node_for_decision[decision]

# ——— Build & compile ———
# Graph layout: START -> ROUTER -> one of CALC / CHAT / SHORT / LONG -> END.
router = StateGraph(State)
router.add_node("CALC",    calculator)
router.add_node("CHAT",    chat)
router.add_node("SHORT",   short_term)
router.add_node("LONG",    long_term)
router.add_node("ROUTER",  llm_call_router)

router.add_edge(START, "ROUTER")
# route_decision turns state["decision"] into one of the labels below, which
# map one-to-one onto the handler nodes.
router.add_conditional_edges(
    "ROUTER",
    route_decision,
    {"CALC":"CALC","CHAT":"CHAT","SHORT":"SHORT","LONG":"LONG"},
)
# Every handler node is terminal.
router.add_edge("CALC",  END)
router.add_edge("CHAT",  END)
router.add_edge("SHORT", END)
router.add_edge("LONG",  END)

# NOTE(review): the entry point is declared both via add_edge(START, "ROUTER")
# above and via set_entry_point here — presumably one of them is redundant; confirm.
router.set_entry_point("ROUTER")
router_workflow = router.compile()

# ——— Test harness ———
# One sample message per route; order matters because the memory queries
# read what the earlier messages stored.
tests = [
    "Calculate 3 * 8",
    "Remember the following: my name is Lucius Vila Volum",
    "What is my name?",
    "Show recent",
    "Tell me a joke"
]

for q in tests:
    state = router_workflow.invoke({"input": q})
    out = state["output"]
    # Show the placeholder only for a missing or empty output. A plain
    # truthiness test would also hide legitimate falsy results such as a
    # calculator answer of 0.
    if out is None or out == "":
        out = "(blank)"
    print(f">>> {q!r}")
    print("-> Routed to:", state["decision"])
    print("-> Output:\n", out, "\n")
