In [None]:
!pip install -q chromadb sentence-transformers transformers accelerate scikit-learn pandas numpy

In [None]:
!pip install langgraph langchain langchain-community

In [None]:
import os, random, time
from datetime import datetime, UTC
import numpy as np
import pandas as pd
from typing import List, Dict
from langgraph.graph import StateGraph, END
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [None]:
# --- Configuration: everything a reader might want to tune lives here ---
CHROMA_DIR = "./chromadb_langgraph"   # persist directory handed to the Chroma vector store
EMBED_MODEL = "all-MiniLM-L6-v2"      # model name handed to the embedding wrapper
LLM_MODEL = "google/flan-t5-small"    # seq2seq checkpoint used for answer synthesis
ALERT_THRESHOLD = 0.75                # fraud probability above which an alert is raised
SIM_STEPS = 60                        # default number of steps for run_simulation()
SEED = 42                             # seed shared by both random number generators

# The synthetic data generators draw from both `random` and `np.random`;
# seed both so that Restart Kernel -> Run All reproduces the same data stream.
random.seed(SEED)
np.random.seed(SEED)

os.makedirs(CHROMA_DIR, exist_ok=True)

In [None]:
# Embedding function built from the model named in EMBED_MODEL.
embeddings = SentenceTransformerEmbeddings(model_name=EMBED_MODEL)
# Chroma collection "memory", embedded with `embeddings` and pointed at CHROMA_DIR.
# NOTE(review): because a persist directory is given, entries from earlier kernel
# sessions presumably remain in the collection — confirm if a clean store is expected.
vector_store = Chroma(collection_name="memory", embedding_function=embeddings, persist_directory=CHROMA_DIR)

In [None]:
# Load the tokenizer and seq2seq weights named by LLM_MODEL, then wrap both in a
# "text2text-generation" pipeline that call_local_llm() uses.
llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
# NOTE(review): device=-1 is presumably the pipeline's "run on CPU" setting — confirm
# against the transformers documentation before changing it.
llm_pipe = pipeline("text2text-generation", model=llm_model, tokenizer=llm_tokenizer, device=-1)

def call_local_llm(prompt: str, max_length: int = 256):
    """Run `prompt` through the local text2text pipeline and return its text.

    Args:
        prompt: Text handed to `llm_pipe`.
        max_length: Forwarded to the pipeline as `max_length`.

    Returns:
        The `generated_text` of the first result, stripped of surrounding
        whitespace. Sampling is disabled (`do_sample=False`).
    """
    generations = llm_pipe(prompt, max_length=max_length, do_sample=False)
    first_result = generations[0]
    generated = first_result["generated_text"]
    return generated.strip()

In [None]:
# Shared in-memory store mutated by the simulation and read by the agent tools.
state = dict(
    transactions=[],       # every generated transaction dict
    ticks=[],              # every generated market tick dict
    fraud_alerts=[],       # alert dicts appended by train_fraud()
    blocked_accounts={},   # account id -> {"count": int, "blocked": bool}
    invest_ranking=[],     # latest (symbol, probability) ranking from train_invest()
)

In [None]:
# Lazily-initialised online models. Each trio holds the classifier, its scaler and
# the feature count it was built for; train_fraud() / train_invest() fill them in.
fraud_model = fraud_scaler = fraud_feat = None
inv_model = inv_scaler = inv_feat = None

In [None]:
def generate_transactions(n=1):
    """Yield `n` synthetic transaction dicts, one at a time.

    Roughly 8% of transactions are flagged as fraud; a flagged transaction has
    1000 added to its exponentially distributed amount. Ids continue from the
    number of transactions already held in `state["transactions"]`.

    Each dict carries: id, acct, amount, merchant, country, label, timestamp
    (UTC ISO string) and a 5-element `features` array.
    """
    merchant_pool = ["grocery","electronics","crypto","travel","utilities","gambling"]
    country_pool = ["US","IN","DE","FR","CA"]
    for offset in range(n):
        is_fraud = random.random() < 0.08
        surcharge = 1000 if is_fraud else 0
        amount = np.random.exponential(80) + surcharge

        # The draws below stay in the original order so the random stream is unchanged.
        tx_id = f"tx_{len(state['transactions'])+offset}"
        account = random.randint(1,2000)
        merchant = random.choice(merchant_pool)
        country = random.choice(country_pool)
        stamp = datetime.now(UTC).isoformat()
        # NOTE: the third feature is the fraud flag itself, i.e. it mirrors the label.
        feature_vec = np.array([amount/3000.0, random.random(), 1.0 if is_fraud else 0.0, random.random(), random.random()])

        yield {
            "id": tx_id,
            "acct": account,
            "amount": float(amount),
            "merchant": merchant,
            "country": country,
            "label": int(is_fraud),
            "timestamp": stamp,
            "features": feature_vec,
        }

In [None]:
def generate_ticks(n=1):
    """Yield `n` synthetic market ticks, one at a time.

    Each tick picks a random symbol, applies a normally distributed relative
    change (sigma 0.02) to that symbol's last price and reports the new price.

    The last price of every symbol is remembered between calls (stored on the
    function object), so consecutive batches continue one random walk per
    symbol instead of restarting from a fresh 100-150 price on every call.
    Re-running the defining cell resets the walk.

    Each dict carries: id, symbol, price, volume, change, label (1 when the
    change is positive) and a 5-element `features` array. Ids continue from
    the number of ticks already held in `state["ticks"]`.
    """
    syms = ["AAPL", "TSLA", "BTC-USD", "GOOG"]
    # Persistent per-symbol prices; created on first use.
    prices = generate_ticks.__dict__.setdefault("_last_prices", {})
    for sym in syms:
        if sym not in prices:
            prices[sym] = 100 + random.random() * 50
    for i in range(n):
        sym = random.choice(syms)
        change = np.random.normal(0, 0.02)
        prices[sym] *= (1 + change)
        price = prices[sym]
        vol = int(np.random.exponential(100))
        label = int(change > 0)
        yield {
            "id": f"tick_{len(state['ticks'])+i}",
            "symbol": sym,
            "price": float(price),
            "volume": vol,
            "change": change,
            "label": label,
            "features": np.array([price/200.0, vol/500.0, change, random.random(), random.random()])
        }


In [None]:
def index_texts(ids, texts):
    """Add `texts` to the vector store under the matching `ids`.

    Args:
        ids: Iterable of document ids, one per text.
        texts: Iterable of strings to embed and store.

    Raises:
        ValueError: If `ids` and `texts` differ in length.

    An empty batch is a no-op: nothing is sent to the vector store.
    """
    ids = list(ids)
    texts = list(texts)
    if len(ids) != len(texts):
        raise ValueError(f"index_texts: got {len(ids)} ids for {len(texts)} texts")
    if not texts:
        # Nothing to index; skip the embedding/store round trip entirely.
        return
    vector_store.add_texts(texts, ids=ids)

def retrieve_texts(query: str, k: int = 5):
    """Return the page contents of the `k` stored documents most similar to `query`."""
    matches = vector_store.similarity_search(query, k=k)
    contents = []
    for doc in matches:
        contents.append(doc.page_content)
    return contents

In [None]:
def train_fraud(batch):
    """Update the online fraud model with `batch` and return the alerts it raises.

    Args:
        batch: List of transaction dicts carrying `features`, `label`, `id`,
            `acct`, `amount`, `merchant`, `country` and `timestamp`.

    Returns:
        A list of alert dicts, one per transaction whose predicted fraud
        probability exceeds ALERT_THRESHOLD. The same alerts are appended to
        `state["fraud_alerts"]`. An empty batch returns `[]` and changes nothing.

    The model and scaler are (re)created when no model exists yet or when the
    feature count changes. The scaler's running mean/variance are updated with
    every batch via `partial_fit`, rather than being frozen on the first small
    batch. Note that probabilities are computed on the same batch the model
    was just updated with.
    """
    global fraud_model, fraud_scaler, fraud_feat
    if not batch:
        return []
    X = np.vstack([t["features"] for t in batch])
    y = np.array([t["label"] for t in batch])
    n_features = X.shape[1]

    needs_init = (fraud_model is None) or (fraud_feat != n_features)
    if needs_init:
        fraud_model = SGDClassifier(loss="log_loss", max_iter=5, tol=None, learning_rate="adaptive", eta0=0.01)
        fraud_scaler = StandardScaler()
        fraud_feat = n_features

    # Incrementally refine the scaling statistics, then scale once and reuse.
    fraud_scaler.partial_fit(X)
    X_scaled = fraud_scaler.transform(X)

    if needs_init:
        # The first partial_fit call must be told the full set of classes.
        fraud_model.partial_fit(X_scaled, y, classes=[0, 1])
        print(f"[INFO] Fraud model initialized ({fraud_feat} features).")
    else:
        fraud_model.partial_fit(X_scaled, y)

    probs = fraud_model.predict_proba(X_scaled)[:, 1]
    alerts = [
        {
            "tx_id": t["id"],
            "acct": t["acct"],
            "prob": round(float(p), 3),
            "amount": t["amount"],
            "merchant": t["merchant"],
            "country": t["country"],
            "timestamp": t["timestamp"]
        }
        for t, p in zip(batch, probs)
        if p > ALERT_THRESHOLD
    ]
    if alerts:
        print(f"###### {len(alerts)} fraud alerts detected")
        for a in alerts:
            print(f"  TX={a['tx_id']} ACC={a['acct']} PROB={a['prob']} AMT={a['amount']}")
        state["fraud_alerts"].extend(alerts)
    return alerts

In [None]:
def train_invest(batch):
    """Update the online investment model with `batch` and refresh the ranking.

    Args:
        batch: List of tick dicts carrying `features`, `label` and `symbol`.

    Returns:
        A list of `(symbol, mean_probability)` tuples sorted from highest to
        lowest probability, also stored in `state["invest_ranking"]`. The
        ranking covers only the symbols present in this batch. An empty batch
        returns `[]` and leaves the stored ranking untouched.

    The model and scaler are (re)created when no model exists yet or when the
    feature count changes. The scaler's running statistics are updated with
    every batch via `partial_fit`, rather than being frozen on the first small
    batch. Probabilities are converted to plain `float` so they print cleanly
    when the ranking is interpolated into the LLM prompt.
    """
    global inv_model, inv_scaler, inv_feat
    if not batch:
        return []
    X = np.vstack([t["features"] for t in batch])
    y = np.array([t["label"] for t in batch])
    n_features = X.shape[1]

    needs_init = (inv_model is None) or (inv_feat != n_features)
    if needs_init:
        inv_model = SGDClassifier(loss="log_loss", max_iter=5, tol=None)
        inv_scaler = StandardScaler()
        inv_feat = n_features

    # Incrementally refine the scaling statistics, then scale once and reuse.
    inv_scaler.partial_fit(X)
    X_scaled = inv_scaler.transform(X)

    if needs_init:
        # The first partial_fit call must be told the full set of classes.
        inv_model.partial_fit(X_scaled, y, classes=[0, 1])
    else:
        inv_model.partial_fit(X_scaled, y)

    probs = inv_model.predict_proba(X_scaled)[:, 1]
    df = pd.DataFrame({"symbol": [t["symbol"] for t in batch], "prob": probs})
    ranking = df.groupby("symbol")["prob"].mean().sort_values(ascending=False)
    # Plain floats instead of NumPy scalars keep the prompt text readable.
    state["invest_ranking"] = [(symbol, float(prob)) for symbol, prob in ranking.items()]
    return state["invest_ranking"]

In [None]:
def update_blocked(alerts):
    """Count one strike per alert against its account; block at three strikes.

    Updates `state["blocked_accounts"]` in place. Each entry has the form
    `{"count": int, "blocked": bool}`; `blocked` turns True once `count`
    reaches 3.
    """
    for alert in alerts:
        account = alert["acct"]
        record = state["blocked_accounts"].setdefault(account, {"count":0,"blocked":False})
        record["count"] += 1
        if record["count"] >= 3:
            record["blocked"] = True

def get_blocked():
    """Return the ids of all accounts whose record has a truthy `blocked` flag."""
    blocked_ids = []
    for account, record in state["blocked_accounts"].items():
        if record.get("blocked"):
            blocked_ids.append(account)
    return blocked_ids

In [None]:
def get_fraud_summary(n=5):
    """Return up to `n` recorded fraud alerts, highest probability first.

    Returns a list of record dicts, or `[]` when no alerts have been recorded.
    """
    alerts_df = pd.DataFrame(state["fraud_alerts"])
    if alerts_df.empty:
        return []
    ranked = alerts_df.sort_values("prob", ascending=False)
    top_rows = ranked.head(n)
    return top_rows.to_dict("records")

def get_investment_picks(n=3):
    """Return the first `n` entries of the stored investment ranking."""
    ranking = state["invest_ranking"]
    return ranking[:n]

In [None]:
def synthesize_answer(query: str, retrieved, tool_outputs):
    """Build the FinGuard prompt and return the local LLM's answer.

    Args:
        query: The user's question.
        retrieved: Retrieved context snippets; only the first three are used.
        tool_outputs: Structured tool results, interpolated as-is.

    Returns:
        The text produced by `call_local_llm` for the assembled prompt.
    """
    context_snippets = retrieved[:3]
    # The leading empty entry and the 4-space indents reproduce the layout of
    # the prompt exactly as the model has always received it.
    prompt_lines = [
        "",
        "    You are **FinGuard**, an AI financial intelligence assistant.",
        "    Your task is to provide **clear, factual, and risk-aware** insights",
        "    based on both structured model outputs and retrieved historical context.",
        f"    User query: {query}",
        f"    Structured data:{tool_outputs}",
        f"    Retrieved context:{context_snippets}",
        "    Explain clearly and concisely.",
    ]
    prompt = "\n".join(prompt_lines)
    return call_local_llm(prompt)

In [None]:
def classify_intent(s):
    """Tag the agent state with an intent derived from keywords in the query.

    Checks the lower-cased query for "fraud", "invest" and "block", in that
    order; the first keyword found becomes `s["intent"]`. With no match the
    intent is "general". The state dict is modified in place and returned.
    """
    lowered = s["query"].lower()
    for keyword in ("fraud", "invest", "block"):
        if keyword in lowered:
            s["intent"] = keyword
            break
    else:
        s["intent"] = "general"
    return s

In [None]:
def retrieve_node(s):
    """Graph node: store the five texts most similar to the query in `s["retrieved"]`."""
    query_text = s["query"]
    s["retrieved"] = retrieve_texts(query_text, k=5)
    return s

In [None]:
def tool_node(s):
    """Graph node: run the tool matching `s["intent"]` and store its output.

    Writes a dict to `s["tool_outputs"]` with at most one key:
    `fraud_summary`, `invest_picks` or `blocked_accounts`. Any other intent
    (including a missing one) yields an empty dict.
    """
    intent = s.get("intent")
    outputs = {}
    if intent == "fraud":
        outputs["fraud_summary"] = get_fraud_summary()
    elif intent == "invest":
        outputs["invest_picks"] = get_investment_picks()
    elif intent == "block":
        blocked_entries = []
        for account in get_blocked():
            blocked_entries.append({"account": account, "details": state["blocked_accounts"][account]})
        outputs["blocked_accounts"] = blocked_entries
    s["tool_outputs"] = outputs
    return s

In [None]:
def llm_reason_node(s):
    """Graph node: synthesize the final answer and store it in `s["answer"]`.

    Missing `retrieved` / `tool_outputs` entries default to an empty list and
    an empty dict respectively.
    """
    context = s.get("retrieved", [])
    tool_results = s.get("tool_outputs", {})
    s["answer"] = synthesize_answer(s["query"], context, tool_results)
    return s

In [None]:
from typing import TypedDict, Any
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""
    query: str          # user question; read by every node that needs the query text
    intent: str         # set by classify_intent: "fraud", "invest", "block" or "general"
    retrieved: list     # texts stored by retrieve_node
    tool_outputs: dict  # tool results stored by tool_node
    answer: str         # final text stored by llm_reason_node
# Linear pipeline: ClassifyIntent -> RetrieveRAG -> RunTools -> LLMReason -> END.
graph = StateGraph(AgentState)

for _node_name, _handler in (
    ("ClassifyIntent", classify_intent),
    ("RetrieveRAG", retrieve_node),
    ("RunTools", tool_node),
    ("LLMReason", llm_reason_node),
):
    graph.add_node(_node_name, _handler)

for _src, _dst in (
    ("ClassifyIntent", "RetrieveRAG"),
    ("RetrieveRAG", "RunTools"),
    ("RunTools", "LLMReason"),
    ("LLMReason", END),
):
    graph.add_edge(_src, _dst)

graph.set_entry_point("ClassifyIntent")
agent_graph = graph.compile()


In [None]:
def run_simulation(steps=SIM_STEPS):
    """Run `steps` rounds of data generation, indexing and online training.

    Every round generates 10-30 transactions and 5-15 ticks, appends them to
    `state`, indexes a text description of each in the vector store, updates
    the fraud model (feeding its alerts to `update_blocked`) and updates the
    investment model. A progress line is printed every tenth step, and each
    round ends with a 0.02 s pause.
    """
    for step in range(steps):
        # Draw each batch size immediately before its batch is generated so the
        # random stream is consumed in the original order.
        tx_count = random.randint(10,30)
        tx_batch = list(generate_transactions(tx_count))
        tick_count = random.randint(5,15)
        tick_batch = list(generate_ticks(tick_count))

        state["transactions"].extend(tx_batch)
        state["ticks"].extend(tick_batch)

        tx_ids, tx_docs = [], []
        for tx in tx_batch:
            tx_ids.append(tx["id"])
            tx_docs.append(f"Transaction {tx['id']} amount={tx['amount']} merchant={tx['merchant']} label={tx['label']}")
        index_texts(tx_ids, tx_docs)

        tick_ids, tick_docs = [], []
        for tick in tick_batch:
            tick_ids.append(tick["id"])
            tick_docs.append(f"Tick {tick['id']} symbol={tick['symbol']} price={tick['price']}")
        index_texts(tick_ids, tick_docs)

        new_alerts = train_fraud(tx_batch)
        update_blocked(new_alerts)
        train_invest(tick_batch)

        if step % 10 == 0:
            print(f"[STEP {step}] TX={len(state['transactions'])} FRAUD={len(state['fraud_alerts'])} BLOCKED={len(get_blocked())}")
        time.sleep(0.02)

In [None]:
# Populate the vector store and train the online models with a short 10-step run
# before asking the agent anything.
run_simulation(10)

# One example query per keyword intent recognised by classify_intent().
queries = [
    "Show me recent fraud alerts and risky accounts.",
    "Which accounts are currently blocked?",
    "Give me the top investment picks right now."
]
for q in queries:
    print(f"\n> Query: {q}")
    # The graph starts from a state holding only the query; the nodes fill in the rest.
    result = agent_graph.invoke({"query": q})
    print("🧠 Agentic Answer:\n", result["answer"])