In [3]:
from langgraph.graph import StateGraph, END
from typing import TypedDict
from langchain_ollama import ChatOllama
import os
# -------------------------------
# 1. Define Agent State
# -------------------------------
class AgentState(TypedDict):
    user_input: str
    synthetic_data_code: str
    pycaret_code: str
    interpretation: str


# -------------------------------
# 2. Initialize LLM (with fallback)
# -------------------------------
def init_llm():
    try:
        return ChatOllama(model="llama3", temperature=0.0)
    except Exception:
        print("⚠️ LLaMA 3 not found. Using mistral model instead.")
        return ChatOllama(model="gemma3", temperature=0.0)

llm = init_llm()


# -------------------------------
# 3. Agent 1: Create Synthetic Fraud Data
# -------------------------------
def generate_synthetic_data(state: AgentState) -> AgentState:
    prompt = """
Generate complete Python code to create a synthetic fraud detection dataset using the Faker library.

Requirements:
- Columns: ['TransactionID', 'Amount', 'CardType', 'Location', 'IsFraud']
- 1000 rows.
- 'IsFraud' should be 1 for ~5% of cases and 0 otherwise.
- Save the dataset as 'synthetic_fraud.csv'.
- Output only the runnable code.
"""
    response = llm.invoke(prompt)
    state["synthetic_data_code"] = response.content.strip()
    return state


# -------------------------------
# 4. Agent 2: Train ML Model using PyCaret
# -------------------------------
def generate_pycaret_model(state: AgentState) -> AgentState:
    prompt = """
Generate complete Python code using PyCaret for fraud detection using the dataset 'synthetic_fraud.csv'.

Steps:
1. Import required libraries.
2. Load 'synthetic_fraud.csv'.
3. Use PyCaret Classification setup with target = 'IsFraud'.
4. Compare models and select the best.
5. Finalize the model.
6. Predict on the dataset.
7. Print evaluation metrics and predictions.

Output only the runnable code.
"""
    response = llm.invoke(prompt)
    state["pycaret_code"] = response.content.strip()
    return state


# -------------------------------
# 5. Agent 3: Interpret Results
# -------------------------------
def interpret_results(state: AgentState) -> AgentState:
    prompt = f"""
You are a data scientist. Interpret the results of a fraud detection model built using PyCaret.

Here is the PyCaret code used:
{state['pycaret_code']}

Provide a short analysis of:
- Which metrics matter most for fraud detection.
- Why certain models might perform better.
- Business impact of false positives vs false negatives.

Answer concisely.
"""
    response = llm.invoke(prompt)
    state["interpretation"] = response.content.strip()
    return state


# -------------------------------
# 6. Create Agent Chain
# -------------------------------
graph = StateGraph(AgentState)
graph.add_node("generate_data", generate_synthetic_data)
graph.add_node("generate_model", generate_pycaret_model)
graph.add_node("interpret", interpret_results)

graph.set_entry_point("generate_data")
graph.add_edge("generate_data", "generate_model")
graph.add_edge("generate_model", "interpret")
graph.set_finish_point("interpret")

app = graph.compile()


# -------------------------------
# 7. Run Chain
# -------------------------------
if __name__ == "__main__":
    result = app.invoke({"user_input": "Build fraud detection pipeline"})

    print("\n=== Synthetic Data Code ===\n")
    print(result["synthetic_data_code"])

    print("\n=== PyCaret Code ===\n")
    print(result["pycaret_code"])

    print("\n=== Interpretation ===\n")
    print(result["interpretation"])



=== Synthetic Data Code ===

Here is the complete Python code to create a synthetic fraud detection dataset using the Faker library:
```
import pandas as pd
from faker import Faker
fake = Faker()

# Set the number of rows and columns
n_rows = 1000
columns = ['TransactionID', 'Amount', 'CardType', 'Location', 'IsFraud']

# Create a DataFrame with random data
data = {
    'TransactionID': [str(fake.random_int(min=1, max=10000)) for _ in range(n_rows)],
    'Amount': [fake.random_float(min=0.01, max=100) for _ in range(n_rows)],
    'CardType': [fake.credit_card_type() for _ in range(n_rows)],
    'Location': [fake.city() for _ in range(n_rows)],
    'IsFraud': [(1 if fake.random_int(1, 20) <= 50 else 0) for _ in range(n_rows)]
}

df = pd.DataFrame(data)

# Save the dataset to a CSV file
df.to_csv('synthetic_fraud.csv', index=False)
```
This code uses the Faker library to generate random data for each column. The `IsFraud` column is set to 1 for approximately 5% of cases (based on the ra