## Archetype Tutorial

#### Step 1: Setup
First, let's set up our environment and import the necessary libraries:

In [1]:
import os

from agent_torch.core.llm.archetype import Archetype
from agent_torch.core.llm.backend import LangchainLLM
from agent_torch.core.llm.behavior import Behavior
from agent_torch.populations import NYC

# Read the key from the environment instead of hardcoding it in the notebook.
# Evaluates to None (the previous placeholder value) when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

Setup: COVID Cases Data and Unemployment Rate

In [2]:
from utils import get_covid_cases_data

# CSV of county data handed to `get_covid_cases_data`.
# NOTE(review): this is an absolute path rooted at "/" - confirm it resolves in
# the environment this notebook is run from.
csv_path = "/models/covid/data/county_data.csv"

# Case data requested for "Kings County". The variable name suggests a monthly
# series, but the helper's return type is not visible here - TODO confirm.
monthly_cases_kings = get_covid_cases_data(county_name="Kings County", csv_path=csv_path)

#### Step 2: Initialise LLM Instance

We can use either the LangChain or the DSPy backend to initialise an LLM instance. While these are the frameworks we currently support, you may use a framework of your own choice by extending the `LLMBackend` class provided with AgentTorch.

Let's see how we can use LangChain to initialise an LLM instance:

GPT 3.5 Turbo

In [3]:
# Instruction text passed to the backend as `agent_profile`; it asks the model
# to answer with a single number between 0 and 1.
agent_profile = "You are an helpful agent who is trying to help the user make a decision. Give answer as a single number between 0 and 1, only."

# LangChain-backed LLM instance configured for the "gpt-3.5-turbo" model.
llm_langchain_35 = LangchainLLM(
    model="gpt-3.5-turbo",
    agent_profile=agent_profile,
    openai_api_key=OPENAI_API_KEY,
)

#### Step 3: Define an Archetype

In [None]:
# New API: Define a Template and use Archetype directly (no Behavior object)
import agent_torch.core.llm.template as lm
from agent_torch.core.llm.mock_llm import MockLLM
import agent_torch.populations.astoria as astoria
import pandas as pd

class DemoTemplate(lm.Template):
    """Prompt template used by the willingness demo.

    Declares a system prompt, five template variables (``age``, ``abilities``
    and ``work_context`` are flagged ``learnable=True``; ``gender`` and
    ``soc_code`` are not), the prompt body with ``{name}`` placeholders, and
    the output instruction.
    """

    system_prompt = "You are evaluating willingness based on job profile and context."

    # Template variables; each name matches a `{placeholder}` in the prompt body.
    age = lm.Variable(desc="agent age", learnable=True)
    gender = lm.Variable(desc="agent gender", learnable=False)
    soc_code = lm.Variable(desc="job id", learnable=False)
    abilities = lm.Variable(desc="abilities required", learnable=True)
    work_context = lm.Variable(desc="work context", learnable=True)

    def __prompt__(self):
        """Store the prompt body (with placeholders) on ``self.prompt_string``."""
        persona_part = "You are in your {age}'s and are a {gender}. "
        job_part = "As a {soc_code}...you have {abilities} and work in {work_context}."
        self.prompt_string = persona_part + job_part

    def __output__(self):
        """Return the instruction describing the expected response format."""
        instruction = "Rate your willingness to continue normal activities, respond in [0, 1] binary decision only."
        return instruction

# Create the Archetype from the template, backed by a mock LLM.
llm = MockLLM()
arch = Archetype(prompt=DemoTemplate(), llm=llm, n_arch=3)

# Configure external data and ground truth.
# NOTE(security): unpickling can execute arbitrary code - only load
# "job_data_clean.pkl" if it comes from a source you trust.
all_jobs_df = pd.read_pickle("job_data_clean.pkl")
gt_csv = pd.read_csv("agent_torch/core/llm/data/ground_truth_willingness_all_soc.csv")

# Map each SOC code (as a string) to its ground-truth willingness.
# The columns are read directly instead of going through iterrows():
# iterrows() packs every row into a single-dtype Series, so an integer code
# sitting next to the float `willingness` column would be upcast and
# stringified as e.g. "1.0", and the lookup below would then miss.
soc_to_val = {
    str(code): float(value)
    for code, value in zip(gt_csv["soc_code"], gt_csv["willingness"])
}

# One ground-truth value per job row, in row order; codes with no match fall
# back to 0.0. A jobs frame without a `soc_code` column keeps the previous
# tolerant behaviour (every lookup uses the key "None").
if "soc_code" in all_jobs_df.columns:
    job_codes = all_jobs_df["soc_code"]
else:
    job_codes = [None] * len(all_jobs_df)
gt_list = [soc_to_val.get(str(code), 0.0) for code in job_codes]

arch.configure(
    external_df=all_jobs_df,
    ground_truth=gt_list,
    match_on="soc_code",
)

# Sample before and after broadcast
arch.sample(print_examples=3)
arch.broadcast(population=astoria)
arch.sample(print_examples=3)

### P3O Optimization (New API)

In [None]:
# Optimize Template variables using P3O
from agent_torch.optim import P3O

# Build the optimizer over the archetype's parameters
# (default uses PSPGO-shaped reward).
opt = P3O(
    arch.parameters(),
    archetype=arch,
)

# Three optimization rounds over the broadcast population:
# sample, take an optimizer step, then call zero_grad().
for _ in range(3):
    arch.sample()
    opt.step()
    opt.zero_grad()

# Number of parameters exposed by the archetype (shown as the cell output).
len(tuple(arch.parameters()))

In [7]:
# Arguments used when creating a query for the LLM instance.
# Key order matches the original literal.
kwargs = dict(
    month="January",
    year="2020",
    covid_cases=1200,
    device="cpu",
    current_memory_dir="/populations/astoria/conversation_history",
    unemployment_rate=0.05,
)

#### Step 4: Compare performance between different Configurations of Archetype

In [None]:
from utils import get_labor_data, get_labor_force_correlation

# NOTE(review): `earning_behavior_n_2` and `earning_behavior_n_12` are not
# defined in any cell visible above; on a fresh kernel this cell raises
# NameError unless they are created first.

# Both runs read the same unemployment-rate table.
unemployment_csv = "agent_torch/models/macro_economics/data/unemployment_rate_csvs/Brooklyn-Table.csv"

# Run with the 2-archetype behavior.
labor_force_df_n_2, observed_labor_force_n_2, correlation_n_2 = get_labor_force_correlation(
    monthly_cases_kings, earning_behavior_n_2, unemployment_csv, kwargs
)

# Run with the 12-archetype behavior.
labor_force_df_n_12, observed_labor_force_n_12, correlation_n_12 = get_labor_force_correlation(
    monthly_cases_kings, earning_behavior_n_12, unemployment_csv, kwargs
)

print(f"Correlation with 2 Archetypes is {correlation_n_2} and 12 Archetypes is {correlation_n_12}")