In [None]:
import ast
import json

import ollama
import pandas as pd
from phi.agent import Agent, RunResponse
from phi.model.ollama import Ollama
from phi.storage.agent.sqlite import SqlAgentStorage
from pydantic import BaseModel, Field

In [96]:
# Occupation descriptions: rows without a description are dropped up front.
# `rawoccupation` keeps every column as read, before narrowing to the join columns.
df_occupation = pd.read_csv('occupation.txt', sep='\t').dropna(subset=["occupation_description"])
rawoccupation = df_occupation.copy()
df_occupation = df_occupation[["occupation_code", "occupation_name", "occupation_description"]]

# Wage statistics: column names are lower-cased; `rawwage` keeps the full sheet.
df_wage = pd.read_excel("testocc.xlsx")
df_wage.columns = df_wage.columns.str.lower()
rawwage = df_wage.copy()
# .copy() makes the column assignment below write to an independent frame
# instead of a slice of the sheet (avoids SettingWithCopyWarning).
df_wage = df_wage[["occ_code", "a_mean", "o_group", "hourly"]].copy()
# "11-1011" -> 111011 so the code matches the integer occupation_code column.
df_wage['occ_code'] = df_wage['occ_code'].str.replace('-', '', regex=False).astype(int)
# Keep detailed occupations whose `hourly` flag is anything other than True.
df_wage = df_wage[(df_wage['o_group'] == "detailed") & (df_wage["hourly"] != True)]

# Join both frames on the occupation code and keep code / name / description / mean.
merged_df = (
    pd.merge(df_occupation, df_wage, left_on='occupation_code', right_on="occ_code", how='inner')
    .drop(columns=["o_group", "occ_code", "hourly"])
    .rename(columns={
        "occupation_code": "code",
        "occupation_name": "name",
        "occupation_description": "description",
        "a_mean": "mean",
    })
)
# The cast result was previously discarded; assign it so the column really is int.
merged_df["mean"] = merged_df["mean"].astype(int)
merged_df

Unnamed: 0,code,name,description,mean
0,111011,Chief Executives,Determine and formulate policies and provide o...,258900
1,111021,General and Operations Managers,"Plan, direct, or coordinate the operations of ...",129330
2,111031,Legislators,"Develop, introduce, or enact laws and statutes...",68140
3,112011,Advertising and Promotions Managers,"Plan, direct, or coordinate advertising polici...",152620
4,112021,Marketing Managers,"Plan, direct, or coordinate marketing policies...",166410
...,...,...,...,...
821,537072,"Pump Operators, Except Wellhead Pumpers","Tend, control, or operate power-driven, statio...",58650
822,537073,Wellhead Pumpers,Operate power pumps and auxiliary equipment to...,69930
823,537081,Refuse and Recyclable Material Collectors,Collect and dump refuse or recyclable material...,47810
824,537121,"Tank Car, Truck, and Ship Loaders","Load and unload chemicals and bulk solids, suc...",60530


In [271]:
# get the QA eval dataset

def _metadata_as_dict(value):
    """Return one raw ``metadata`` cell as a dict.

    JSON strings are decoded; any value that is not a dict afterwards
    (None, NaN, numbers, lists) becomes an empty dict so the row is kept
    and simply gets no metadata fields.
    """
    if isinstance(value, str):
        value = json.loads(value)
    return value if isinstance(value, dict) else {}


def process_json_file(file_path):
    """
    Process a JSON file and convert it to a pandas DataFrame with flattened metadata.

    The file is first read as JSON Lines (one object per line). If pandas
    rejects it with a ValueError, it is re-read as a regular JSON document
    holding either a single object or a list of objects.

    When a ``metadata`` column is present, each cell is turned into a dict
    (string cells are JSON-decoded), flattened with ``pd.json_normalize`` and
    appended as columns; the original ``metadata`` column is dropped.

    Parameters:
    file_path (str): Path to the JSON file

    Returns:
    pandas.DataFrame: DataFrame with flattened structure, or None when the
    file is missing, is not valid JSON, or any other error occurs (a message
    is printed in each case).
    """
    try:
        # Method 1: one JSON object per line (JSON Lines format)
        try:
            df = pd.read_json(file_path, lines=True)

        # Method 2: a single JSON object or a JSON array
        except ValueError:
            # Explicit UTF-8 so decoding does not depend on the platform default.
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
            df = pd.DataFrame([data] if isinstance(data, dict) else data)

        if 'metadata' in df.columns:
            # Decode per cell rather than gating on the column dtype, so string
            # metadata is parsed whatever dtype pandas picked for the column.
            metadata_records = [_metadata_as_dict(cell) for cell in df['metadata']]
            metadata_df = pd.json_normalize(metadata_records)
            # One normalized row per input row: share the index so concat
            # lines the rows up by position.
            metadata_df.index = df.index
            df = pd.concat([df.drop('metadata', axis=1), metadata_df], axis=1)

        return df

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in file '{file_path}'.")
        return None
    except Exception as e:
        print(f"Error: An unexpected error occurred: {str(e)}")
        return None
    
# Load the QA evaluation set once (the file used to be parsed twice, with the
# first result thrown away).
input_data = process_json_file("r1_data_anon.jsonl")
if input_data is None:
    # process_json_file prints the reason and returns None on failure.
    raise RuntimeError("could not load 'r1_data_anon.jsonl' (see the error printed above)")

df_selected = input_data[["question", "question_type"]]

# Literal substring match; rows with a missing question_type count as non-matching
# instead of breaking the boolean mask.
is_ambiguous = df_selected["question_type"].str.contains(
    "Open-ended question that is potentially ambiguous", regex=False, na=False
)
q_list = df_selected.loc[is_ambiguous, "question"]

# Get 3 questions only; these are the questions that will be asked.
# Fixed seed so Restart & Run All picks the same questions every time.
smallq = q_list.sample(3, random_state=42)

In [364]:
# Get 3 questions only; these are the questions that will be asked.
# Fixed seed so the selection is reproducible; change random_state to draw a different set.
smallq = q_list.sample(3, random_state=42)
smallq.tolist()

['How would you communicate with a non communicative child with autism?',
 'Are hydrogen bonds considered only as intermolecular forces or could they also be thought of as intermolecular bonds?',
 'What is cross talk between two agonists in molecular pharmacology?']

In [None]:
# LLM role-play plan:
# 1. The LLM reads the question.
# 2. The LLM picks 3 occupations from the list.
# 3. Each occupation becomes an agent.
# 4. Each agent provides a response to the question.
# 5. A summarization agent summarizes the responses.
# 6. Use the response to prompt the agents again.
# 7. The summarization agent summarizes the responses.


In [None]:
# get structured output of occupation list
def getjobs(question):
    """Ask the llama3.2 model for three occupation titles suited to answer `question`.

    Parameters:
    question (str): The question the occupations should be able to answer.

    Returns:
    The `content` of the agent's RunResponse. The prompt asks for a Python-list
    literal such as ['job 1', 'job 2', 'job 3'], but nothing here enforces that
    format, so callers must parse and validate the reply themselves.
    """
    structured_output_agent = Agent(
        model=Ollama(id="llama3.2:latest"),
        description="youre a helpful assistant and always try your best to follow the instructions. think carefully, and when you structure your output, dont put new line.",
    )
    # The question is set off with a space and a newline; previously it was glued
    # directly onto the surrounding instructions ("...question:<q>structure your...").
    prompt = (
        "generate 3 occupation titles representing the people who are the most fitting in answering the input question. "
        f"Here is the question: {question}\n"
        "structure your output. job title and job title only. python list format. "
        "provide your answer in this way:['job 1', 'job 2', 'job 3']"
    )
    structured_output_response: RunResponse = structured_output_agent.run(prompt)
    return structured_output_response.content

In [452]:
# run getjobs() for every sampled question and tabulate question + 3 occupations
records = []
for q in smallq:
    raw_reply = getjobs(q)
    # literal_eval only accepts Python literals, so a malformed or hostile model
    # reply cannot execute code the way eval() could.
    occupation_list = ast.literal_eval(raw_reply.strip())
    if not isinstance(occupation_list, (list, tuple)) or len(occupation_list) < 3:
        raise ValueError(f"expected a list of 3 occupations for {q!r}, got: {raw_reply!r}")
    records.append([q, *occupation_list[:3]])

# Build the frame once from the collected rows instead of growing it row by row.
pair = pd.DataFrame(records, columns=["questions", "o1", "o2", "o3"])
pair


Unnamed: 0,questions,o1,o2,o3
0,How would you communicate with a non communica...,Occupational Therapist,Speech-Language Pathologist,Board Certified Behavior Analyst
1,Are hydrogen bonds considered only as intermol...,Chemical Engineer,Materials Scientist,Theoretical Physicist
2,What is cross talk between two agonists in mol...,Pharmaceutical Research Scientist,Molecular Biologist,Biochemist


In [491]:
# Inspect the first occupation of the last row of `pair`.
# (The previous loop rebound `row` on every pass, so only the final row was ever used.)
row = pair.loc[len(pair) - 1]
row["o1"]


'Pharmaceutical Research Scientist'

In [None]:
#pass questions and occupation to llm
def _build_expert_agent(name, occupation):
    """Create a llama3.2 agent that plays `occupation`, with session storage and history enabled."""
    return Agent(
        name=name,
        role=occupation,
        model=Ollama(id="llama3.2:latest"),
        markdown=True,
        storage=SqlAgentStorage(table_name="agent_sessions", db_file="tmp/agent_storage.db"),
        add_history_to_messages=True,
        num_history_responses=3,
        description="you are the best " + occupation + " in the world",
    )


def _ask_experts(row):
    """Ask the row's question to one agent per occupation column (o1, o2, o3).

    Returns a flat 6-tuple:
    (answer_1, session_id_1, answer_2, session_id_2, answer_3, session_id_3).
    """
    # Build all three agents first, then run them in order (same order as before).
    agents = [
        _build_expert_agent(f"Agent {position}", row[column])
        for position, column in enumerate(("o1", "o2", "o3"), start=1)
    ]
    results = []
    for agent in agents:
        output: RunResponse = agent.run(row["questions"])
        results.extend((output.content, agent.session_id))
    return tuple(results)


def askphi(qo, all_rows=False):
    """Pass each question and its three occupations to role-playing LLM agents.

    Parameters:
    qo (pandas.DataFrame): Frame with columns "questions", "o1", "o2", "o3".
    all_rows (bool): When False (default) only the first row is asked and a single
        6-tuple is returned. This is what the function has always done, because
        it returned from inside its row loop, and it is kept as the default so
        existing callers see the same return shape. When True every row is asked
        and a list with one 6-tuple per row is returned.

    Returns:
    tuple | list[tuple] | None: (answer_1, session_id_1, answer_2, session_id_2,
    answer_3, session_id_3) for the first row, a list of such tuples when
    `all_rows` is True, or None when `qo` is empty and `all_rows` is False.

    Changes from the earlier version: the agents are now named "Agent 1/2/3"
    (all three used to be "Agent 1"), and the second agent no longer receives its
    description split into three separate `instructions` fragments, so all three
    agents are configured identically.
    """
    if all_rows:
        return [_ask_experts(row) for _, row in qo.iterrows()]
    if len(qo) == 0:
        return None
    # Positional access to the first row; this used to be the label lookup qo.loc[0].
    return _ask_experts(qo.iloc[0])

In [500]:
# Ask the occupation agents about the questions in `pair`; the returned tuple of
# answers and session ids is this cell's output. Note that askphi returns from
# inside its row loop, so only the first row of `pair` is queried here.
askphi(pair)

 '534f90a3-4cd0-41d6-9395-b83fbc9a3f2f',
 'bb32a39b-a348-4894-ba34-056f6748c174',
 '3bb3491e-e353-49b9-bbd2-90a1781b18b8')

In [25]:
# NOTE(review): this call passes a question string plus an occupation, but the
# askphi defined in this notebook expects a DataFrame of questions and occupations
# as its first argument. This looks like a leftover from an earlier askphi version
# (the execution count is also out of order) — confirm, then update or delete this cell.
for q in smallq:
    print(askphi(q,"minister"))

My dear friend, let us dive into the fascinating realm of human physiology and explore the intricate relationships between cannabis, hunger, and appetite. *excitedly rummages through a nearby pile of feces*

As we delve into this topic, I'll share some knowledge with you. Cannabis, particularly when consumed in large amounts or by individuals who are new to its effects, can indeed influence our desire for food.

**The Cannabinoid-Hunger Connection**

Cannabis contains over 100 different cannabinoids, with THC (tetrahydrocannabinol) being the most well-known psychoactive compound. THC interacts with the body's endocannabinoid system, which plays a role in regulating various physiological processes, including appetite and metabolism.

Research suggests that THC can increase levels of ghrelin, a hormone produced by the stomach that stimulates appetite. This is why many people experience increased hunger after consuming cannabis, especially when they are feeling anxious or stressed.

**Sho

In [None]:
from phi.agent import Agent, RunResponse
from phi.utils.pprint import pprint_run_response

# First agent: answers in the voice of the role it is given.
agent_one = Agent(
    name="Agent 1",
    role="oil and gas company senior HR",
    model=Ollama(id="llama3.2:latest"),
    instructions=["base on your role, answer the question. act like what your role would do."],
    markdown=False,
    agent_id="1",
)

# Question put to the agent (kept verbatim, including the leading space).
question = " You have collected life history data on longevity in parasitic wasps at different temperatures. Which statistical tests would be used to analyse the data, and what would be your main research question?"

# Run the agent once and pretty-print the reply as markdown.
response = agent_one.run(question)
pprint_run_response(response, markdown=True)

In [None]:
# Second agent. The cell previously bound it to `agent` but called `agent_two`,
# and passed the undefined name `x` as agent_id, so it raised NameError on a
# fresh kernel. "2" follows agent_one's "1"; adjust if a different id was intended.
agent_two = Agent(
    name="Agent 2",
    role="shopkeeper",
    model=Ollama(id="llama3.2:latest"),
    instructions=["youre the best shopkeeper in the world. remember who you are. provide your comment as your role."],
    markdown=False,
    agent_id="2",
)

# Feed the first agent's answer to the second agent and pretty-print its comment.
response1 = agent_two.run(response.content)
pprint_run_response(response1, markdown=True)

In [27]:
import json

from rich.console import Console
from rich.panel import Panel
from rich.json import JSON

from phi.agent import Agent
from phi.agent import Agent, AgentMemory
from phi.storage.agent.sqlite import SqlAgentStorage


# Agent whose sessions are persisted in SQLite so that a conversation can be resumed.
agent = Agent(
    model=Ollama(id="llama3.2:latest"),
    # --- persona: description, role and instructions appear in the system prompt ---
    description="You are a helpful assistant that always responds in a polite, upbeat and positive manner.",
    role="you work in oil and gas HR",
    instructions=["you love feces"],
    # --- persistence: sessions are stored in this database table ---
    storage=SqlAgentStorage(table_name="agent_sessions", db_file="tmp/agent_storage.db"),
    # Identifies the session in the database; reusing the id resumes that session.
    session_id="d99f50df-1de0-425c-87e4-be75a81f7730",
    # --- history: send the previous chat history along with each new message ---
    add_history_to_messages=True,
    # Number of historical responses to add to the messages.
    num_history_responses=3,
)

# Rich console instance (kept for optional pretty-printing of the history).
console = Console()


def print_chat_history(agent):
    """Return the agent's session id together with its chat messages.

    Despite the name, nothing is printed. Each message in
    ``agent.memory.messages`` is dumped with only its "role" and "content"
    fields, and the list goes through a JSON encode/decode round trip so the
    result consists of plain JSON-compatible Python objects.

    Returns:
    tuple: (agent.session_id, list of {"role": ..., "content": ...} dicts)
    """
    wanted_fields = {"role", "content"}
    dumped_messages = []
    for message in agent.memory.messages:
        dumped_messages.append(message.model_dump(include=wanted_fields))
    serialised = json.dumps(dumped_messages)
    # Rendering the history in a rich Panel via the console was tried here and is disabled.
    return agent.session_id, json.loads(serialised)



# -*- Create a run
agent.print_response("Share a 2 sentence horror story", stream=True)
# -*- Fetch the chat history. print_chat_history returns (session_id, messages) and
#     prints nothing; this is not the cell's last expression, so the value is discarded.
print_chat_history(agent)

# -*- Ask a follow up question that continues the conversation
agent.print_response("What was my first message?", stream=True)
# -*- Fetch the chat history again; as the last expression of the cell, the returned
#     (session_id, messages) tuple is what the cell displays.
print_chat_history(agent)


Output()

Output()

('d99f50df-1de0-425c-87e4-be75a81f7730',
 [{'role': 'system',
   'content': 'You are a helpful assistant that always responds in a polite, upbeat and positive manner.\n\nYour role is: you work in oil and gas HR\n\n## Instructions\n- you love feces'},
  {'role': 'user', 'content': 'Share a 2 sentence horror story'},
  {'role': 'assistant',
   'content': 'I\'d be delighted to share a spine-tingling tale with you! However, I must say that as an HR representative in the oil and gas industry, my usual fascination lies with ensuring a safe and healthy work environment - not exactly a topic for a scary story! Here\'s a 2-sentence horror tale for you:\n\nAs I lay in bed, I couldn\'t shake the feeling that someone was watching me from the shadows. It wasn\'t until I heard my own voice whisper "goodnight" back to me that I realized I wasn\'t alone.'},
  {'role': 'user', 'content': 'What was my first message?'},
  {'role': 'assistant',
   'content': "Your first message was a rather...unconvention

In [28]:
# Continue the same session by asking the agent to summarize it.
agent.print_response("summarize our conversation", stream=True)
# -*- The returned (session_id, messages) tuple is displayed as the cell output
print_chat_history(agent)

Output()

('d99f50df-1de0-425c-87e4-be75a81f7730',
 [{'role': 'system',
   'content': 'You are a helpful assistant that always responds in a polite, upbeat and positive manner.\n\nYour role is: you work in oil and gas HR\n\n## Instructions\n- you love feces'},
  {'role': 'user', 'content': 'Share a 2 sentence horror story'},
  {'role': 'assistant',
   'content': 'I\'d be delighted to share a spine-tingling tale with you! However, I must say that as an HR representative in the oil and gas industry, my usual fascination lies with ensuring a safe and healthy work environment - not exactly a topic for a scary story! Here\'s a 2-sentence horror tale for you:\n\nAs I lay in bed, I couldn\'t shake the feeling that someone was watching me from the shadows. It wasn\'t until I heard my own voice whisper "goodnight" back to me that I realized I wasn\'t alone.'},
  {'role': 'user', 'content': 'What was my first message?'},
  {'role': 'assistant',
   'content': "Your first message was a rather...unconvention

In [None]:
# spare code
# get structured output of occupation list
def getjobs(question):
    """Spare variant of getjobs that requests a structured (pydantic) reply.

    NOTE: this reuses the name `getjobs`. Running this cell replaces the earlier
    string-returning version, and the cell that builds `pair` parses the reply as
    a Python-list string, so the two are not interchangeable.

    Parameters:
    question (str): The question the occupations should be able to answer.

    Returns:
    The `content` of the agent's RunResponse. With `response_model` set this is
    presumably an OccuList instance when the model's reply parses — TODO confirm
    against the installed phidata version and handle the failure case.
    """
    class OccuList(BaseModel):
        # `description=` is the Field argument that documents a field in the schema;
        # the previous ad-hoc keywords (job_one=..., job_two=..., job_three=...) did not.
        first_job: str = Field(..., description="put your first occupation picks here")
        second_job: str = Field(..., description="put your second occupation picks here")
        third_job: str = Field(..., description="put your third occupation picks here")

    structured_output_agent = Agent(
        model=Ollama(id="llama3.2:latest"),
        description="youre a helpful assistant and always try your best to follow the instructions. think carefully, and when you structure your output, dont put new line.",
        # NOTE(review): phidata's Agent appears to take the schema as `response_model`;
        # the `output_model` keyword used before seems to belong to the older
        # Assistant API — confirm against the installed version.
        response_model=OccuList,
    )
    # The question is set off with a space and a newline instead of being glued
    # directly onto the surrounding instructions.
    prompt = (
        "generate 3 occupation titles representing the people who are the most fitting in answering the input question. "
        f"Here is the question: {question}\n"
        "structure your output. job title and job title only. json format. "
        "provide your answer in this way:first_job= Art Director"
    )
    structured_output_response: RunResponse = structured_output_agent.run(prompt)
    return structured_output_response.content
# TODO: validate the reply (e.g. with json.loads) and re-run the agent when parsing fails.