In [53]:
import json
import os

import openai
import pandas as pd

In [55]:
# adapted from: https://github.com/composable-models/llm_multiagent_debate/blob/main/gsm/gen_gsm.py
class Agent():
    """One participant of a multi-agent "debate" that talks to a model through an OpenAI-compatible endpoint.

    Each agent keeps its own chat history (``context``), remembers its most recent
    answer (``current_solution``) and holds references to the other agents so their
    answers can be quoted back to the model in later rounds.
    """

    # system message that opens every conversation of an agent
    SYSTEM_PROMPT = "Give short and precise answers. You will get up to two pieces of evidence and a statement. Your job is to decide if the statement makes logical sense, given the pieces of evidence."

    def __init__(self,port: str, name: str) -> None:
        """Creates the client and an empty conversation.

        Args:
            port (str): port of the proxy the client should talk to
            name (str): human readable identifier of the agent, used in the prompts and chatlogs
        """
        # inits an openAi client which is rerouted using the litellm library
        self.client = openai.OpenAI(api_key="placeholder",base_url=f"http://0.0.0.0:{port}")
        # identifier of the model. Used to understand the chatlogs from a human perspective
        self.name = name
        # chat history sent with every request; starts with the system message
        self.context = [{"role": "system", "content": self.SYSTEM_PROMPT}]
        # holds references to the other models in the "debate"
        self.other_agents = []
        # saves the most recent answer from the model
        self.current_solution = ""


    def generate_answer(self):
        """Sends a chat completion request with the current context.

        Returns:
            The object returned by ``client.chat.completions.create``; callers read the
            answer text from ``.choices[0].message.content``.
        """
        # request sent to model set on litellm proxy, `litellm --model` -> model param is just a placeholder for the API
        return self.client.chat.completions.create(model="gpt-3.5-turbo", messages = self.context)


    def append_user_message(self,lastRound: bool) -> None:
        """Appends a user message to the context that asks the model to revise its answer.

        Without other agents the model is simply asked to double check its answer.
        Otherwise the latest answers of all other agents are quoted and the model is
        asked for an updated answer.

        Args:
            lastRound (bool): whether this is the last round of the "debate"; only used
                when other agents exist, in which case a one-word final answer is requested
        """
        if not self.other_agents:
            self.context.append({"role": "user", "content": "Can you double check that your answer is correct. Put your final answer at the end of your response."})
            return

        prefix_string = "These are the solutions to the problem from other agents: "
        for agent in self.other_agents:
            prefix_string += f"\n\n Solution from agent {agent.name}: ```{agent.current_solution}```"

        # The instruction has no placeholders, so it is appended as-is. It used to be passed
        # through `.format(question)`, which formatted nothing but required a global
        # `question` to exist and raised a NameError otherwise.
        prefix_string += "\n\n Using the reasoning from other agents as additional advice, can you give an updated answer? Examine your solution and that other agents step by step. Give a short answer and put your answer at the end of your response."
        if lastRound:
            prefix_string += " This is your last chance to reply.  Your final answer should be a single word: \"Entailment\" or \"Contradiction\""
        self.context.append({"role": "user", "content": prefix_string})


    def append_assistant_message(self,content:str) -> None:
        """Stores an answer of the model as the current solution and appends it to the context ("chatlog").

        Args:
            content (str): answer text to append to the models context
        """
        self.current_solution = content
        self.context.append({"role": "assistant", "content": content})

    def reset(self) -> None:
        """Resets the conversation state, so the agent can be used in a new conversation.

        The context is reduced to the system message and the current solution is
        cleared. The client, the name and the references to the other agents are kept.
        """
        self.context = [{"role": "system", "content": self.SYSTEM_PROMPT}]
        self.current_solution = ""
    

In [56]:
# Ports of the model proxies, one per agent. They are kept as strings because
# they are only ever interpolated into the base URL of the client.
MEDLLAMA_PORT = "19346"
WIZARD_PORT = "37236"
ORCA_PORT = "8000"

In [13]:
# Create one agent per proxy. The ports have to match the ones litellm assigned.
# Start a proxy in a terminal with:   litellm --model ollama/<model_name>
# List the installed ollama models with:   ollama list
orcaAgent, wizardAgent, mistralAgent = (
    Agent(ORCA_PORT, "orca"),
    Agent(WIZARD_PORT, "wizard-math"),
    Agent(MEDLLAMA_PORT, "medllama"),
)

# every agent gets references to all agents except itself
agent_list = [orcaAgent, wizardAgent, mistralAgent]
for i, agent in enumerate(agent_list):
    agent.other_agents = [peer for j, peer in enumerate(agent_list) if j != i]

In [14]:
# how many rounds the conversation should go
ROUNDS = 3
# location of the folder to write the chatlogs to
CHATLOGS_FOLDER = "./data/chatlogs/"
# dataset to generate results from
df = pd.read_json("./data/val.json", lines=True)

# create the chatlog folder up front, otherwise the first open() below fails on a fresh checkout
os.makedirs(CHATLOGS_FOLDER, exist_ok=True)

for index, row in df.iterrows():
    print(index)
    chatlogs_loc = f'{CHATLOGS_FOLDER}chatlog{index}.json'

    question = row['text']

    # every chatlog is a JSON-lines file: the first line holds the question and its label,
    # each following line holds one answer of one agent
    with open(chatlogs_loc, 'w') as f:
        json.dump({'question': question, 'label': row['label']}, f)
        f.write("\n")

    # init fresh agents for every question so no chat history leaks between questions
    orcaAgent = Agent(ORCA_PORT,"orca")
    wizardAgent = Agent(WIZARD_PORT,"wizard-math")
    mistralAgent = Agent(MEDLLAMA_PORT,"medllama")

    agent_list = [orcaAgent,wizardAgent,mistralAgent]
    for i, agent in enumerate(agent_list):
        agent.other_agents = agent_list[:i] + agent_list[i+1:]
        # append the current question as an user message
        agent.context.append({"role": "user", "content": question})

    # `round_idx` instead of `round`: a loop variable named `round` would shadow the
    # builtin for every cell executed afterwards in the same kernel
    for round_idx in range(ROUNDS):
        is_last_round = round_idx == ROUNDS - 1

        # NOTE: the agents answer one after another, so an agent later in the list is
        # shown the answers that earlier agents already gave in this same round
        for agent in agent_list:

            # if we are not in the first round anymore, append the results from the other agents to the context
            if round_idx != 0:
                agent.append_user_message(is_last_round)

            completion = agent.generate_answer()
            content = completion.choices[0].message.content

            # append the output of the agent as an assistant message to the context
            agent.append_assistant_message(content)

            # append the answer to the chatlog right away, so finished answers survive a crash
            with open(chatlogs_loc, 'a') as f:
                json_record = json.dumps({'round': round_idx, 'agent': agent.name, 'text': content})
                f.write(json_record + "\n")

            # write current solution to dataframe so we can extract the results at the end
            df.at[index,f"{agent.name}-{round_idx}"] = agent.current_solution
    


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199


In [57]:
# preprocess data for evaluation
import re

# which round should be evaluated
EVAL_ROUND = 0
# pattern to look for a decision: any word starting with "entail" or "contradict", case-insensitive.
# The capture group only contains the stem, which is what the comparison below relies on.
pattern = r"(?i)\b(entail|contradict)\w*\b"
# counts how many times no clear decision was found
noDecisionCount = 0

for rowIndex, row in df.iterrows():
    # Numeric gold label, written once per row. It is read from the row itself rather than
    # via df.iloc[rowIndex]: rowIndex is an index label, and using it as a position is only
    # correct as long as the dataframe has a default RangeIndex.
    df.at[rowIndex, "labelId"] = 1 if row["label"] == "Entailment" else 0

    for agent in agent_list:
        result_column = f"{agent.name}-res-{EVAL_ROUND}"

        matches = re.findall(pattern, row[f"{agent.name}-{EVAL_ROUND}"])
        if matches:
            # the last decision word in the answer counts: 1 = entailment, 0 = contradiction
            df.at[rowIndex, result_column] = 1 if matches[-1].lower() == "entail" else 0
        else:
            # if no match was found, this was likely due to the agent not willing to decide on a solution -> count this as contradiction
            df.at[rowIndex, result_column] = 0
            noDecisionCount += 1


columns_list = [f"{agent.name}-res-{EVAL_ROUND}" for agent in agent_list]
# take the majority vote of the 3 columns: if sum >= 2, two columns voted for 1
df[f'majority_vote-{EVAL_ROUND}'] = df[columns_list].sum(axis=1).apply(lambda x: 1 if x >= 2 else 0)
# the number of times where the agent did not want to decide
print(noDecisionCount)

171


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score


# compare the majority vote of the evaluated round against the gold labels
y_true = df["labelId"]
y_pred = df[f"majority_vote-{EVAL_ROUND}"]

precision, recall, f1 = (
    metric(y_true, y_pred) for metric in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

In [58]:
# persist the dataframe, including every column added by the cells above, as csv
df.to_csv(path_or_buf="./data/out.csv")