In [37]:
%reload_ext autoreload
%autoreload 2

import asyncio
import json
import nest_asyncio
import os
import sys
from dotenv import load_dotenv
import numpy as np
import pandas as pd

sys.path.append('../')
from lattereview.providers.openai_provider import OpenAIProvider
from lattereview.providers.ollama_provider import OllamaProvider
from lattereview.agents.scoring_reviewer import ScoringReviewer
from lattereview.review_workflow import ReviewWorkflow

## Setting up the notebook

Loading environment variables:

In [38]:
# Load environment variables from .env file
load_dotenv('../.env')
print(os.getenv('OPENAI_API_KEY'))

# Enable asyncio in Jupyter
nest_asyncio.apply()

sk-cq_M0pNgHhCFnlDOCMnagYA1l2X7Yea5CL0ci5pZMNT3BlbkFJ0m0x9wm5M_EstX5SjLu_kdwGMDYjkUdviNPs4pe9cA


Loading a dummy dataset:

In [3]:
data = pd.read_excel('data.xlsx')
data.head()

Unnamed: 0,ID,Title,1st author,repo,year,abstract
0,1,Segmentized quarantine policy for managing a t...,"Kim, J.",arXiv,2024,"By the end of 2021, COVID-19 had spread to ove..."
1,2,AutoProteinEngine: A Large Language Model Driv...,"Liu, Y.",arXiv,2024,Protein engineering is important for biomedica...
2,3,Integration of Large Vision Language Models fo...,"Chen, Z.",arXiv,2024,Traditional natural disaster response involves...
3,4,Choice between Partial Trajectories,"Marklund, H.",arXiv,2024,As AI agents generate increasingly sophisticat...
4,5,Building Altruistic and Moral AI Agent with Br...,"Zhao, F.",arXiv,2024,"As AI closely interacts with human society, it..."


## Testing the base functionalities

Testing the OpenAI provider:

In [16]:
openanai_provider = OpenAIProvider(model="gpt-4o-mini")
question = "What is the capital of France?"
asyncio.run(openanai_provider.get_response(question, temperature=0.9))

('The capital of France is Paris.',
 {'input_cost': 1.05e-06, 'output_cost': 4.2e-06, 'total_cost': 5.25e-06})

Testing the Ollama provider:

In [17]:
ollama_provider = OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434")
question = "What is the capital of France?"
asyncio.run(ollama_provider.get_response(question))

('The capital of France is Paris!',
 {'input_cost': 0, 'output_cost': 0, 'total_cost': 0})

Testing the ScoringReviewer agent:

In [41]:
agent = ScoringReviewer(
    provider=OpenAIProvider(model="gpt-4o-mini"),
    # provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    name="Pouria",
    backstory="an expert reviewer and researcher!",
    input_description = "article title",
    temperature=0.1, # ineffective for ollama
    reasoning = "brief",
    max_tokens=100, # ineffective for ollama
    review_criteria="Look for articles that certainly do not employ any AI or machine learning agents",
    score_set=[1, 2],
    scoring_rules='Score 1 if the paper does not meet the criteria, and 2 if the paper meets the criteria.',
)


# Dummy input
text_list = data.Title.str.lower().tolist()
print("Inputs:\n\n", '\n'.join(text_list[:3]))

# Dummy review
results, total_cost = asyncio.run(agent.review_items(text_list[:3]))
print("\n\n Outputs:")
for result in results:
    print(result)

# Dummy costs
print("\nCosts:\n")
for item in agent.memory:
    print(item['cost'])

print("\nTotal cost:\n")
print(total_cost)

Inputs:

 segmentized quarantine policy for managing a tradeoff between containment of infectious disease and social cost of quarantine
autoproteinengine: a large language model driven agent framework for multimodal automl in protein engineering
integration of large vision language models for efficient post-disaster damage assessment and reporting


Reviewing 3 items - 2024-12-14 14:14:18: 100%|██████████| 3/3 [00:01<00:00,  2.30it/s]



 Outputs:
{"reasoning":"The paper presents a novel framework for automating protein engineering using a large language model, which aligns well with current trends in multimodal AI applications, thus meeting the criteria.","score":2}
{"reasoning":"The paper presents a clear framework for balancing public health needs with economic impacts, effectively addressing the tradeoff between disease containment and social costs, thus meeting the criteria.","score":2}
{"reasoning":"The integration of large vision language models for post-disaster damage assessment is a relevant and innovative approach that meets the criteria for effective reporting in disaster management.","score":2}

Costs:

{'input_cost': 1.98e-05, 'output_cost': 2.52e-05, 'total_cost': 4.5e-05}
{'input_cost': 1.965e-05, 'output_cost': 2.4e-05, 'total_cost': 4.365e-05}
{'input_cost': 1.905e-05, 'output_cost': 2.28e-05, 'total_cost': 4.185e-05}

Total cost:

4.185e-05





## Testing the main Functionalities

#### A multiagent review workflow for doing title/abstract analysis

Setting up the agents:

In [42]:
pouria = ScoringReviewer(
    # provider=OpenAIProvider(model="gpt-4o-mini"),
    provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    name="Pouria",
    backstory="a radiologist with many years of background in statistcis and data science, who are famous among your colleagues for your systematic thinking, organizaton of thoughts, and being conservative",
    input_description = "tilte and abstract of scientific articles",
    temperature=0.1, # ineffective for ollama
    reasoning = "cot",
    max_tokens=100, # ineffective for ollama
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    score_set=[1, 2],
    scoring_rules='Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.',
)

bardia = ScoringReviewer(
    provider=OpenAIProvider(model="gpt-4o-mini"),
    name="Bardia",
    backstory="an expert in data science with a background in developing ML models for healthcare, who are famous among your colleagues for your creativity and out of the box thinking",
    input_description = "tilte and abstract of scientific articles",
    temperature=0.7,
    reasoning = "brief",
    max_tokens=100,
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    score_set=[1, 2],
    scoring_rules='Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.',
)

brad = ScoringReviewer(
    provider=OpenAIProvider(model="gpt-4o"),
    name="Brad",
    backstory="a senior radiologist with a PhD in computer science and years of experience as the director of a DL lab focused on developing ML models for radiology and healthcare",
    input_description = "tilte and abstract of scientific articles",
    temperature=0.4,
    reasoning = "cot",
    max_tokens=100,
    scoring_task="""Pouria and Bardia have Looked for articles that disucss large languange models-based AI agents applied to medical imaging data. 
                       They scored an article 1 if they thought it does not meet this criteria, 2 if they thought it meets the criteria, 0 if they were uncertain of scoring.
                       You will receive an article they have had different opinions about, as well as each of their scores and their reasoning for that score. Read their reviews and determine who you agree with. 
                    """,
    score_set=[1, 2],
    scoring_rules="""Score 1 if you agree with Pouria, and score 2 if you agree with Bardia.""",
)


Setting up the review workflow:

In [43]:
title_abs_review = ReviewWorkflow(
    workflow_schema=[
        {
            "round": 'A',
            "reviewers": [pouria, bardia],
            "inputs": ["Title", "abstract"]
        },
        {
            "round": 'B',
            "reviewers": [brad],
            "inputs": ["Title", "abstract", "round-A_Pouria_output", "round-A_Bardia_output"],
            "filter": lambda row: row["round-A_Pouria_output"]["score"] != row["round-A_Bardia_output"]["score"]
        }
    ]
)

Applying the review workflow to a number of sample articles:

In [44]:
# Reload the data if needed.
sample_data = pd.read_excel('data.xlsx').sample(10).reset_index(drop=True)
updated_data = asyncio.run(title_abs_review(sample_data))

print("Total cost: ")
print(title_abs_review.get_total_cost())

print("\nDetailed cost:")
print(title_abs_review.reviewer_costs)

updated_data


Starting review round A (1/2)...
Reviewers: ['Pouria', 'Bardia']
Input data: ['Title', 'abstract']


                                                       

Number of eligible rows for review: 10


['round: A', 'reviewer_name: Pouria'] - 2024-12-14 15:16:44: 100%|██████████| 10/10 [01:43<00:00, 10.37s/it]
['round: A', 'reviewer_name: Bardia'] - 2024-12-14 15:18:28: 100%|██████████| 10/10 [00:01<00:00,  6.08it/s]



Starting review round B (2/2)...
Reviewers: ['Brad']
Input data: ['Title', 'abstract', 'round-A_Pouria_output', 'round-A_Bardia_output']


                                                       

Number of eligible rows for review: 2


['round: B', 'reviewer_name: Brad'] - 2024-12-14 15:18:30: 100%|██████████| 2/2 [00:19<00:00,  9.66s/it]

Total cost: 
0.0041978

Detailed cost:
{('A', 'Pouria'): 0, ('A', 'Bardia'): 8.28e-05, ('B', 'Brad'): 0.004115}





Unnamed: 0,ID,Title,1st author,repo,year,abstract,round-A_Pouria_output,round-A_Bardia_output,round-B_Brad_output
0,539,Provably Efficient Cooperative Multi-Agent Rei...,"Dubey, A.",arXiv,2021,Reinforcement learning in cooperative multi-ag...,{'properties': {'reasoning': {'title': 'Reason...,{'reasoning': 'The article discusses an LLM-ba...,{'reasoning': 'The article provided in the inp...
1,551,Challenging Social Media Threats using Collect...,"Ognibene, D.",arXiv,2021,Social media have become an integral part of o...,"{'properties': {'title': 'Reasoning', 'type': ...",{'reasoning': 'The article discusses the appli...,{'reasoning': 'The article provided in the inp...
2,113,A quantitative characterization of the heterog...,"Dixit, P.",bioRxiv,2024,Most cancers are genetically and phenotypicall...,{'reasoning': 'The input item discusses social...,{'reasoning': 'The article discusses a generat...,
3,87,Be More Real: Travel Diary Generation Using LL...,"Li, X.",arXiv,2024,Human mobility is inextricably linked to socia...,{'reasoning': 'This article discusses reinforc...,{'reasoning': 'The paper discusses reinforceme...,
4,689,Better guider predicts future better: Differen...,"Ying, G.",arXiv,2019,Predicting the future is a fantasy but practic...,{'reasoning': 'The article discusses the appli...,{'reasoning': 'The article focuses on trust mo...,
5,626,Self punishment and reward backfill for deep Q...,"Bonyadi, M.R.",arXiv,2020,Reinforcement learning agents learn by encoura...,{'reasoning': 'The input item discusses a trus...,{'reasoning': 'The article discusses belief fo...,
6,242,RCAgent: Cloud Root Cause Analysis by Autonomo...,"Wang, Z.",arXiv,2023,Large language model (LLM) applications in clo...,{'reasoning': 'The article discusses the use o...,{'reasoning': 'The article discusses social me...,
7,219,On the Impact of School Closures on COVID-19 T...,"Heger, L.",arXiv,2023,The effect of school closures on the spread of...,{'reasoning': 'The input item discusses gliobl...,{'reasoning': 'The article focuses on cooperat...,
8,202,"Trust model of privacy-concerned, emotionally-...","Carbo, J.",arXiv,2024,In this paper we propose a trust model to be u...,{'reasoning': 'The input item discusses the ap...,{'reasoning': 'The article discusses the impac...,
9,283,Multi-topic belief formation through bifurcati...,"Bizyaeva, A.",arXiv,2023,We propose and analyze a nonlinear dynamic mod...,{'reasoning': 'The input item discusses the im...,{'reasoning': 'The article discusses the respo...,


In [45]:
for i, row in updated_data.iterrows():
    print(
        f"""
        Title: {row.Title}
        Abstract: {row.abstract}
        Pouria's review: {row["round-A_Pouria_output"]}
        Bardia's review: {row["round-A_Bardia_output"]}
        Brad's review: {None if "round-B_Brad_output" not in row else row["round-B_Brad_output"]}
        """
    )


        Title: Provably Efficient Cooperative Multi-Agent Reinforcement Learning with Function Approximation
        Abstract: Reinforcement learning in cooperative multi-agent settings has recently advanced significantly in its scope, with applications in cooperative estimation for advertising, dynamic treatment regimes, distributed control, and federated learning. In this paper, we discuss the problem of cooperative multi-agent RL with function approximation, where a group of agents communicates with each other to jointly solve an episodic MDP. We demonstrate that via careful message-passing and cooperative value iteration, it is possible to achieve near-optimal no-regret learning even with a fixed constant communication budget. Next, we demonstrate that even in heterogeneous cooperative settings, it is possible to achieve Pareto-optimal no-regret learning with limited communication. Our work generalizes several ideas from the multi-agent contextual and multi-armed bandit literature