In [6]:
%reload_ext autoreload
%autoreload 2

import asyncio
import json
import nest_asyncio
import os
import sys
from dotenv import load_dotenv
import numpy as np
import pandas as pd

sys.path.append('../')
from lattereview.providers.openai_provider import OpenAIProvider
from lattereview.providers.ollama_provider import OllamaProvider
from lattereview.providers.litellm_provider import LiteLLMProvider
from lattereview.agents.scoring_reviewer import ScoringReviewer
from lattereview.review_workflow import ReviewWorkflow

  from .autonotebook import tqdm as notebook_tqdm


## Setting up the notebook

Loading environment variables:

In [2]:
# Load environment variables from .env file
load_dotenv('../.env')

# Report only whether the key is present. Printing the value itself would store
# the secret in the notebook's saved output, which is shared/committed with the file.
print("OPENAI_API_KEY is set:", bool(os.getenv('OPENAI_API_KEY')))

# Enable asyncio in Jupyter
nest_asyncio.apply()

[REDACTED: an API key was printed in this output; the exposed key should be rotated]


Loading a dummy dataset:

In [3]:
# Read the dummy dataset from the spreadsheet next to this notebook,
# then show its first five rows as the cell output.
data = pd.read_excel(io='data.xlsx')
data.head(n=5)

Unnamed: 0,ID,Title,1st author,repo,year,abstract
0,1,Segmentized quarantine policy for managing a t...,"Kim, J.",arXiv,2024,"By the end of 2021, COVID-19 had spread to ove..."
1,2,AutoProteinEngine: A Large Language Model Driv...,"Liu, Y.",arXiv,2024,Protein engineering is important for biomedica...
2,3,Integration of Large Vision Language Models fo...,"Chen, Z.",arXiv,2024,Traditional natural disaster response involves...
3,4,Choice between Partial Trajectories,"Marklund, H.",arXiv,2024,As AI agents generate increasingly sophisticat...
4,5,Building Altruistic and Moral AI Agent with Br...,"Zhao, F.",arXiv,2024,"As AI closely interacts with human society, it..."


## Testing the base functionalities

Testing the OpenAI provider:

In [4]:
# Smoke test: send one question through the OpenAI provider and display the reply.
# NOTE(review): `openanai_provider` looks like a typo of `openai_provider`; the name is
# kept as-is because other cells may refer to it.
question = "What is the capital of France?"
openanai_provider = OpenAIProvider(model="gpt-4o-mini")
openai_request = openanai_provider.get_response(question, temperature=0.9)
asyncio.run(openai_request)

('The capital of France is Paris.',
 {'input_cost': 1.05e-06, 'output_cost': 4.2e-06, 'total_cost': 5.25e-06})

Testing the Ollama provider:

In [5]:
# Smoke test: send one question through the Ollama provider at the given host
# and display the reply.
question = "What is the capital of France?"
ollama_provider = OllamaProvider(
    model="llama3.2-vision:latest",
    host="http://localhost:11434",
)
ollama_request = ollama_provider.get_response(question)
asyncio.run(ollama_request)

('The capital of France is Paris.',
 {'input_cost': 0, 'output_cost': 0, 'total_cost': 0})

Testing the LiteLLM provider:

In [4]:
# Smoke test: send one question through the LiteLLM provider and display the reply.
# Alternative models that can be swapped in:
# litellm_provider = LiteLLMProvider(model="gpt-4o-mini")
# litellm_provider = LiteLLMProvider(model="claude-3-5-sonnet-20240620")
question = "What is the capital of France?"
litellm_provider = LiteLLMProvider(model="groq/llama-3.3-70b-versatile")

litellm_request = litellm_provider.get_response(question)
asyncio.run(litellm_request)

response: <Response [200 OK]>


('The capital of France is Paris.', 3.464e-05)

Testing the ScoringReviewer agent:

In [7]:
# Single-reviewer smoke test: score the first three (lower-cased) article titles.
# NOTE(review): this cell passes `review_criteria` and `model_args`, while the reviewers
# defined later in the notebook pass `scoring_task`, `temperature` and `max_tokens` --
# confirm which keyword names the current ScoringReviewer actually honours.
agent = ScoringReviewer(
    name="Pouria",
    # Alternative providers that can be swapped in:
    # provider=OpenAIProvider(model="gpt-4o-mini"),
    # provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    # provider=LiteLLMProvider(model="gpt-4o-mini"),
    # provider=LiteLLMProvider(model="claude-3-5-sonnet-20240620"),
    provider=LiteLLMProvider(model="groq/llama-3.3-70b-versatile"),
    backstory="an expert reviewer and researcher!",
    input_description="article title",
    review_criteria="Look for articles that certainly do not employ any AI or machine learning agents",
    scoring_rules="Score 1 if the paper does not meet the criteria, and 2 if the paper meets the criteria.",
    score_set=[1, 2],
    reasoning="brief",
    model_args={"max_tokens": 100, "temperature": 0.1, "tools": None, "tool_choice": None},
    max_concurrent_requests=1,
)

# Dummy input: every title, lower-cased; only the first three are reviewed below.
text_list = data["Title"].str.lower().tolist()
first_three_titles = text_list[:3]
print("Inputs:\n\n", "\n".join(first_three_titles))

# Dummy review
review_request = agent.review_items(first_three_titles)
results, total_cost = asyncio.run(review_request)
print("\n\n Outputs:")
for review in results:
    print(review)

# Dummy costs: one entry per item stored in the agent's memory.
print("\nCosts:\n")
for memory_entry in agent.memory:
    print(memory_entry["cost"])

# NOTE(review): in the recorded output the total printed here equals the last per-item
# cost rather than the sum of the three -- confirm how review_items aggregates cost.
print("\nTotal cost:\n")
print(total_cost)

Inputs:

 segmentized quarantine policy for managing a tradeoff between containment of infectious disease and social cost of quarantine
autoproteinengine: a large language model driven agent framework for multimodal automl in protein engineering
integration of large vision language models for efficient post-disaster damage assessment and reporting


Reviewing 3 items - 2024-12-14 19:21:39:  67%|██████▋   | 2/3 [00:00<00:00,  3.62it/s]

response: <Response [200 OK]>


Reviewing 3 items - 2024-12-14 19:21:39: 100%|██████████| 3/3 [00:01<00:00,  2.42it/s]

response: <Response [200 OK]>


Reviewing 3 items - 2024-12-14 19:21:39: 100%|██████████| 3/3 [00:01<00:00,  1.94it/s]

response: <Response [200 OK]>


 Outputs:
{'reasoning': 'The segmentized quarantine policy effectively manages the tradeoff between containment of infectious disease and social cost of quarantine, thus meeting the criteria.', 'score': 2}
{'reasoning': 'The paper meets the criteria because it presents a large language model driven agent framework for multimodal automl in protein engineering, which is a relevant topic.', 'score': 2}
{'reasoning': 'The paper meets the criteria because it discusses the integration of large vision language models for efficient post-disaster damage assessment and reporting, which is a relevant and meaningful topic.', 'score': 2}

Costs:

0.00029238
0.00029554
0.00029555

Total cost:

0.00029555





## Testing the main functionalities

#### A multiagent review workflow for doing title/abstract analysis

Setting up the agents:

In [8]:
# Round-A reviewer "Pouria": gpt-4o-mini at temperature 0.1 with "cot" reasoning.
# NOTE(review): the prompt strings contain typos ("statistcis", "organizaton", "tilte",
# "disucss", "languange"); they are left untouched here because they are sent verbatim
# to the model.
pouria = ScoringReviewer(
    name="Pouria",
    # Alternative providers that can be swapped in:
    # provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    # provider=LiteLLMProvider(model="groq/llama-3.3-70b-versatile"),
    provider=OpenAIProvider(model="gpt-4o-mini"),
    backstory="a radiologist with many years of background in statistcis and data science, who are famous among your colleagues for your systematic thinking, organizaton of thoughts, and being conservative",
    input_description="tilte and abstract of scientific articles",
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    scoring_rules="Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.",
    score_set=[1, 2],
    reasoning="cot",
    temperature=0.1,  # ineffective for ollama
    max_tokens=100,  # ineffective for ollama
    max_concurrent_requests=20,
)

# Round-A reviewer "Bardia": same task, score set and scoring rules as Pouria, but with a
# different backstory, temperature 0.7 and "brief" reasoning.
# NOTE(review): the prompt strings contain typos ("tilte", "disucss", "languange"); they
# are left untouched here because they are sent verbatim to the model.
bardia = ScoringReviewer(
    name="Bardia",
    provider=OpenAIProvider(model="gpt-4o-mini"),
    backstory="an expert in data science with a background in developing ML models for healthcare, who are famous among your colleagues for your creativity and out of the box thinking",
    input_description="tilte and abstract of scientific articles",
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    scoring_rules="Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.",
    score_set=[1, 2],
    reasoning="brief",
    temperature=0.7,
    max_tokens=100,
    max_concurrent_requests=20,
)

# Round-B arbiter "Brad": receives an article together with Pouria's and Bardia's
# round-A outputs and decides which of the two reviewers to side with.
brad = ScoringReviewer(
    provider=OpenAIProvider(model="gpt-4o"),
    name="Brad",
    max_concurrent_requests=20,
    backstory="a senior radiologist with a PhD in computer science and years of experience as the director of a DL lab focused on developing ML models for radiology and healthcare",
    input_description="tilte and abstract of scientific articles",
    temperature=0.4,
    reasoning="cot",
    max_tokens=100,
    # The score meanings described to Brad must mirror the round-A scoring_rules
    # ("Score 1 if the paper meets the criteria, and 2 if the paper does not meet the
    # criteria."). The previous wording stated the opposite mapping and also mentioned a
    # score of 0, which the round-A score_set ([1, 2]) does not contain.
    scoring_task="""Pouria and Bardia have Looked for articles that disucss large languange models-based AI agents applied to medical imaging data. 
                       They scored an article 1 if they thought it meets the criteria, and 2 if they thought it does not meet the criteria.
                       You will receive an article they have had different opinions about, as well as each of their scores and their reasoning for that score. Read their reviews and determine who you agree with. 
                    """,
    score_set=[1, 2],
    scoring_rules="""Score 1 if you agree with Pouria, and score 2 if you agree with Bardia.""",
)


Setting up the review workflow:

In [9]:
# Round A: Pouria and Bardia each review the "Title" and "abstract" columns.
# No filter is given for this round.
first_pass_round = {
    "round": "A",
    "reviewers": [pouria, bardia],
    "inputs": ["Title", "abstract"],
}

# Round B: Brad additionally receives both round-A outputs, and the filter keeps only
# the rows on which the two round-A scores differ.
arbitration_round = {
    "round": "B",
    "reviewers": [brad],
    "inputs": ["Title", "abstract", "round-A_Pouria_output", "round-A_Bardia_output"],
    "filter": lambda row: row["round-A_Pouria_output"]["score"] != row["round-A_Bardia_output"]["score"],
}

title_abs_review = ReviewWorkflow(
    workflow_schema=[first_pass_round, arbitration_round]
)

Applying the review workflow to a number of sample articles:

In [10]:
# Reload the data if needed.
# A fixed random_state makes the 10-article sample reproducible, so a
# "Restart Kernel -> Run All" reviews the same rows every time.
sample_data = (
    pd.read_excel('data.xlsx')
    .sample(10, random_state=42)
    .reset_index(drop=True)
)

# Run both review rounds over the sample; the workflow returns the data with the
# reviewers' output columns added (see the table displayed below).
updated_data = asyncio.run(title_abs_review(sample_data))

print("Total cost: ")
print(title_abs_review.get_total_cost())

# Cost broken down per (round, reviewer) pair.
print("\nDetailed cost:")
print(title_abs_review.reviewer_costs)

updated_data


Starting review round A (1/2)...
Processing 10 eligible rows


['round: A', 'reviewer_name: Pouria'] - 2024-12-14 19:21:49: 100%|██████████| 10/10 [00:01<00:00,  5.76it/s]
['round: A', 'reviewer_name: Bardia'] - 2024-12-14 19:21:50: 100%|██████████| 10/10 [00:06<00:00,  1.50it/s]


Starting review round B (2/2)...
Skipping review round B - no eligible rows
Total cost: 
0.00018735

Detailed cost:
{('A', 'Pouria'): 0.0001125, ('A', 'Bardia'): 7.485e-05}





Unnamed: 0,ID,Title,1st author,repo,year,abstract,round-A_Pouria_output,round-A_Pouria_score,round-A_Pouria_reasoning,round-A_Bardia_output,round-A_Bardia_score,round-A_Bardia_reasoning
0,224,AVA: Towards Autonomous Visualization Agents t...,"Liu, S.",arXiv,2023,With recent advances in multi-modal foundation...,{'reasoning': 'The article describes the devel...,2,The article describes the development of Auton...,{'reasoning': 'The article discusses the devel...,2,The article discusses the development of Auton...
1,331,Ask and You Shall be Served: Representing and ...,"Lavie, M.",arXiv,2023,In scenarios with numerous emergencies that ar...,{'reasoning': 'To evaluate the input article a...,2,To evaluate the input article against the crit...,{'reasoning': 'The article discusses multi-age...,2,The article discusses multi-agent optimization...
2,308,Adaptive therapy in cancer: the role of restri...,"Fontaneda, D.",bioRxiv,2023,BACKGROUND: Cancer is currently one of the lea...,{'reasoning': 'Upon reviewing the title and ab...,2,"Upon reviewing the title and abstract, it is c...",{'reasoning': 'The article discusses adaptive ...,2,The article discusses adaptive therapy and can...
3,404,Automated Kantian Ethics: A Faithful Implement...,"Singh, L.",arXiv,2022,As we grant artificial intelligence increasing...,{'reasoning': 'The title and abstract provided...,2,The title and abstract provided discuss the im...,{'reasoning': 'This article discusses automate...,2,This article discusses automated ethics in AI ...
4,71,CREW: Facilitating Human-AI Teaming Research,"Zhang, L.",arXiv,2024,With the increasing deployment of artificial i...,{'reasoning': 'The article discusses Human-AI ...,2,The article discusses Human-AI teaming researc...,{'reasoning': 'The article discusses a platfor...,2,The article discusses a platform for Human-AI ...
5,752,Behavioral analytics for myopic agents,"Mintz, Y.",arXiv,2017,Many multi-agent systems have the structure of...,{'reasoning': 'The article discusses a behavio...,2,The article discusses a behavioral analytics f...,{'reasoning': 'The article discusses behaviora...,2,The article discusses behavioral analytics in ...
6,402,Fusing an agent-based model of mosquito popula...,"Cavany, S.M.",bioRxiv,2022,The mosquito Aedes aegypti is the vector of a ...,{'reasoning': 'The title and abstract of the p...,2,The title and abstract of the provided article...,{'reasoning': 'The paper discusses a model for...,2,The paper discusses a model for mosquito popul...
7,534,Deep learning for visualization and novelty de...,"Banko, L.",arXiv,2021,We apply variational autoencoders (VAE) to X-r...,{'reasoning': 'The title and abstract of the i...,2,The title and abstract of the input item discu...,{'reasoning': 'The article discusses the appli...,2,The article discusses the application of varia...
8,289,INFERRING EPIDEMIC DYNAMICS USING GAUSSIAN PRO...,"Ahmed, A.A.",arXiv,2023,Computational models help decision makers unde...,{'reasoning': 'The article title and abstract ...,2,The article title and abstract provided focus ...,{'reasoning': 'The article focuses on agent-ba...,2,The article focuses on agent-based simulations...
9,539,Provably Efficient Cooperative Multi-Agent Rei...,"Dubey, A.",arXiv,2021,Reinforcement learning in cooperative multi-ag...,{'reasoning': 'The title and abstract of the a...,2,The title and abstract of the article discuss ...,{'reasoning': 'The article discusses reinforce...,2,The article discusses reinforcement learning i...


In [11]:
# Print each reviewed article together with every reviewer's score and reasoning.
for _, record in updated_data.iterrows():
    # The round-B columns may be absent (e.g. when round B was skipped);
    # Series.get returns None for a missing label.
    brad_score = record.get("round-B_Brad_score")
    brad_reasoning = record.get("round-B_Brad_reasoning")
    print(
        f"""
        Title: {record["Title"]}
        Abstract: {record["abstract"]}
        Pouria's score: {record["round-A_Pouria_score"]}
        Pouria's reasoning: {record["round-A_Pouria_reasoning"]}
        Bardia's score: {record["round-A_Bardia_score"]}
        Bardia's reasoning: {record["round-A_Bardia_reasoning"]}
        Brad's score: {brad_score}
        Brad's reasoning: {brad_reasoning}
        """
    )


        Title: AVA: Towards Autonomous Visualization Agents through Visual Perception-Driven Decision-Making
        Abstract: With recent advances in multi-modal foundation models, the previously text-only large language models (LLM) have evolved to incorporate visual input, opening up unprecedented opportunities for various applications in visualization. Our work explores the utilization of the visual perception ability of multi-modal LLMs to develop Autonomous Visualization Agents (AVAs) that can interpret and accomplish user-defined visualization objectives through natural language. We propose the first framework for the design of AVAs and present several usage scenarios intended to demonstrate the general applicability of the proposed paradigm. The addition of visual perception allows AVAs to act as the virtual visualization assistant for domain experts who may lack the knowledge or expertise in fine-tuning visualization outputs. Our preliminary exploration and proof-of-concept a