In [1]:
%reload_ext autoreload
%autoreload 2

import asyncio
import json
import nest_asyncio
import os
import sys
from dotenv import load_dotenv
import numpy as np
import pandas as pd

sys.path.append('../')
from lattereview.providers.openai_provider import OpenAIProvider
from lattereview.providers.ollama_provider import OllamaProvider
from lattereview.agents.scoring_reviewer import ScoringReviewer
from lattereview.review_workflow import ReviewWorkflow

Failed to update token costs. Using static costs.
  logger.error("Failed to update token costs. Using static costs.")
  from .autonotebook import tqdm as notebook_tqdm


## Setting up the notebook

Loading environment variables:

In [2]:
# Load environment variables from .env file
load_dotenv('../.env')

# Confirm the key is available WITHOUT echoing it. Cell outputs are stored in
# the notebook file, so printing the value would leak the secret to anyone who
# can read the notebook or its version-control history.
print("OPENAI_API_KEY loaded:", os.getenv('OPENAI_API_KEY') is not None)

# Enable asyncio in Jupyter (later cells call asyncio.run(...) directly)
nest_asyncio.apply()

[REDACTED: an API key was printed here. Treat the original key as compromised and rotate it.]


Loading a dummy dataset:

In [3]:
# Read the demo spreadsheet into a DataFrame and preview its first five rows.
data = pd.read_excel('data.xlsx')
data.head(n=5)

Unnamed: 0,ID,Title,1st author,repo,year,abstract
0,1,Segmentized quarantine policy for managing a t...,"Kim, J.",arXiv,2024,"By the end of 2021, COVID-19 had spread to ove..."
1,2,AutoProteinEngine: A Large Language Model Driv...,"Liu, Y.",arXiv,2024,Protein engineering is important for biomedica...
2,3,Integration of Large Vision Language Models fo...,"Chen, Z.",arXiv,2024,Traditional natural disaster response involves...
3,4,Choice between Partial Trajectories,"Marklund, H.",arXiv,2024,As AI agents generate increasingly sophisticat...
4,5,Building Altruistic and Moral AI Agent with Br...,"Zhao, F.",arXiv,2024,"As AI closely interacts with human society, it..."


## Testing the base functionalities

Testing the OpenAI provider:

In [16]:
# Smoke test: send one trivial prompt through the OpenAI provider.
# The awaited result is left as the cell's last expression so Jupyter shows it.
question = "What is the capital of France?"
openanai_provider = OpenAIProvider(model="gpt-4o-mini")
asyncio.run(
    openanai_provider.get_response(question, temperature=0.9)
)

('The capital of France is Paris.',
 {'input_cost': 1.05e-06, 'output_cost': 4.2e-06, 'total_cost': 5.25e-06})

Testing the Ollama provider:

In [17]:
# Smoke test: send the same trivial prompt through the Ollama provider,
# pointed at the host given below.
question = "What is the capital of France?"
ollama_provider = OllamaProvider(
    model="llama3.2-vision:latest",
    host="http://localhost:11434",
)
asyncio.run(
    ollama_provider.get_response(question)
)

('The capital of France is Paris!',
 {'input_cost': 0, 'output_cost': 0, 'total_cost': 0})

Testing the ScoringReviewer agent:

In [4]:
# Single reviewer used by the smoke test in this cell. Swap the two `provider`
# lines to run the same agent against a local Ollama model instead of OpenAI.
# NOTE(review): this call passes `review_criteria=` while the reviewers in the
# workflow section pass `scoring_task=` -- confirm which keyword the installed
# ScoringReviewer expects.
agent = ScoringReviewer(
    # Who the reviewer is
    name="Pouria",
    backstory="an expert reviewer and researcher!",
    # Which model backs it
    provider=OpenAIProvider(model="gpt-4o-mini"),
    # provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    temperature=0.1,  # ineffective for ollama
    max_tokens=100,  # ineffective for ollama
    # What it reviews and how it scores
    input_description="article title",
    reasoning="brief",
    review_criteria="Look for articles that certainly do not employ any AI or machine learning agents",
    scoring_rules='Score 1 if the paper does not meet the criteria, and 2 if the paper meets the criteria.',
    score_set=[1, 2],
)


# Dummy input: every article title, lower-cased; only the first three are reviewed.
text_list = data["Title"].str.lower().tolist()
first_titles = text_list[:3]
print("Inputs:\n\n", "\n".join(first_titles))

# Dummy review: run the agent over those three titles.
results, total_cost = asyncio.run(agent.review_items(first_titles))
print("\n\n Outputs:")
for review in results:
    print(review)

# Dummy costs: one entry per item held in the agent's memory.
print("\nCosts:\n")
for memory_entry in agent.memory:
    print(memory_entry["cost"])

print("\nTotal cost:\n")
print(total_cost)

Inputs:

 segmentized quarantine policy for managing a tradeoff between containment of infectious disease and social cost of quarantine
autoproteinengine: a large language model driven agent framework for multimodal automl in protein engineering
integration of large vision language models for efficient post-disaster damage assessment and reporting


 Outputs:
{'reasoning': 'The article title suggests a clear and specific policy approach that aims to balance the need for disease containment with social costs, which meets the criteria.', 'score': 2}
{'reasoning': "The input item 'autoproteinengine: a large language model driven agent framework for multimodal automl in protein engineering' appears to meet the criteria as it describes a specific application of AI in protein engineering, which is relevant to my expertise.", 'score': 2}
{'reasoning': 'The input item seems to meet the criteria as it involves the integration of large vision language models for post-disaster damage assessment a

## Testing the main functionalities

### A multi-agent review workflow for title/abstract analysis

Setting up the agents:

In [19]:
# Round-A reviewer backed by a local Ollama model. Swap the two `provider`
# lines to run it on OpenAI instead.
pouria = ScoringReviewer(
    # Who the reviewer is
    name="Pouria",
    backstory="a radiologist with many years of background in statistcis and data science, who are famous among your colleagues for your systematic thinking, organizaton of thoughts, and being conservative",
    # Which model backs it
    # provider=OpenAIProvider(model="gpt-4o-mini"),
    provider=OllamaProvider(model="llama3.2-vision:latest", host="http://localhost:11434"),
    temperature=0.1,  # ineffective for ollama
    max_tokens=100,  # ineffective for ollama
    # What it reviews and how it scores
    input_description="tilte and abstract of scientific articles",
    reasoning="cot",
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    scoring_rules='Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.',
    score_set=[1, 2],
)

# Round-A reviewer backed by OpenAI gpt-4o-mini. It shares Pouria's scoring
# task and rules but uses a higher temperature and "brief" reasoning.
bardia = ScoringReviewer(
    # Who the reviewer is
    name="Bardia",
    backstory="an expert in data science with a background in developing ML models for healthcare, who are famous among your colleagues for your creativity and out of the box thinking",
    # Which model backs it
    provider=OpenAIProvider(model="gpt-4o-mini"),
    temperature=0.7,
    max_tokens=100,
    # What it reviews and how it scores
    input_description="tilte and abstract of scientific articles",
    reasoning="brief",
    scoring_task="Look for articles that disucss large languange models-based AI agents applied to medical imaging data",
    scoring_rules='Score 1 if the paper meets the criteria, and 2 if the paper does not meet the criteria.',
    score_set=[1, 2],
)

# Round-B adjudicator: reads Pouria's and Bardia's round-A reviews of an
# article and scores which of the two he agrees with.
#
# The score legend inside `scoring_task` must mirror the `scoring_rules` given
# to Pouria and Bardia (1 = meets the criteria, 2 = does not meet them). The
# earlier text described the opposite mapping and an "uncertain" score of 0
# that their score_set=[1, 2] does not contain, so Brad was interpreting the
# round-A scores backwards.
brad = ScoringReviewer(
    provider=OpenAIProvider(model="gpt-4o"),
    name="Brad",
    backstory="a senior radiologist with a PhD in computer science and years of experience as the director of a DL lab focused on developing ML models for radiology and healthcare",
    input_description = "tilte and abstract of scientific articles",
    temperature=0.4,
    reasoning = "cot",
    max_tokens=100,
    scoring_task="""Pouria and Bardia have Looked for articles that disucss large languange models-based AI agents applied to medical imaging data. 
                       They scored an article 1 if they thought it meets the criteria, and 2 if they thought it does not meet the criteria.
                       You will receive an article they have had different opinions about, as well as each of their scores and their reasoning for that score. Read their reviews and determine who you agree with. 
                    """,
    score_set=[1, 2],
    # Brad's own scores name the reviewer he sides with, not the article's relevance.
    scoring_rules="""Score 1 if you agree with Pouria, and score 2 if you agree with Bardia.""",
)


Setting up the review workflow:

In [30]:
# Two-round schema. Round A gives Title + abstract to Pouria and Bardia.
# Round B gives Brad the same inputs plus both round-A outputs, restricted by
# `filter` to rows where the two round-A scores differ.
title_abs_review = ReviewWorkflow(
    workflow_schema=[
        {
            "round": "A",
            "reviewers": [pouria, bardia],
            "inputs": ["Title", "abstract"],
        },
        {
            "round": "B",
            "reviewers": [brad],
            "inputs": [
                "Title",
                "abstract",
                "round-A_Pouria_output",
                "round-A_Bardia_output",
            ],
            # True when the round-A reviewers disagree on the score.
            "filter": lambda row: (
                row["round-A_Pouria_output"]["score"]
                != row["round-A_Bardia_output"]["score"]
            ),
        },
    ]
)

Applying the review workflow to a number of sample articles:

In [31]:
# Reload the data if needed.
# A fixed random_state makes the 10-article sample identical on every
# Restart & Run All, so the rows shown below can be reproduced. The reviewers'
# LLM responses may still vary between runs.
sample_data = pd.read_excel('data.xlsx').sample(10, random_state=42).reset_index(drop=True)

# Run every round of the workflow over the sample; the workflow object is
# awaited via asyncio.run, which works in Jupyter once nest_asyncio is applied.
updated_data = asyncio.run(title_abs_review(sample_data))

print("Total cost: ")
print(title_abs_review.get_total_cost())

print("\nDetailed cost:")
print(title_abs_review.reviewer_costs)

# Last expression: show the reviewed DataFrame with rich display.
updated_data


Starting review round A (1/2)...
Reviewers: ['Pouria', 'Bardia']
Input data: ['Title', 'abstract']


                                                       

Number of eligible rows for review: 10


Reviewing items: 100%|██████████| 10/10 [02:11<00:00, 13.19s/it]
Reviewing items: 100%|██████████| 10/10 [00:03<00:00,  2.74it/s]
                                                         


Starting review round B (2/2)...
Reviewers: ['Brad']
Input data: ['Title', 'abstract', 'round-A_Pouria_output', 'round-A_Bardia_output']


                                                       

Number of eligible rows for review: 0
Skipping review round B - no eligible rows
Total cost: 
8.385e-05

Detailed cost:
{('A', 'Pouria'): 0, ('A', 'Bardia'): 8.385e-05}




Unnamed: 0,ID,Title,1st author,repo,year,abstract,round-A_Pouria_output,round-A_Bardia_output,input_item
0,497,Group-based Motion Prediction for Navigation i...,"Wang, A.",arXiv,2021,We focus on the problem of planning the motion...,{'reasoning': 'The input item abstract discuss...,{'reasoning': 'The article discusses multi-age...,Title: Group-based Motion Prediction for Navig...
1,34,Improved precision oncology question-answering...,"Das, R.",medRxiv,2024,Despite the widespread application of Large La...,{'reasoning': 'The article discusses the devel...,{'reasoning': 'The article discusses robot mot...,Title: Improved precision oncology question-an...
2,42,Multi-Agent Obstacle Avoidance using Velocity ...,"Roncero, A.S.",arXiv,2024,Velocity Obstacles (VO) methods form a paradig...,{'reasoning': 'The input item discusses the ge...,{'reasoning': 'The article discusses synthetic...,Title: Multi-Agent Obstacle Avoidance using Ve...
3,117,EXPLORING PROSOCIAL IRRATIONALITY FOR LLM AGEN...,"Liu, X.",arXiv,2024,Large language models (LLMs) have been shown t...,{'reasoning': 'The input item discusses topolo...,{'reasoning': 'The article discusses agent-bas...,Title: EXPLORING PROSOCIAL IRRATIONALITY FOR L...
4,644,Topological data analysis of zebrafish patterns,"McGuirl, M.R.",arXiv,2019,Self-organized pattern behavior is ubiquitous ...,{'reasoning': 'The input item discusses the de...,{'reasoning': 'The article focuses on topologi...,Title: Topological data analysis of zebrafish ...
5,325,Generating synthetic multi-dimensional molecul...,"An, G.",arXiv,2023,The use of synthetic data is recognized as a c...,{'reasoning': 'The input item discusses Large ...,{'reasoning': 'The article focuses on a roboti...,Title: Generating synthetic multi-dimensional ...
6,229,Towards a Surgeon-in-the-Loop Ophthalmic Robot...,"Gomaa, A.",arXiv,2023,Robot-assisted surgical systems have demonstra...,{'reasoning': 'The input item discusses the ap...,{'reasoning': 'The paper discusses reinforceme...,Title: Towards a Surgeon-in-the-Loop Ophthalmi...
7,166,Promising and worth-to-try future directions f...,"Elsheikh, A.",arXiv,2024,The execution and runtime performance of model...,{'reasoning': 'The input item discusses an age...,{'reasoning': 'The article discusses the appli...,Title: Promising and worth-to-try future direc...
8,261,PDRL: Multi-Agent based Reinforcement Learning...,"Shaik, T.",arXiv,2023,Reinforcement learning has been increasingly a...,{'reasoning': 'The input item discusses a rein...,{'reasoning': 'The article focuses on the soci...,Title: PDRL: Multi-Agent based Reinforcement L...
9,103,MALLM-GAN: MULTI-AGENT LARGE LANGUAGE MODEL AS...,"Ling, Y.",arXiv,2024,"In the era of big data, access to abundant dat...",{'reasoning': 'The input item discusses surrog...,{'reasoning': 'The article discusses a framewo...,Title: MALLM-GAN: MULTI-AGENT LARGE LANGUAGE M...


In [36]:
# Print a plain-text report for every reviewed article: title, abstract and
# each reviewer's output.
for row_idx, row in updated_data.iterrows():
    title = row["Title"]
    abstract = row["abstract"]
    pouria_review = row["round-A_Pouria_output"]
    bardia_review = row["round-A_Bardia_output"]
    # The round-B column can be missing (the saved run skipped round B), so
    # fall back to None instead of raising a KeyError.
    brad_review = row["round-B_Brad_output"] if "round-B_Brad_output" in row else None

    report = f"""
        Title: {title}
        Abstract: {abstract}
        Pouria's review: {pouria_review}
        Bardia's review: {bardia_review}
        Brad's review: {brad_review}
        """
    print(report)


        Title: Group-based Motion Prediction for Navigation in Crowded Environments
        Abstract: We focus on the problem of planning the motion of a robot in a dynamic multiagent environment such as a pedestrian scene. Enabling the robot to navigate safely and in a socially compliant fashion in such scenes requires a representation that accounts for the unfolding multiagent dynamics. Existing approaches to this problem tend to employ microscopic models of motion prediction that reason about the individual behavior of other agents. While such models may achieve high tracking accuracy in trajectory prediction benchmarks, they often lack an understanding of the group structures unfolding in crowded scenes. Inspired by the Gestalt theory from psychology, we build a Model Predictive Control framework (G-MPC) that leverages group-based prediction for robot motion planning. We conduct an extensive simulation study involving a series of challenging navigation tasks in scenes extracted fr