# SciAgents
## Automating scientific discovery through multi-agent intelligent graph reasoning

#### Alireza Ghafarollahi, Markus J. Buehler, MIT, 2024 (contact: mbuehler@MIT.EDU)

In [None]:
# One-time setup, deliberately wrapped in a string literal so that running this
# cell does nothing. To install, copy the three commands below into a notebook
# cell of their own: the `!` and `%` prefixes are IPython shell/magic syntax,
# not plain Python.
'''
!git clone https://github.com/lamm-mit/SciAgentsDiscovery.git
%cd SciAgentsDiscovery
!pip install -e .
'''

In [None]:
import os


def export_api_key(env_name, value):
    """Publish an API key through the environment without erasing an existing one.

    Args:
        env_name: Name of the environment variable to populate.
        value: Key typed into this notebook; may be an empty string.

    A non-empty ``value`` always wins. An empty ``value`` only fills the
    variable in when it is not defined yet, so a key already exported in the
    shell is no longer overwritten with ``''``.
    """
    if value:
        os.environ[env_name] = value
    else:
        # Keep the variable defined (as before), but never clobber a real key.
        os.environ.setdefault(env_name, value)


# Paste your keys here, or leave them empty to use the values already present
# in the environment.
OpenAI_key = ''
export_api_key('OPENAI_API_KEY', OpenAI_key)

SemanticScholar_api_key = ''
export_api_key('SEMANTIC_SCHOLAR_API_KEY', SemanticScholar_api_key)

# Directory passed to make_dir_if_needed() in a later cell.
data_dir_output = './graph_giant_component_LLMdiscovery_example/'

In [None]:
from huggingface_hub import hf_hub_download

# Files fetched from the 'lamm-mit/bio-graph-1K' Hugging Face repository:
# the graph itself (GraphML) and a pickle of embeddings.
graph_name = 'large_graph_simple_giant.graphml'
embedding_name = 'embeddings_simple_giant_ge-large-en-v1.5.pkl'

# Both files come from the same repo and land in the same local directory, so
# a single loop replaces the two copy-pasted calls (and the duplicated import).
for artifact_name in (graph_name, embedding_name):
    hf_hub_download(
        repo_id='lamm-mit/bio-graph-1K',
        filename=artifact_name,
        local_dir='./graph_giant_component',
    )

In [None]:
# The wildcard import presumably supplies the names the following cells use
# without defining them in this notebook (`make_dir_if_needed`, `user`,
# `manager`, `re`, `Markdown`, `markdown_to_pdf`) — verify against the package.
# It must run after the cells that set the API keys and download the graph
# files if the package reads them at import time — TODO confirm.
from ScienceDiscovery import *
# Prepare the output directory chosen in the configuration cell.
make_dir_if_needed(data_dir_output)

## Research idea generation using the automated multi-agent model

In [None]:
# Task description sent as the opening message of the conversation.
research_task = '''Develop a research proposal using random concepts. In the end, rate the novelty and feasibility of the research idea.'''

# Start a fresh conversation (history cleared) between `user` and `manager`;
# the returned object is consumed by the "Saving the output" cell via
# `res.chat_history`.
res = user.initiate_chat(
    recipient=manager,
    message=research_task,
    clear_history=True,
)

### Saving the output

In [None]:
# Flatten the chat transcript into one markdown string and export it to PDF.
#
# * A message carrying a tool call is rendered as "<function name>-<arguments>".
# * Any other message is rendered through its 'content'; the very first message
#   becomes a level-3 markdown heading.
# * Later messages whose content mentions the summary heading are additionally
#   collected in `formatted_text_summary`.
formatted_text = ""
formatted_text_summary = ""
for i, message in enumerate(res.chat_history):
    try:
        # Fix: name and arguments are both read from the *current* message.
        # The original took the arguments from chat_history[1] for every entry.
        tool_function = message['tool_calls'][0]['function']
        formatted_text += f'''{tool_function['name']}-{tool_function['arguments']}\n\n'''
    except (KeyError, IndexError, TypeError):
        # No usable tool call on this message (key missing, empty list, or a
        # None placeholder): fall back to its plain content.
        content_text = f'''{message['content']}'''
        if i == 0:
            formatted_text += '### ' + content_text + '\n\n'
        else:
            formatted_text += content_text + '\n\n'
            if re.search("Summary of the Initial Research Hypothesis", content_text):
                formatted_text_summary += content_text

# Rich-display object for the notebook; kept available for later cells.
text_markdown = Markdown(formatted_text)

# Write the full transcript to a PDF named after 'output_research'.
markdown_to_pdf(formatted_text, 'output_research')

In [None]:
from typing import List, Tuple
from ScienceDiscovery.utils import ThompsonSamplingBandit, parse_scores, create_path
from ScienceDiscovery.graph import G, embedding_tokenizer, embedding_model, node_embeddings
from ScienceDiscovery.agents import rate_novelty_feasibility

# Search a grid of path-sampling settings with a bandit: each arm is a pair
# (alpha, k), where alpha is passed to create_path() as `randomness_factor`
# and k as `num_random_waypoints`.
alphas = [tenth / 10 for tenth in range(11)]   # 0.0, 0.1, ..., 1.0
ks = [*range(1, 11)]                           # 1, 2, ..., 10

bandit = ThompsonSamplingBandit(alphas, ks)

# One record per round: (alpha, k, novelty, feasibility)
history: List[Tuple[float, int, float, float]] = []

# create_path() options that stay the same in every round.
fixed_path_options = dict(
    generate_graph_expansion=None,
    shortest_path=False,
    second_hop=False,
    data_dir='./',
    save_files=False,
    verbatim=True,
    keyword_1='silk',
    keyword_2='energy-intensive',
)

for _ in range(20):
    arm = bandit.select_arm()

    # Only the textual path description (second return value) is used.
    _, path_string = create_path(
        G,
        embedding_tokenizer,
        embedding_model,
        node_embeddings,
        randomness_factor=arm.alpha,
        num_random_waypoints=arm.k,
        **fixed_path_options,
    )

    # Rate the sampled path, then extract the two scores from the reply.
    rating_reply = rate_novelty_feasibility(path_string)
    novelty, feasibility = parse_scores(rating_reply)

    # Equal-weight blend of the two scores, each divided by 10
    # (presumably scores are on a 0-10 scale — confirm against parse_scores).
    novelty_term = 0.5 * (novelty / 10)
    feasibility_term = 0.5 * (feasibility / 10)
    reward = novelty_term + feasibility_term

    arm.update(reward)
    history.append((arm.alpha, arm.k, novelty, feasibility))

# Per-round report of the chosen arm and the scores it received.
for round_no, (alpha, k, nov, feas) in enumerate(history, start=1):
    print(f"Round {round_no:>2}: α={alpha:.1f}, k={k:<2d} → Novelty={nov:.1f}, Feasibility={feas:.1f}")
