In [1]:
# Install dependencies
!pip -q install -U langchain langchain-community chromadb sentence-transformers pypdf transformers accelerate
# Optional OpenAI
%pip -q install -U openai tiktoken langchain-openai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m64.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m67.3 MB/s[0m eta [36m0:00:0

In [2]:
import json, sys, platform, os, chromadb, transformers, sentence_transformers
try:
    import torch
    torch_v = torch.__version__
    cuda_ok = torch.cuda.is_available()
    device_name = torch.cuda.get_device_name(0) if cuda_ok else "CPU"
except:
    torch_v, cuda_ok, device_name = "N/A", False, "CPU"

env = {
    "python": sys.version,
    "platform": platform.platform(),
    "torch": torch_v,
    "cuda": cuda_ok,
    "device": device_name,
    "transformers": transformers.__version__,
    "sentence_transformers": sentence_transformers.__version__,
    "chromadb": chromadb.__version__
}
print(json.dumps(env, indent=2))
with open("env_rag.json","w") as f: json.dump(env, f, indent=2)

{
  "python": "3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]",
  "platform": "Linux-6.1.123+-x86_64-with-glibc2.35",
  "torch": "2.8.0+cu126",
  "cuda": true,
  "device": "Tesla T4",
  "transformers": "4.56.1",
  "sentence_transformers": "5.1.0",
  "chromadb": "1.1.0"
}


In [3]:
from google.colab import files
from pathlib import Path

print("Upload your PDFs/TXTs (you can select multiple):")
uploaded = files.upload()

CORPUS_DIR = Path("corpus")
CORPUS_DIR.mkdir(exist_ok=True)
for name, data in uploaded.items():
    (CORPUS_DIR / name).write_bytes(data)
all_files = [p for p in CORPUS_DIR.iterdir()]
print("Saved files:", [p.name for p in all_files])

Upload your PDFs/TXTs (you can select multiple):


Saving NeurIPS-2024-can-large-language-model-agents-simulate-human-trust-behavior-Paper-Conference.pdf to NeurIPS-2024-can-large-language-model-agents-simulate-human-trust-behavior-Paper-Conference.pdf
Saving NeurIPS-2024-richelieu-self-evolving-llm-based-agents-for-ai-diplomacy-Paper-Conference.pdf to NeurIPS-2024-richelieu-self-evolving-llm-based-agents-for-ai-diplomacy-Paper-Conference.pdf
Saving NeurIPS-2024-mdagents-an-adaptive-collaboration-of-llms-for-medical-decision-making-Paper-Conference.pdf to NeurIPS-2024-mdagents-an-adaptive-collaboration-of-llms-for-medical-decision-making-Paper-Conference.pdf
Saved files: ['NeurIPS-2024-can-large-language-model-agents-simulate-human-trust-behavior-Paper-Conference.pdf', 'NeurIPS-2024-richelieu-self-evolving-llm-based-agents-for-ai-diplomacy-Paper-Conference.pdf', 'NeurIPS-2024-mdagents-an-adaptive-collaboration-of-llms-for-medical-decision-making-Paper-Conference.pdf']


In [4]:
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

docs = []
for p in all_files:
    ext = p.suffix.lower()
    try:
        if ext == ".pdf":
            docs.extend(PyPDFLoader(str(p)).load())
        elif ext in [".txt", ".text", ".md"]:
            docs.extend(TextLoader(str(p), encoding="utf-8").load())
        else:
            print(f"[SKIP] Unsupported file type: {p.name}")
    except Exception as e:
        print(f"[WARN] Could not read {p.name}: {e}")

if not docs:
    raise ValueError("No supported documents parsed. Please upload at least one PDF or TXT file.")

In [5]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(docs)
print("Chunks:", len(chunks))
if chunks:
    print("First chunk:\n", chunks[0].page_content[:300])

Chunks: 1066
First chunk:
 Can Large Language Model Agents Simulate
Human Trust Behavior?
Chengxing Xie∗1, 11 Canyu Chen∗2
Feiran Jia4 Ziyu Ye5 Shiyang Lai5 Kai Shu6 Jindong Gu3 Adel Bibi3 Ziniu Hu7
David Jurgens8 James Evans5, 9, 10 Philip H.S. Torr3 Bernard Ghanem1 Guohao Li †3, 11
1KAUST 2Illinois Institute of Technology 3


In [6]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

emb = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(chunks, emb, persist_directory="chroma_minilm")
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
print("Chroma DB ready")

  emb = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Chroma DB ready


In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline

MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # fallback: "distilgpt2" if downloads are slow
tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
pipe = pipeline("text-generation", model=model, tokenizer=tok, max_new_tokens=200)
llm = HuggingFacePipeline(pipeline=pipe)
print("LLM ready:", MODEL_ID)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0


LLM ready: TinyLlama/TinyLlama-1.1B-Chat-v1.0


  llm = HuggingFacePipeline(pipeline=pipe)


In [8]:
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
q = "Compare how the three papers structure multi-agent behavior"
print("Q:", q)
print("A:", qa.run(q))

Q: Compare how the three papers structure multi-agent behavior


  print("A:", qa.run(q))


A: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Similar frameworks include voting [82], multi-disciplinary collaboration [ 72], group discussions
(ReConcile [10]), and negotiating [ 23]. Table 1 compares existing setups across key dimensions
in multi-agent interaction. Although these frameworks have shown improvement in the respective
tasks, they rely on a pre-determined number of agents and interaction settings. When applied on a
wider variety of tasks , this static architecture may lead to suboptimal multi-agent configurations,

Philip Paquette, Yuchen Lu, Seton Steven Bocco, Max Smith, Satya O-G, Jonathan K Kummerfeld,
Joelle Pineau, Satinder Singh, and Aaron C Courville. No-press diplomacy: Modeling multi-agent
gameplay. In Advances in Neural Information Processing Systems, volume 32, pages 4474–4485,
2019.
Siyuan Qi, Shuo Chen, Yexin Li, Xiangyu Kong, Junqi Wang, 

# Embedding Swap

In [11]:
emb_e5 = SentenceTransformerEmbeddings(model_name="intfloat/e5-small-v2")
vectordb_e5 = Chroma.from_documents(chunks, emb_e5, persist_directory="chroma_e5")
qa_e5 = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb_e5.as_retriever(), chain_type="stuff")
print("MiniLM vs E5-small test:\n")
print("MiniLM:", qa.run("Define VRR in the Trust paper in one sentence."))
print("E5-small:", qa_e5.run("Define VRR in the Trust paper in one sentence."))

MiniLM vs E5-small test:

MiniLM: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

davinci
003
vicuna
13b
gpt-4 vicuna
7b
human
0
1
2
3
4
5
6
7
8
9
10Amount Sent in Trust Game($)
Human Average(5.97)
30
40
50
60
70
80
90
100
Valid Response Rate (VRR) (%)
Figure 2: Amount Sent Distribution of LLM Agents
and Humans as the Trustor in the Trust Game. The
size of circles represents the number of personas for each
amount sent. The bold lines show the medians. The
crosses indicate the VRR (%) for different LLMs.
In this section, we investigate whether

crosses indicate the VRR (%) for different LLMs.
In this section, we investigate whether
or not LLM agents manifest trust be-
havior by letting LLM agents play the
Trust Game (Section 2.1 Game 1). In
Behavioral Economics, trust is widely
measured by the initial amount sent from
the trustor to the trustee in the Trust
Game (Glaeser e

# Chunk Sensitivity

In [12]:
splitter_small = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks_small = splitter_small.split_documents(docs)
vectordb_small = Chroma.from_documents(chunks_small, emb)
qa_small = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb_small.as_retriever(), chain_type="stuff")
print("Default chunks:", qa.run("List the four MDAgents stages in order."))
print("Smaller chunks:", qa_small.run("List the four MDAgents stages in order."))

Default chunks: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

across different data modalities provide insights into how quickly and effectively MDAgents can
reach a unified decision.
C Prompt Templates
C.1 A single agent setting
{{instruction}}
The following are multiple choice questions (with answers) about medical knowledge.
{{few_shot_examples}}
{{context}} **Question:** {{question}} {{answer_choices}} **Answer:**(
Few-shot multiple choice questions
{{instruction}}
The following are multiple choice questions (with answers) about medical knowledge.

the performance and characteristics of our MDAgents framework.
You are a {{role}} who {{description}}. Your job is to collaborate with other medical experts in a
team.
Agent initialization prompt
Given the opinions from other medical agents in your team, please indicate whether you want to talk to
any expert (yes/no). If 

In [13]:
repro = {
    "embedding_models": ["all-MiniLM-L6-v2","intfloat/e5-small-v2"],
    "chunking": [{"size":500,"overlap":100},{"size":300,"overlap":50}],
    "llm": MODEL_ID
}
with open("rag_run_config.json","w") as f: json.dump(repro,f,indent=2)
print("Saved rag_run_config.json")

Saved rag_run_config.json


I uploaded the three neurips PDFs and ran the track A notebook. First run was miniLM + a small local HF model. Then I swapped to e5-small, which gave a cleaner one line VRR definition. Also tried chunk 500/100 vs 300/50 smaller chunks sometimes pulled appendix/template bits, while the default chunks worked better for listing the four MDAgents stages. Saved env_rag.json and rag_run_config.json. main takeaway: embeddings + chunking decide what gets retrieved, so the answers change with those settings always peek at the source snippets.