In [1]:
%%capture
! pip install -q --upgrade llama-index llama-index llama-index-embeddings-huggingface sentence-transformers

In [2]:
%%capture
!pip install llama-index-llms-llama-cpp
!pip install llama-index-vector-stores-chroma

In [3]:
## get the data
!wget "https://openreview.net/pdf?id=VtmBAGCN7o" -O metagpt.pdf

--2024-06-26 08:55:00--  https://openreview.net/pdf?id=VtmBAGCN7o
Resolving openreview.net (openreview.net)... 35.184.86.251
Connecting to openreview.net (openreview.net)|35.184.86.251|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16911937 (16M) [application/pdf]
Saving to: ‘metagpt.pdf’


2024-06-26 08:55:01 (42.6 MB/s) - ‘metagpt.pdf’ saved [16911937/16911937]



In [11]:
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core import SimpleDirectoryReader
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import LongContextReorder

In [12]:
llm  = LlamaCPP(
   model_url='https://huggingface.co/bartowski/Llama-3-8B-Instruct-Gradient-1048k-GGUF/resolve/main/Llama-3-8B-Instruct-Gradient-1048k-Q5_K_S.gguf',
   model_path=None,
   temperature=0.1,
   max_new_tokens=256,
   context_window=3900,
   generate_kwargs={},
   model_kwargs={"n_gpu_layers":-1},
   verbose=True
)
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

documents = SimpleDirectoryReader(input_files=['metagpt.pdf']).load_data()

Downloading url https://huggingface.co/bartowski/Llama-3-8B-Instruct-Gradient-1048k-GGUF/resolve/main/Llama-3-8B-Instruct-Gradient-1048k-Q5_K_S.gguf to path /tmp/llama_index/models/Llama-3-8B-Instruct-Gradient-1048k-Q5_K_S.gguf
total size (MB): 5599.29


5340it [02:15, 39.54it/s]                          
llama_model_loader: loaded meta data with 26 key-value pairs and 291 tensors from /tmp/llama_index/models/Llama-3-8B-Instruct-Gradient-1048k-Q5_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Llama-3-8B-Instruct-Gradient-1048k
llama_model_loader: - kv   2:                          llama.block_count u32              = 32
llama_model_loader: - kv   3:                       llama.context_length u32              = 1048576
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.att

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [13]:
print((documents[0].text))

Preprint
METAGPT: M ETA PROGRAMMING FOR A
MULTI -AGENT COLLABORATIVE FRAMEWORK
Sirui Hong1∗, Mingchen Zhuge2∗, Jonathan Chen1, Xiawu Zheng3, Yuheng Cheng4,
Ceyao Zhang4,Jinlin Wang1,Zili Wang ,Steven Ka Shing Yau5,Zijuan Lin4,
Liyang Zhou6,Chenyu Ran1,Lingfeng Xiao1,7,Chenglin Wu1†,J¨urgen Schmidhuber2,8
1DeepWisdom,2AI Initiative, King Abdullah University of Science and Technology,
3Xiamen University,4The Chinese University of Hong Kong, Shenzhen,
5Nanjing University,6University of Pennsylvania,
7University of California, Berkeley,8The Swiss AI Lab IDSIA/USI/SUPSI
ABSTRACT
Remarkable progress has been made on automated problem solving through so-
cieties of agents based on large language models (LLMs). Existing LLM-based
multi-agent systems can already solve simple dialogue tasks. Solutions to more
complex tasks, however, are complicated through logic inconsistencies due to
cascading hallucinations caused by naively chaining LLMs. Here we introduce
MetaGPT, an innovative meta-programm

In [14]:
parser = SentenceSplitter(chunk_size = 128, chunk_overlap = 20)
nodes = parser.get_nodes_from_documents(documents)

print(f"Created {len(nodes)} nodes from {len(documents)} documents")

Created 223 nodes from 29 documents


In [15]:
#Let's print a few example nodes to get a feeling for what the Node Parser has done.
for i in range(3):
    print(f"Chunk {i}:")
    print("Text:")
    print(nodes[i].text)

Chunk 0:
Text:
Preprint
METAGPT: M ETA PROGRAMMING FOR A
MULTI -AGENT COLLABORATIVE FRAMEWORK
Sirui Hong1∗, Mingchen Zhuge2∗, Jonathan Chen1, Xiawu Zheng3, Yuheng Cheng4,
Ceyao Zhang4,Jinlin Wang1,Zili Wang ,Steven Ka Shing Yau5,Zijuan Lin4,
Liyang Zhou6,Chenyu Ran1,Lingfeng Xiao1,7,
Chunk 1:
Text:
Chenyu Ran1,Lingfeng Xiao1,7,Chenglin Wu1†,J¨urgen Schmidhuber2,8
1DeepWisdom,2AI Initiative, King Abdullah University of Science and Technology,
3Xiamen University,4The Chinese University of Hong Kong, Shenzhen,
5Nanjing University,6University of Pennsylvania,
7University of California, Berkeley,
Chunk 2:
Text:
5Nanjing University,6University of Pennsylvania,
7University of California, Berkeley,8The Swiss AI Lab IDSIA/USI/SUPSI
ABSTRACT
Remarkable progress has been made on automated problem solving through so-
cieties of agents based on large language models (LLMs). Existing LLM-based
multi-agent systems can already solve simple dialogue tasks. Solutions to more
complex tasks, however, are 

In [16]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# create client and a new collection
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("quickstart")

# Construct vector store
# set up ChromaVectorStore and load in data
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, embed_model=embed_model
)

In [17]:
# Define Postprocessor
node_postprocessor = LongContextReorder(top_k=2, middle_k=1)

In [18]:
# Define QueryEngine
retriever = index.as_retriever(similarity_top_k=2)
retriever_query_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[node_postprocessor]
)

In [19]:
# Used your advanced RAG
response = retriever_query_engine.query("How do agents share information with other agents?")


llama_print_timings:        load time =  329500.20 ms
llama_print_timings:      sample time =     127.24 ms /    47 runs   (    2.71 ms per token,   369.39 tokens per second)
llama_print_timings: prompt eval time =  973731.25 ms /  1488 tokens (  654.39 ms per token,     1.53 tokens per second)
llama_print_timings:        eval time =   46421.81 ms /    46 runs   ( 1009.17 ms per token,     0.99 tokens per second)
llama_print_timings:       total time = 1020378.14 ms /  1534 tokens


In [20]:
response.response

' Agents utilize role-specific interests to extract relevant information from the shared message pool. They can select information to follow based on their role profiles. In practical implementations, an agent activates its action only after receiving all its prerequisite dependencies.</s>'