In [1]:
from llama_index import SimpleDirectoryReader

# Read the files under ./assets/ into llama_index documents.
# filename_as_id=True asks the reader to use file names as document IDs.
documents = SimpleDirectoryReader(
    './assets/',
    filename_as_id=True,
).load_data()

In [4]:
import chromadb
# Chroma client created via EphemeralClient (as opposed to the PersistentClient
# used for on-disk storage).
chroma_client = chromadb.EphemeralClient()
# Backward-compatible alias for the original (misspelled) variable name, in case
# other cells still refer to it.
chroma_clint = chroma_client

# get_or_create_collection (rather than create_collection) keeps this cell
# re-runnable: create_collection raises if a collection named "llama2" already
# exists in the client.
# NOTE: `chroma_collection` is rebound to a persistent collection in a later cell.
chroma_collection = chroma_client.get_or_create_collection("llama2")

In [7]:
from llama_index.vector_stores import ChromaVectorStore
from llama_index import VectorStoreIndex
from llama_index.storage.storage_context import StorageContext

In [9]:
# Open (or create) the on-disk Chroma database and its "llama2" collection.
db = chromadb.PersistentClient(path='./storage/chroma')
chroma_collection = db.get_or_create_collection("llama2")

# Expose the Chroma collection to llama_index as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection persists across runs, so only ingest when it is still empty.
# Calling from_documents on every run would re-embed the documents and append
# duplicate vectors to the stored collection.
# NOTE: files added to ./assets/ after the first ingest are not picked up
# automatically; clear ./storage/chroma (or insert them explicitly) to re-index.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
else:
    # Reuse the embeddings that are already stored in the collection.
    index = VectorStoreIndex.from_vector_store(vector_store)

In [14]:
from llama_index.embeddings import OpenAIEmbedding

# Basic usage of a Chroma collection: embed the search text ourselves,
# then query the collection directly with that vector.
search_text = "llama2"
embed_model = OpenAIEmbedding()
embedding = embed_model.get_text_embedding(search_text)

# The Chroma collection can be searched directly; ask for 2 results.
results = chroma_collection.query(query_embeddings=[embedding], n_results=2)
print(results)

{'ids': [['a18cb57d-4d5e-499f-b109-6db32bf97675', '68a3be7b-8fa0-4a8c-9aef-f881f4c94981']], 'distances': [[0.2875857353210449, 0.289158707777537]], 'metadatas': [[{'_node_content': '{"id_": "a18cb57d-4d5e-499f-b109-6db32bf97675", "embedding": null, "metadata": {"page_label": "1", "file_name": "llama2.pdf", "file_path": "assets/llama2.pdf", "file_type": "application/pdf", "file_size": 13661300, "creation_date": "2024-02-05", "last_modified_date": "2023-12-16", "last_accessed_date": "2024-02-05"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "assets/llama2.pdf_part_0", "node_type": "4", "metadata": {"page_label": "1", "file_name": "llama2.pdf", "file_path": "assets/llama2.pdf", "file_type": "application/pdf", "file_size": 13661300, "cre

In [10]:
# Build a query engine on top of the index, using as_query_engine()'s defaults.
query_engine = index.as_query_engine()

In [11]:
# Ask the query engine a natural-language question and print its answer.
response = query_engine.query('What is llama2?')
print(response)

Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These models, specifically the Llama 2-Chat models, are optimized for dialogue use cases and have demonstrated competitiveness with existing open-source chat models. The developers of Llama 2 have provided a detailed description of their approach to fine-tuning and safety improvements, with the aim of enabling the community to build on their work and contribute to the responsible development of LLMs.
