## Embedding/Indexing

### Embedding

In [None]:
%pip install -Uq llama-index-embeddings-huggingface

In [2]:
from llama_index.core import Settings, SimpleDirectoryReader
# Point a directory reader at the JSON data folder, then load its contents
# as the documents that the indexing cells below consume.
json_reader = SimpleDirectoryReader(input_dir="../data/json/")
documents = json_reader.load_data()

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Model identifier passed to the HuggingFace embedding wrapper; the resulting
# embed_model is reused by every index built or loaded in this notebook.
embedding_model_name = "BAAI/bge-m3"
embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)

In [4]:
# Sanity check: embed a single sample sentence and inspect the resulting vector.
sample_text = "這是一堂關於LlamaIndex的教學課程"
embeddings = embed_model.get_text_embedding(sample_text)

print(len(embeddings))  # number of components in the vector
print(embeddings[:5])   # first five components only, to keep the output short

1024
[-0.09537022560834885, -0.0135239502415061, -0.012298086658120155, -0.023206280544400215, -0.01730787567794323]


### Indexing

In [5]:
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage

#### Local Disk

In [6]:
# Directory the local-disk index is persisted to and later restored from.
storage_path: str = "./storage/"

In [7]:
# Persist into local disk
# Step 1: build a vector index over the loaded documents, using embed_model for the embeddings.
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Step 2: write the index's storage context to storage_path so it can be restored later.
index.storage_context.persist(persist_dir=storage_path)

In [8]:
# Restore from local disk
# Rebuild a storage context from the files persisted under storage_path.
storage_context = StorageContext.from_defaults(persist_dir=storage_path)

# Load the index back from that storage context; the same embed_model that
# built the index is passed explicitly.
loaded_index = load_index_from_storage(
    storage_context=storage_context,
    embed_model=embed_model,
)

In [9]:
# Echo the index restored from local disk; the cell output is the object's repr.
loaded_index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x1692d7350>

#### Chroma

In [None]:
%pip -q install chromadb
%pip install -q llama-index-vector-stores-chroma

In [11]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Directory handed to the persistent Chroma client in the save/load cells.
chroma_storage_path: str = "./chroma_db"
# Name of the Chroma collection those cells open.
chroma_collection_name: str = "demo"

In [12]:
# save to disk
# Open (or create) the persistent collection and wrap it as a LlamaIndex vector store.
db = chromadb.PersistentClient(path=chroma_storage_path)
chroma_collection = db.get_or_create_collection(chroma_collection_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection outlives the kernel, so this cell must be safe to re-run:
# only embed and insert the documents when the collection is still empty.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=embed_model,
    )
else:
    # Already populated by an earlier run. Calling from_documents again would
    # append another copy of every node, so attach to the stored vectors instead.
    # To rebuild after the source documents change, delete the collection
    # (or the chroma_storage_path directory) and re-run this cell.
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )

In [13]:
# load from disk
# A second client on the same path stands in for a fresh session reading the persisted data.
db2 = chromadb.PersistentClient(path=chroma_storage_path)

# NOTE(review): get_or_create_collection presumably yields an empty collection when
# the name does not exist yet, so a wrong name would produce an empty index
# rather than an error — confirm against the Chroma docs.
chroma_collection = db2.get_or_create_collection(chroma_collection_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Attach an index to the existing vector store; embed_model is passed explicitly.
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

In [14]:
# Echo the index attached to the Chroma vector store; the cell output is the object's repr.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x169622190>