In [None]:
import os
import dotenv  # python-dotenv: copies KEY=value pairs from a .env file into os.environ

# Keep the real Hugging Face token out of the notebook: put
#   HUGGINGFACEHUB_API_TOKEN=hf_...
# in a git-ignored .env file (or export it in the shell) and load it here.
# load_dotenv() leaves already-set variables untouched and does nothing when
# no .env file is found.
dotenv.load_dotenv()

# Fall back to the placeholder only when nothing above supplied a token, so
# later cells can always read the variable.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "YOUR_API_TOKEN")

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from transformers import pipeline

# Read the source text: a single Markdown file, 'alice.md', from the data folder.
alice_reader = SimpleDirectoryReader(input_files=["./data/alice.md"])
documents = alice_reader.load_data()



In [None]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Load the LLM: Meta-Llama-3-8B-Instruct reached through a Hugging Face
# endpoint and used for text generation.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    # Read the token from the environment variable set earlier in this notebook
    # instead of repeating the secret as a string literal.
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,  # sampling disabled
    repetition_penalty=1.03,
)

# Load the embeddings: a sentence-transformers MiniLM model called through the
# Hugging Face Inference API.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    # Same token as the LLM, read from the environment variable set earlier in
    # this notebook rather than hard-coded here.
    api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    model_name="sentence-transformers/all-MiniLM-l6-v2"
)

# Register the models as the global defaults used when building and querying
# the index.
# NOTE(review): `llm` and `embeddings` come from LangChain packages; presumably
# LlamaIndex needs its LangChain adapter packages installed to accept them -
# confirm.
Settings.llm = llm
Settings.embed_model = embeddings

In [None]:
# Build a vector store index over the loaded documents.
index = VectorStoreIndex.from_documents(documents=documents)

In [None]:
# Ask a question against the index and print the answer.
user_query = "your query here"  # replace with the actual question

query_engine = index.as_query_engine()
en_response = query_engine.query(user_query)
print(en_response)