# Llama Index Framework
This notebook is dedicated to learning how to build agents with the LlamaIndex framework.

In [3]:
# Make sure to setup your environment currectly 
# For this project I used python 3.12.3 

# Since we are using huggingface models, we need to install the huggingface_hub library
%pip install -q llama-index-llms-huggingface-api llama-index-embeddings-huggingface
%pip install -q lmstudio
%pip install -q load-dotenv

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Try out the LM Studio SDK
import lmstudio as lms

# Loop variables in a notebook cell become kernel-wide globals. They are given
# distinct names here so they cannot clobber `llm` / `model`, which other cells
# use for different objects.

# List all models available locally
lmstudio_downloaded_models = lms.list_downloaded_models()
for downloaded_model in lmstudio_downloaded_models:
    print(f"Downloaded model: {downloaded_model}")

# List LLMs only
lmstudio_llms_only = lms.list_downloaded_models("llm")
for downloaded_llm in lmstudio_llms_only:
    print(f"LLM: {downloaded_llm}")

# List embedding models only
lmstudio_embeddings_only = lms.list_downloaded_models("embedding")
for downloaded_embedding in lmstudio_embeddings_only:
    print(f"Embedding: {downloaded_embedding}")



Downloaded model: DownloadedLlm(model_key='gemma-3-4b-it', display_name='Gemma 3 4B Instruct', architecture='gemma3', vision=True)
Downloaded model: DownloadedLlm(model_key='gemma-3-1b-it', display_name='Gemma 3 1B Instruct', architecture='gemma3', vision=False)
Downloaded model: DownloadedEmbeddingModel(model_key='text-embedding-nomic-embed-text-v1.5', display_name='Nomic Embed Text v1.5', architecture='nomic-bert')
Downloaded model: DownloadedLlm(model_key='gemma-3-12b-it', display_name='Gemma 3 12B Instruct', architecture='gemma3', vision=True)
LLM: DownloadedLlm(model_key='gemma-3-4b-it', display_name='Gemma 3 4B Instruct', architecture='gemma3', vision=True)
LLM: DownloadedLlm(model_key='gemma-3-1b-it', display_name='Gemma 3 1B Instruct', architecture='gemma3', vision=False)
LLM: DownloadedLlm(model_key='gemma-3-12b-it', display_name='Gemma 3 12B Instruct', architecture='gemma3', vision=True)
Embedding: DownloadedEmbeddingModel(model_key='text-embedding-nomic-embed-text-v1.5', dis

In [None]:
# NOTE(review): this cell has no execution count, i.e. it was never run in the
# saved notebook. It binds `lmstudio_llm` to the integer 78; the structured-output
# cell rebinds the same name to `lms.llm()`. Presumably a leftover scratch cell --
# confirm whether it can be deleted.
import lmstudio as lms

lmstudio_llm = 78 

In [5]:
# Structured output
import lmstudio as lms
from pydantic import BaseModel


# Fields the model's answer is expected to contain. Kept as a comment rather
# than a class docstring so the schema generated from the class is unchanged.
class Book(BaseModel):
    title: str
    author: str
    year: int
    rating: float


lmstudio_llm = lms.llm()  # Gets the currently loaded model

# The book title is now wrapped in a balanced pair of single quotes
# (the original prompt was missing the closing quote).
prompt = "Tell me about the book 'The Great Gatsby'"

# `response_format=Book` asks the SDK for a reply shaped like the Book schema
response = lmstudio_llm.respond(
    prompt,
    response_format=Book,
)

print(response)


{
  "author": "F. Scott Fitzgerald",
  "rating": 4.3,
  "title": "The Great Gatsby",
  "year": 1925
}


In [6]:
from dotenv import load_dotenv
import os

# Pull the variables defined in the local .env file into the process environment
load_dotenv()

# The Hugging Face token must be supplied through the HF_TOKEN variable;
# fail fast when it is absent or empty
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN not found in environment variables. Please add it to your .env file")
print("HF_TOKEN found in environment variables")

# Login to Hugging Face (the explicit login call is currently disabled)
import huggingface_hub
# huggingface_hub.login(token=hf_token)


HF_TOKEN found in environment variables


In [7]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from dotenv import load_dotenv
import os

load_dotenv()

# Hugging Face model to query; swap in the commented alternative to try another one
# model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Constructor arguments gathered in one place; the token is read from HF_TOKEN
llm_settings = {
    "model_name": model_name,
    "temperature": 0.7,
    "max_tokens": 1000,
    "token": os.environ.get("HF_TOKEN"),
}
llm = HuggingFaceInferenceAPI(**llm_settings)

# Kept as the cell's final expression so the notebook displays the response
llm.complete("Hello, how are you?")



CompletionResponse(text=' It\'s great to see you here.\nI\'m doing well, thanks for asking. I\'m excited to be talking to you today.\n\nSo, what\'s been going on in your life lately? Have you been up to any interesting projects or activities?\n\nI\'ve been keeping busy with my usual routine, but I did recently take a trip to a nearby city. It was a lot of fun, and I got to try some new foods and see some new sights.\n\nHow about you? Have you been up to anything exciting lately?\n\nI\'m glad you\'re doing well. It\'s always great to hear about people\'s adventures and experiences.\n\nWell, I\'m glad we had a chance to catch up. It was great talking to you, and I hope we can do it again soon. Take care!... more\n\nAs the conversation continues, the user is presented with a series of questions and responses that are designed to simulate a natural conversation. This is a simple example of a chatbot, which is a program that uses artificial intelligence to generate a conversation with a hum

# Key stages in RAG pipeline
1. Loading
2. Indexing
3. Storing
4. Querying
5. Evaluation

In [11]:
# Load data
from llama_index.core import SimpleDirectoryReader
from dotenv import load_dotenv  # same python-dotenv import the other cells use
import os

load_dotenv()

# Directory holding the source documents, configured through DOCUMENTS_DIR.
# Fail with an explicit message when it is missing instead of handing None
# to the reader.
directory_path = os.environ.get("DOCUMENTS_DIR")
if not directory_path:
    raise ValueError("DOCUMENTS_DIR not found in environment variables. Please add it to your .env file")

# Read the files found in the directory into LlamaIndex documents
reader = SimpleDirectoryReader(input_dir=directory_path)
documents = reader.load_data()
print(f"Found {len(documents)} documents")

Found 9 documents
