In [None]:
%pip install llama-index-llms-openai
!pip install llama-index

import logging
import sys

# Route ERROR-and-above log records to stdout through exactly one root handler.
# force=True replaces any handlers already attached to the root logger, so the
# cell is idempotent on re-run and each record is emitted once (adding a second
# StreamHandler on top of basicConfig would print every message twice).
logging.basicConfig(stream=sys.stdout, level=logging.ERROR, force=True)


In [None]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Local Ollama server shared by the LLM and the embedding model.
OLLAMA_BASE_URL = "http://localhost:11434"

# Configure Ollama LLM.
# An explicit, generous request timeout is set because local generation
# (especially the multi-call tree_summarize / refine response modes used later)
# can easily exceed the integration's short default and fail with a read timeout.
ollama_llm = Ollama(
    model="llama3.2:latest",
    base_url=OLLAMA_BASE_URL,
    temperature=0.1,
    request_timeout=120.0,
)

# Configure embedding model
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs={"mirostat": 0},
)

# Make both models the global defaults for every index / query engine built below.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [None]:
from llama_index.core import SimpleDirectoryReader

# Corpus to index: the Paul Graham essay text file.
essay_files = ['../data/paul_graham_essay3.txt']
essay_reader = SimpleDirectoryReader(input_files=essay_files)
documents = essay_reader.load_data()
# Alternative corpus (swap in to index the AAPL PDF instead):
# documents = SimpleDirectoryReader(input_files=['../data/2022 Q3 AAPL.pdf']).load_data()

In [None]:
import nest_asyncio
# Patch asyncio so nested event loops are allowed; presumably needed because the
# notebook kernel already runs its own loop — confirm if removing.
nest_asyncio.apply()

In [None]:
from llama_index.core import VectorStoreIndex

# Build the vector index from the loaded documents. The Ollama embedding model
# is passed explicitly here in addition to being set on Settings.embed_model.
vector_index = VectorStoreIndex.from_documents(documents, embed_model=ollama_embedding)


In [None]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Reranking postprocessor backed by a cross-encoder model; top_n=3 limits the
# number of nodes it passes on.
rerank = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=3
)

In [None]:
from time import perf_counter, time

In [None]:
# Query engine that retrieves the 10 most similar nodes and then hands them to
# the reranker postprocessor before the answer is synthesized.
query_engine = vector_index.as_query_engine(
    similarity_top_k=10, node_postprocessors=[rerank]
)

# perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be skewed by system clock adjustments the way time() can.
start = perf_counter()
response = query_engine.query(
    "Which grad schools did the author apply for and why?",
)
print(f"Elapsed: {round(perf_counter() - start, 2)}s")

In [None]:

# Show the answer returned by the most recent query.
print(response)

In [None]:
# Show the source nodes behind the answer, each excerpt limited via length=200.
print(response.get_formatted_sources(length=200))


In [None]:
# Baseline for comparison: same retrieval depth (top 10) but no reranker, so
# every retrieved node goes to response synthesis.
query_engine = vector_index.as_query_engine(similarity_top_k=10)

# perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be skewed by system clock adjustments the way time() can.
start = perf_counter()
response = query_engine.query(
    "Which grad schools did the author apply for and why?",
)

print(f"Elapsed: {round(perf_counter() - start, 2)}s")

In [None]:
# Show the answer returned by the most recent query.
print(response)


In [None]:
# Show the source nodes behind the answer, each excerpt limited via length=200.
print(response.get_formatted_sources(length=200))


In [None]:
# Rebind query_engine to one built with all defaults (no reranker and no
# explicit similarity_top_k).
query_engine = vector_index.as_query_engine()


In [None]:
# The indexed corpus is the Paul Graham essay, so ask a question it can answer;
# this is the same question used for the refine-mode comparison further down.
response = query_engine.query("In what all companies did the author worked?")
# Only meaningful when the AAPL PDF is loaded instead of the essay:
# response = query_engine.query("How much of profit and sales growth?")


In [None]:
# Show the answer returned by the most recent query.
print(response)

In [None]:
# Same index, but the answer is synthesized with the "tree_summarize" response mode.
tree_summarize_query_engine = vector_index.as_query_engine(response_mode="tree_summarize")
# The indexed corpus is the Paul Graham essay, so ask a question it can answer;
# using the same question as the other response modes keeps the outputs comparable.
response = tree_summarize_query_engine.query("In what all companies did the author worked?")
# Only meaningful when the AAPL PDF is loaded instead of the essay:
# response = tree_summarize_query_engine.query("How much of profit and sales growth?")
print("Tree Summarize Response:")
print(response)

In [None]:
from llama_index.core.response_synthesizers.type import ResponseMode
# List every available response mode (mapping of member name -> member).
print(ResponseMode.__members__)

In [None]:
from llama_index.core.response_synthesizers.type import ResponseMode
# Alternative to try instead of REFINE:
#   vector_index.as_query_engine(response_mode="simple_summarize", verbose=True)
# Query engine that synthesizes its answer with the REFINE response mode.
refine_query_engine = vector_index.as_query_engine(response_mode=ResponseMode.REFINE)

# Ask the essay question through the refine engine and print the result under a heading.
response = refine_query_engine.query("In what all companies did the author worked?")
print("Different Response Modes:")
print(response)