In [None]:
%pip install llama-index-llms-openai
!pip install llama-index

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

import nest_asyncio
nest_asyncio.apply()


In [2]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    StorageContext,
)
from IPython.display import Markdown, display

In [None]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# The chat model and the embedding model are both reached through the same
# local Ollama endpoint.
OLLAMA_BASE_URL = "http://localhost:11434"

# Chat/completion model, run at a low temperature.
ollama_llm = Ollama(
    model="llama3.2:latest", base_url=OLLAMA_BASE_URL, temperature=0.1
)

# Embedding model; the extra kwargs are forwarded to Ollama unchanged.
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs=dict(mirostat=0),
)

# Register both on the shared Settings object.
# NOTE(review): presumably these become the defaults for calls that do not
# pass a model explicitly -- confirm against the LlamaIndex Settings docs.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [4]:
import getpass
import os

# Ask for the OpenAI key only when it is not already available. This keeps a
# key exported in the shell (or set by an earlier run of this cell) intact and
# lets "Restart & Run All" proceed without blocking on an interactive prompt.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("open ai api key: ")



In [5]:
from llama_index.llms.openai import OpenAI

# OpenAI-backed LLM handle.
# NOTE(review): no later cell visible in this notebook references `llm`; the
# queries below pass `ollama_llm` instead -- confirm whether this is still needed.
OPENAI_MODEL_NAME = "gpt-3.5-turbo"
llm = OpenAI(model=OPENAI_MODEL_NAME)

In [None]:
from llama_index.core import SimpleDirectoryReader

# Corpus for this demo: a single essay file, addressed relative to the notebook.
# Alternative input previously tried here: '../data/2022 Q3 AAPL.pdf'.
essay_files = ["../data/paul_graham_essay3.txt"]
essay_reader = SimpleDirectoryReader(input_files=essay_files)
documents = essay_reader.load_data()

In [None]:
from llama_index.core import VectorStoreIndex

# Build the vector index over the loaded documents. No embedding model is
# passed here; an explicit `embed_model=ollama_embedding` argument is the
# alternative that was tried previously.
vector_index = VectorStoreIndex.from_documents(documents=documents)


In [6]:
# Prompt text for the question-answering and refine steps.
# Placeholders: {context_str} / {query_str} for QA;
# {context_msg} / {query_str} / {existing_answer} for refine.

# QA prompt: context block fenced by dashed rules, then the question.
# The trailing empty element yields the final newline.
qa_prompt_str = "\n".join(
    [
        "Context information is below.",
        "---------------------",
        "{context_str}",
        "---------------------",
        "Given the context information and not prior knowledge, answer the question: {query_str}",
        "",
    ]
)

# Refine prompt: extra context fenced by dashed rules, the instruction, and
# finally the answer produced so far (no trailing newline).
refine_prompt_str = "\n".join(
    [
        "We have the opportunity to refine the original answer (only if needed) with some more context below.",
        "------------",
        "{context_msg}",
        "------------",
        "Given the new context, refine the original answer to better answer the question: {query_str}. If the context isn't useful, output the original answer again.",
        "Original Answer: {existing_answer}",
    ]
)

In [9]:
from llama_index.core import Settings, ChatPromptTemplate

# Both chat templates share one system instruction; only the user-turn prompt
# text differs between the QA template and the refine template.
SYSTEM_INSTRUCTION = "Always answer the question, even if the context isn't helpful."

# (role, content) pairs consumed by ChatPromptTemplate.from_messages.
chat_text_qa_msgs = [("system", SYSTEM_INSTRUCTION), ("user", qa_prompt_str)]
chat_refine_msgs = [("system", SYSTEM_INSTRUCTION), ("user", refine_prompt_str)]

text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
refine_template = ChatPromptTemplate.from_messages(chat_refine_msgs)

In [None]:
# Baseline: ask the question through a query engine that was given no custom
# prompt templates, using the Ollama LLM.
baseline_engine = vector_index.as_query_engine(llm=ollama_llm)
baseline_answer = baseline_engine.query("Who is Joe Biden?")
print(baseline_answer)


In [None]:
# Same question, this time with the custom QA and refine chat templates
# passed to the query engine.
templated_engine = vector_index.as_query_engine(
    llm=ollama_llm,
    text_qa_template=text_qa_template,
    refine_template=refine_template,
)
response = templated_engine.query("Who is Joe Biden?")
print(response)