In [1]:
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain_ollama import ChatOllama
from langchain.vectorstores import Chroma
from dotenv import load_dotenv
import os

In [2]:
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [3]:
RAG_SYSTEM_TEMPLATE = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know. Don't Make Up Anything.
"""

In [28]:
SYSTEM_TEMPLATE = """
You only answer with the capital of the country asked about.
The answer MUST be in this exact format:
"The capital of <country> is <capital>."
If you don't know, ONLY say: I don't know.
Do not write anthing else, only the answer.
Here is an example of the question and answer:
Question: What is the capital of Latvia?    
Answer: The capital of Latvia is Riga.
Here is another example:
Question: What is the capital of Japan?
Answer: The capital of Japan is Tokyo.
Do not explain, apologize, or add anything else.
Do not say anything before or after the answer.
One question, one answer.
"""

In [29]:
ASSISTANT_TEMPLATE = """
The capital of Latvia is Riga.
"""

In [30]:
prompt_template = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_TEMPLATE),
    ("user", "{query}"),
    ("assistant", ASSISTANT_TEMPLATE)
])

In [31]:
prompt_template.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\nYou only answer with the capital of the country asked about.\nThe answer MUST be in this exact format:\n"The capital of <country> is <capital>."\nIf you don\'t know, ONLY say: I don\'t know.\nDo not write anthing else, only the answer.\nHere is an example of the question and answer:\nQuestion: What is the capital of Latvia?    \nAnswer: The capital of Latvia is Riga.\nHere is another example:\nQuestion: What is the capital of Japan?\nAnswer: The capital of Japan is Tokyo.\nDo not explain, apologize, or add anything else.\nDo not say anything before or after the answer.\nOne question, one answer.\n'), additional_kwargs={}),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='{query}'), additional_kwargs={}),
 AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partia

In [None]:
llm = ChatOllama(model="deepseek-r1:7b", base_url="http://localhost:11434")
# llm = OpenAI(model="gpt-4o-mini", temperature=0)

In [42]:
pipeline = prompt_template | llm

In [43]:
def ask(query):
    """Ask a question to the pipeline."""
    answer = pipeline.invoke({"query": query})
    return answer

In [45]:
ask("What is the capital of China?")

AIMessage(content=' \n(I made a mistake!)\nThe capital of China is Beijing.', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-06-26T18:49:00.7552189Z', 'done': True, 'done_reason': 'stop', 'total_duration': 359393700, 'load_duration': 13975700, 'prompt_eval_count': 177, 'prompt_eval_duration': 15246200, 'eval_count': 14, 'eval_duration': 329651400, 'model_name': 'llama3.2'}, id='run--3b570804-ff55-45b3-8850-0131f1fb187c-0', usage_metadata={'input_tokens': 177, 'output_tokens': 14, 'total_tokens': 191})