In [10]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv

load_success = load_dotenv()

# Langchain Example

In [11]:
# Simple translation pipeline using LangChain and OpenAI

# Language
# Input Text

# Transalated Text

# 1. Create LLM Object
# 2. Build a Prompt Template
# 3. Compose the prompt and model
# 4. Execute the chain

# 1. Create an LLM client object and we are going to use gpt 4 model
llm = ChatOpenAI(model="gpt-4o-mini")

# 2. Build a PromptTemplate.
prompt = PromptTemplate(
    input_variables=["text", "language"],
    template="Translate the following sentence into {language}:\n\n{text}",
)

# 3. Compose the prompt and model into a chain using the pipe operator.
chain = prompt | llm

# 4. Execute the chain.
text = "Good morning, Whats going on??"
language = "French"

result = chain.invoke({"text": text, "language": language})

print("--- Translated Text ---")
print(result.content)

--- Translated Text ---
Bonjour, que se passe-t-il ?


# Types of Prompt

In [12]:
# Zero-Shot Prompting
# The model performs a task without seeing any examples.
# You just give an instruction — the model uses its internal knowledge to answer.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

input_prompt="""
Classify the sentiment of the following text as 'positive' or 'negative'.
Do not generate any additional texts or explainations, return only the sentiment.
Your output should be one of the following:["positive",negative"]
Text: {query}
Sentiment:
"""
query = ""

prompt = PromptTemplate(
    input_variables=["query"],
    template=input_prompt,
)
chain = prompt | llm

result = chain.invoke({"query": query})

print("--- Output ---")
print(result.content)

--- Output ---
negative


In [13]:
# One-Shot Prompting
# You provide one example of how the task should be done before asking for the actual output.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

input_prompt="""
Classify the sentiment of the following text as 'positive' or 'negative'.
Do not generate any additional texts or explainations, return only the sentiment.
Your output should be one of the following:["positive", "negative"]

Example 01
Text: The film was good but hate the villain, I hate vaccations, The film was good, but i dont like the climax, 
Sentiment: Positive

Text:{query} Sentiment:
"""
query = ""

prompt = PromptTemplate(
    input_variables=["query"],
    template=input_prompt,
)
chain = prompt | llm

result = chain.invoke({"query": query})

print("--- Output ---")
print(result.content)

--- Output ---
Negative


In [14]:
# Few-Shot Prompting
# You provide one example of how the task should be done before asking for the actual output.

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

input_prompt="""
Classify the sentiment of the following text as 'positive' or 'negative'.
Do not generate any additional texts or explainations, return only the sentiment.
Your output should be one of the following:["positive",negative"]
Example 01
Text: The film was good but hate the villain # I hate vaccations, The film was good, but i dont like the climax, 
Sentiment: Positive
Example 02
Text: I hate vaccations
Sentiment: Positive

Text:{query} Sentiment:
"""
query = ""

prompt = PromptTemplate(
    input_variables=["query"],
    template=input_prompt,
)
chain = prompt | llm

result = chain.invoke({"query": query})

print("--- Output ---")
print(result.content)

--- Output ---
Negative


In [15]:
# Chain of Thought Prompting
# You ask the model to “think step-by-step” before answering. Helps with reasoning, math, and logical tasks.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

input_prompt="""
If a train leaves at 3PM and arrives at 6PM, how long was the journey?
Let's think step by step.
"""
query = ""

prompt = PromptTemplate(
    input_variables=["query"],
    template=input_prompt,
)
chain = prompt | llm

result = chain.invoke({"query": query})

print("--- Output ---")
print(result.content)

--- Output ---
To determine the duration of the train journey, we can follow these steps:

1. **Identify the departure time**: The train leaves at 3 PM.
2. **Identify the arrival time**: The train arrives at 6 PM.
3. **Calculate the time difference**: 

   - From 3 PM to 4 PM is 1 hour.
   - From 4 PM to 5 PM is another hour (totaling 2 hours).
   - From 5 PM to 6 PM is one more hour (totaling 3 hours).

So, the total journey time is 3 hours.

Therefore, the journey lasted **3 hours**.


# RAG Retrieval-Augmented Generation

In [16]:
# Indexing
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings


# 1. Load PDF
pdf_path = "./pdf/pondicherry.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# 2. Split into chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " ", ""],
)
chunks = splitter.split_documents(docs)

# 3. Create embeddings & store in Chroma
embeddings = OpenAIEmbeddings()
vector_db = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")

In [17]:
# Retrieval and Generation
question = "What are the main attractions in Pondicherry?"

# 1. Load Chroma DB
embeddings = OpenAIEmbeddings()
vector_db = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings
)

# 2. Retrieve relevant chunks
retrieved_docs = vector_db.similarity_search(question ,k=2)

# 3. Combine retrieved text into context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# 4. Create the system + user message
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# 5. Create ChatOpenAI and call it
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# 6. Build the message sequence manually
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
]

response = llm.invoke(messages)

print("-" * 50)
print(f"Question: {question}")
print("-" * 50)
print(f"Answer: {response.content}")
print("-" * 50)
print(f"Source documents used: {len(response.content)} chunks")


--------------------------------------------------
Question: What are the main attractions in Pondicherry?
--------------------------------------------------
Answer: The main attractions in Pondicherry include the old churches built in the 18th and 19th centuries, heritage buildings and monuments around Promenade Beach, and the Puducherry Botanical Gardens. Notable sites also include the Sri Manakula Vinayagar Temple, French War Memorial, and the Pondicherry Museum. Additionally, the Chunnambar Backwater resort offers a tropical experience along with various parks and statues.
--------------------------------------------------
Source documents used: 417 chunks
