## Testing out the environment

In [70]:
import langchain
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
from pprint import pprint

In [2]:
# Load environment variables from a local .env file if one is present.
# The displayed True is load_dotenv()'s return value.
load_dotenv()

True

In [3]:
# Show the installed LangChain version so the outputs below can be tied to it.
langchain.__version__

'0.3.27'

In [4]:
# Settings for the Gemini chat client, collected in one place.
# max_tokens and timeout are passed as None, i.e. no explicit value is set here.
gemini_flash_settings = {
    "model": "gemini-2.5-flash",
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
gemini_flash_model = ChatGoogleGenerativeAI(**gemini_flash_settings)

In [26]:
# Settings for the Groq-hosted chat client used as the default `model` in this notebook.
# max_tokens and timeout are passed as None, i.e. no explicit value is set here.
groq_settings = {
    "model": "openai/gpt-oss-20b",
    "temperature": 0,
    "max_tokens": None,
    "reasoning_format": "parsed",
    "timeout": None,
    "max_retries": 2,
}
model = ChatGroq(**groq_settings)

In [27]:
# Smoke test: one round trip to the Groq model, displaying only the reply text.
groq_reply = model.invoke("what is the capital of india")
groq_reply.content

'The capital of India is **New\u202fDelhi**.'

In [7]:
# Same smoke test against the Gemini client, displaying only the reply text.
gemini_reply = gemini_flash_model.invoke("what is the capital of india")
gemini_reply.content

'The capital of India is **New Delhi**.'

### Embedding model

In [None]:
# Embed a sample query with the Gemini embedding model.
# output_dimensionality=10 requests a 10-value vector, short enough to display in full.
embedding_model = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
embeddings = embedding_model.embed_query(
    text="What's our Q1 revenue?",
    output_dimensionality=10,
)

# End on the bare name so the vector is displayed when the cell is re-run;
# a cell that ends on an assignment produces no output.
embeddings

[-0.03572908416390419,
 0.014558478258550167,
 0.011592254973948002,
 -0.08969993889331818,
 -0.009068180806934834,
 0.013664662837982178,
 0.011340967379510403,
 -0.005701108369976282,
 -0.027033332735300064,
 3.775993536692113e-05]

In [16]:
from langchain_huggingface import HuggingFaceEmbeddings

In [19]:
# Alternative embedding backend: a sentence-transformers model configured to run on CPU.
# NOTE: this rebinds `embedding_model` and `embeddings`, replacing the Gemini-based values.
hf_model_name = "sentence-transformers/all-MiniLM-L6-v2"
hf_model_kwargs = {"device": "cpu"}
embedding_model = HuggingFaceEmbeddings(model_name=hf_model_name, model_kwargs=hf_model_kwargs)

sample_query = "What's our Q1 revenue?"
embeddings = embedding_model.embed_query(text=sample_query)

In [22]:
# Number of values in the most recently computed embedding vector.
len(embeddings)

384

## LangChain Prompts

### Basic Prompt template

In [61]:
from langchain_core.prompts import PromptTemplate, load_prompt

In [62]:
# Report-writing prompt with three slots: the paper/topic, the writing style and the length.
report_template_text = """
    You are a helpful assistant that can generate a short report on the topic: {paper_input}
    The report should be in the style of {style_input} and the length should be {length_input}
    """
report_template_slots = ["paper_input", "style_input", "length_input"]

template = PromptTemplate(
    input_variables=report_template_slots,
    template=report_template_text,
)

# Persist the template as JSON so it can be loaded back from disk later.
template.save("./prompt_templates/template.json")

In [63]:
# Load the template back from the JSON file, rebinding `template` to the loaded object.
template = load_prompt("./prompt_templates/template.json")

In [64]:
# Fill every slot of the template to produce the final prompt string.
prompt = template.format(
    paper_input="Attention is all you need",
    style_input="Beginner-Friendly",
    length_input="Short (1-2 paragraphs)",
)

In [67]:
# print() is used so the prompt's line breaks and indentation render as written.
print(prompt)

### Messages


    You are a helpful assistant that can generate a short report on the topic: Attention is all you need
    The report should be in the style of Beginner-Friendly and the length should be Short (1-2 paragraphs)
    


### Messages

In [49]:
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [35]:
# A conversation can be handed to the model directly as a list of message objects.
system_message = SystemMessage(
    content="You are a helpful assistant that can answer questions and help with tasks."
)
user_message = HumanMessage(content="What is the capital of France?")
messages = [system_message, user_message]

result = model.invoke(messages)

# Display only the reply text.
result.content

'The capital of France is **Paris**.'

### Dynamic list of messages

In [39]:
# Deliberate counter-example: the template is built from ready-made message *instances*.
# As the printed prompt shows, their content is kept verbatim, so {domain} and {topic}
# are NOT replaced by the values passed to invoke().
literal_system = SystemMessage(
    content="You are a helpful assistant who is an expert in the domain: {domain}"
)
literal_human = HumanMessage(content="Explain the topic in simple terms: {topic}")
chat_template = ChatPromptTemplate(messages=[literal_system, literal_human])

template_inputs = {"domain": "AI", "topic": "Self Attention"}
prompt = chat_template.invoke(template_inputs)

print(prompt)

messages=[SystemMessage(content='You are a helpful assistant who is an expert in the domain: {domain}', additional_kwargs={}, response_metadata={}), HumanMessage(content='Explain the topic in simple terms: {topic}', additional_kwargs={}, response_metadata={})]


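### The above does not work

`SystemMessage` / `HumanMessage` instances are inserted into the prompt as-is, so `{domain}` and `{topic}` are never filled in. Passing `(role, template)` tuples instead lets `ChatPromptTemplate` substitute the variables: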

In [47]:
# Building the template from (role, template-string) tuples: as the printed prompt
# shows, the {domain} and {topic} placeholders are filled in by invoke().
chat_template = ChatPromptTemplate(
    messages=[
        ("system", "You are a helpful assistant who is an expert in the domain: {domain}"),
        ("human", "Explain the topic in simple terms in 3-5 sentences: {topic}"),
    ]
)

template_inputs = {"domain": "AI", "topic": "Self Attention"}
prompt = chat_template.invoke(template_inputs)

print(prompt)

messages=[SystemMessage(content='You are a helpful assistant who is an expert in the domain: AI', additional_kwargs={}, response_metadata={}), HumanMessage(content='Explain the topic in simple terms in 3-5 sentences: Self Attention', additional_kwargs={}, response_metadata={})]


In [48]:
# Send the current `prompt` to the Groq chat model and print the reply text.
result = model.invoke(prompt)
print(result.content)


Self‑attention is a way for a model to look at all the words in a sentence at once and decide how much each word should influence every other word.  
For each word, the model creates three vectors—query, key, and value—then compares the query of one word with the keys of all words to get a “similarity score.”  
These scores are turned into weights (via a softmax) that say how much attention each word should give to the others, and the weighted sum of the value vectors gives the new representation for that word.  
Because every word can attend to every other word, the model captures long‑range relationships and context without needing to process the sentence sequentially.  
This mechanism is the core of transformer models, enabling them to understand and generate language efficiently.


### Message Placeholder

In [72]:
# An earlier exchange (customer request, then assistant reply) to be used as chat history.
refund_request = HumanMessage(content="I want to request a refund for my order #12345.")
refund_confirmation = AIMessage(
    content="Your refund request for order #12345 has been initiated. It will be processed in 3-5 business days."
)
history_messages = [refund_request, refund_confirmation]

In [75]:
# Inspect the text of the first message in the history list.
history_messages[0].content


'I want to request a refund for my order #12345.'

In [71]:
# MessagesPlaceholder reserves a slot between the system and human messages; the list
# passed under "chat_history" is spliced in there, as the printed prompt shows.
chat_template = ChatPromptTemplate(
    messages=[
        ("system", "You are a helpful assistant that can answer questions and help with tasks."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{query}"),
    ]
)

prompt_inputs = {"chat_history": history_messages, "query": "how many day again?"}
prompt = chat_template.invoke(prompt_inputs)

pprint(prompt)

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant that can answer questions and help with tasks.', additional_kwargs={}, response_metadata={}), HumanMessage(content='I want to request a refund for my order #12345.', additional_kwargs={}, response_metadata={}), AIMessage(content='Your refund request for order #12345 has been initiated. It will be processed in 3-5 business days.', additional_kwargs={}, response_metadata={}), HumanMessage(content='how many day again?', additional_kwargs={}, response_metadata={})])


In [57]:
# Send the current `prompt` to the Groq chat model and print the reply text.
response = model.invoke(prompt)
print(response.content)

It will take **3–5 business days** to complete the refund.  
That means the processing time is counted only on weekdays (Monday‑Friday), excluding public holidays. If you have any more questions, just let me know!
