In [1]:
import os 
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Read a local .env file (if one exists) so that settings such as GROQ_API_KEY
# are available through os.getenv() in the cells below.
load_dotenv()

True

In [2]:
# Fail fast with an actionable message when the key is missing or empty.
# os.getenv() returns None for an unset variable, and assigning None into
# os.environ raises an opaque "str expected, not NoneType" TypeError.
# When the key is present it already lives in os.environ (that is where
# os.getenv() reads it from), so no re-assignment is needed.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

In [3]:
# Shared chat model used by chat(); temperature is pinned to 0.
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

In [4]:
# Last stage of the chain built in chat(); its output is what chat() returns
# and what the loop prints as the bot's reply.
parser = StrOutputParser()

In [35]:
def chat(user_input):
    """Send one user message through the prompt -> llm -> parser chain.

    A two-message prompt (fixed system instruction plus the user's text) is
    built on every call, so each call is independent and no conversation
    history is carried over.

    Args:
        user_input: Text substituted into the ``{user_input}`` placeholder of
            the user message.

    Returns:
        Whatever the final ``parser`` stage of the chain produces.
    """
    messages = [
        ("system", "You are a helpful assistant . Be concise and accurate"),
        ("user", "{user_input}"),
    ]
    pipeline = ChatPromptTemplate.from_messages(messages) | llm | parser
    return pipeline.invoke({"user_input": user_input})



In [37]:
# Interactive chat loop: read a line, pass it to chat(), print the reply.
# Typing "exit" or "quit" (any case, surrounding whitespace ignored) stops it.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or a closed stdin would otherwise end the
        # cell with a traceback; treat it the same as an explicit exit.
        print("\nExiting chat. Goodbye!")
        break

    if user_input.lower() in ("exit", "quit"):
        print("Exiting chat. Goodbye!")
        break
    if not user_input:
        # Nothing was typed; prompt again instead of sending an empty message.
        continue

    response = chat(user_input)
    print("Bot:", response)
    print("_" * 80)

Bot: **Self-Attention Mechanism**

The self-attention mechanism is a key component of transformer models, introduced in the paper "Attention Is All You Need" by Vaswani et al. in 2017. It allows the model to weigh the importance of different input elements relative to each other.

**How it works**

Given a sequence of vectors `x = [x1, x2, ..., xn]`, the self-attention mechanism computes a weighted sum of these vectors, where the weights are learned based on the similarity between the vectors.

Here's a step-by-step breakdown:

1. **Query (Q)**: Compute a query vector `q` for each input vector `xi`.
2. **Key (K)**: Compute a key vector `k` for each input vector `xi`.
3. **Value (V)**: Compute a value vector `v` for each input vector `xi`.
4. **Attention weights**: Compute the attention weights `a` by taking the dot product of `q` and `k`, and applying a softmax function.
5. **Weighted sum**: Compute the weighted sum of the value vectors `v` using the attention weights `a`.

**Mathemati