In [None]:
# Building a Chatbot

# How to design and implement an LLM based chatbot
# This chatbot will be able to have conversations and remember previous interactions

# The chatbot will only use a language model to hold a conversation,
# but other related concepts can be integrated with it:
# RAG -> Conversational RAG -> Enable a chatbot experience over an external source of data
# Agents -> Build a chatbot that can take actions

In [None]:
import os
from dotenv import load_dotenv  # was misspelled `form`, which is a SyntaxError

# Load the environment variables (e.g. API keys) before the cells below read them.
load_dotenv()

In [None]:
# Read the Groq API key from the process environment.
groq_api_key = os.getenv("GROQ_API_KEY")

# Show only whether a key was found: echoing the key itself would store the
# secret in the notebook's saved output.
groq_api_key is not None

In [None]:
# Create the Groq-hosted chat model that the chatbot cells below talk to.
from langchain_groq import ChatGroq

model = ChatGroq(
    model="Gemma2-9b-It",
    groq_api_key=groq_api_key,
)
model  # display the configured client

In [None]:
# Send a single human turn to the model; the reply is shown as the cell output.
from langchain_core.messages import HumanMessage

model.invoke(
    [
        HumanMessage(
            content="Hi, My Name is Anonymous and I am a chief AI Engineer"
        ),
    ]
)

In [None]:
# When the whole conversation is passed in as one list, the model can answer
# from the earlier turns contained in that list.
from langchain_core.messages import AIMessage

model.invoke(
    [
        HumanMessage(
            content="Hi my name is Anonymous and I am a chief AI Engineer"
        ),
        AIMessage(
            content="Hello, Anonymous! It's nice to meet you!, As a Cheif Engineer what kind of project are you working on currently "
        ),
        HumanMessage(
            content="Hey What's my name and what do I do?"
        ),
    ]
)

In [None]:
# # MessageHistory 
# We can use a Message History class to wrap our model and make it stateful.
# This will keep track of input and output of the model and store them in some datastore
# Future interactions will then load those messages and pass them into the chain as part of the input

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Each user gets their own conversation,
# which means keeping one chat history separate from another.
# A history is created per session_id, and that session_id is what distinguishes one conversation from the others.


# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered under ``session_id``.

    The first request for an unknown id creates an empty
    ``ChatMessageHistory`` and saves it in ``store``; every later request
    for that id gets the same stored object back.
    """
    try:
        return store[session_id]
    except KeyError:
        fresh_history = ChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history


# Wrap the model so that each call works with the chat history that
# get_session_history returns for the session id given in the call's config.
with_message_history = RunnableWithMessageHistory(
    model,
    get_session_history,
)

In [None]:
# Per-call settings: "session_id" names the conversation ("chat1") to use.
config = {
    "configurable": {
        "session_id": "chat1",
    },
}

In [None]:
# Passing a config ties this call to one specific session id.
# The reply is stored in `response` because the next cell reads
# `response.content`; without the assignment that cell raises NameError
# on a fresh kernel.
response = with_message_history.invoke(
    [HumanMessage(content="Hi my name is Anonymous and I am an AI Chief Engineer")],
    config=config,
)
response  # still display the full reply, as this cell did before

In [None]:
# Show only the `content` attribute of the reply held in `response`.
# NOTE(review): `response` must already be assigned by an earlier cell — confirm on a fresh kernel run.
response.content

In [None]:
# Ask a follow-up question in the same session.
# The text has to be passed as `content`; the previous `config=` keyword
# left the message without its text.
with_message_history.invoke(
    [HumanMessage(content="What is my name?")],
    config=config,
)

In [None]:
# Switch to a different session id to show that histories are kept apart:
# - with "chat1" the earlier introduction is part of the history;
# - with "chat2" there is no history yet, so the model has no record of the
#   name and will say it has no memory of past conversations.
config1 = {"configurable": {"session_id": "chat2"}}
response = with_message_history.invoke(
    [HumanMessage(content="What's my name")],
    config=config1,  # was `config`, which silently reused the "chat1" session
)
response.content

In [None]:
# Add a new message, introducing a name, to the session selected by `config`.
response = with_message_history.invoke(
    [
        HumanMessage(content="Hey! My name is John Cena"),
    ],
    config=config,
)
response.content

In [None]:
# Ask again with the same `config` to check whether the name is remembered.
response = with_message_history.invoke(
    [
        HumanMessage(content="What's my name"),
    ],
    config=config,
)
response.content

In [None]:
# Working with Prompt Template and MessageChat History using Langchain 
# Prompt Template -> Helps turn raw user information into a format that LLM can work with
# Raw user input is just a message that we pass to the LLM.
# To make things a little more involved, we can:
# - add a system message with some custom instructions (while still taking messages as input)
# - add more inputs besides just the messages

In [None]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
# Prompt = a fixed system instruction followed by the chat messages.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Typos fixed ("quuestion", "nest"): this text is part of the prompt.
            "You are a helpful assistant. Answer all the questions to the best of your ability",
        ),
        # Filled from the "messages" input variable.
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Pipe the formatted prompt into the chat model.
chain = prompt | model

In [None]:
# Try the prompt | model chain with a single human message.
chain.invoke(
    {
        "messages": [HumanMessage(content="Hi my name is Anonymous ")],
    }
)

In [None]:
# Re-create the history wrapper, this time around the prompt-based chain.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
)

In [None]:
# Start a separate conversation under the session id "chat3".
config = {
    "configurable": {"session_id": "chat3"},
}
response = with_message_history.invoke(
    [HumanMessage(content="Hi my name is Anonymous")],
    config=config,
)

response.content

In [None]:
# Prompt with an extra {language} variable in the system instruction.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all the question to the best of your ability in this {language}"
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Rebuild the chain: the existing `chain` object was composed from the
# previous prompt, so without this line the "language" input passed by the
# following cells would have no effect.
chain = prompt | model

In [None]:
# Call the chain with two inputs: the chat messages and a "language" value.
response = chain.invoke(
    {
        "messages": [HumanMessage(content="Hi my name is Anonymous")],
        "language": "Hindi",
    }
)
response.content

In [None]:
# wrap this more complicated chain in a message history class
# this time because there are multiple keys in the input we need to specify the correct key to use to save the chat history

In [None]:
# The input now has several keys, so the wrapper is told that the chat
# messages live under "messages".
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

In [None]:
# New session "chat4": introduce the user and pass "Hindi" as the language.
config = {
    "configurable": {"session_id": "chat4"},
}
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="Hi, I am Anonymous")],
        "language": "Hindi",
    },
    config=config,
)
response.content

In [None]:
# Follow-up question sent with the same `config` as the previous cell.
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="What's my name?")],
        "language": "Hindi",
    },
    config=config,
)

In [None]:
# Show the `content` of the reply stored in `response` by the previous cell.
response.content

In [None]:
# 3 Manage the Chat Conversation History 
# How to manage conversation history
# because, if left unmanaged, the list of messages grows unbounded and can overflow the context window of the LLM.
# It is therefore important to add a step that limits the size of the messages you pass in: "trim_messages"

In [None]:
from langchain_core.messages import SystemMessage,trim_messages
# Message trimmer that keeps the chat history within a token budget.
trimmer = trim_messages(
    max_tokens=70,         # token budget for the messages that are kept
    strategy="last",       # keep the most recent messages, drop the oldest first
    token_counter=model,   # the chat model is used to count tokens
    include_system=True,   # keep the system message at the start of the list
    allow_partial=False,   # never split a message to fit the budget
    start_on="human",      # the kept history starts on a human message
)

# Sample conversation: one system message followed by human/AI exchanges.
messages = [
    SystemMessage(content="you're a good assistant"),
    # exchange 1
    HumanMessage(content="hi! I'm Bob!"), AIMessage(content="hi!"),
    # exchange 2
    HumanMessage(content="I like vanilla ice cream"), AIMessage(content="nice"),
    # exchange 3
    HumanMessage(content="whats 2 + 2"), AIMessage(content="4"),
    # exchange 4
    HumanMessage(content="thanks"), AIMessage(content="no Problem!"),
    # exchange 5
    HumanMessage(content="having fun?"), AIMessage(content="yes!"),
]

# Run the trimmer on its own to see which messages it keeps.
trimmer.invoke(messages)

In [None]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# Trim the incoming "messages" first, then format the prompt and call the model.
chain = (
    RunnablePassthrough.assign(
        messages=itemgetter("messages") | trimmer,
    )
    | prompt
    | model
)

# Ask about something mentioned in the sample conversation.
response = chain.invoke(
    {
        "messages": messages + [HumanMessage(content="What ice cream do I like")],
        "language": "English",
    }
)
response.content

# If the model cannot answer, the relevant message was probably trimmed away, because the trimmer runs before the prompt.

In [None]:
# Same chain, this time asking about the math question in the sample conversation.
response = chain.invoke(
    {
        "messages": messages + [
            HumanMessage(content="what math problem did i ask"),
        ],
        "language": "English",
    }
)
response.content

In [None]:
# Re-wrap the chain so that session history runs through the trimming chain
# built above: the existing wrapper was created before that chain existed and
# still points at the older chain without the trimmer. A new session id keeps
# this demonstration separate from the earlier "chat4" conversation.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)
config = {"configurable": {"session_id": "chat5"}}

response = with_message_history.invoke(
    {
        "messages": messages + [HumanMessage(content="what's my name?")],
        "language": "English",
    },
    config=config,
)
response.content

In [None]:
# Ask about the math question through the history wrapper, with the same `config`.
response = with_message_history.invoke(
    {
        "messages": messages + [
            HumanMessage(content="what math problem did I ask?"),
        ],
        "language": "English",
    },
    config=config,
)
response.content

In [None]:
# Vector stores and Retrievers 
# Familiarize with Langchain's vector and retriever abstractions
# Abstractions are designed to support retrieval of data -> from vector databases and other sources for integrations with LLM workflows
# They are important for applications that fetch data to be reasoned over as part of model inference,
# as in the case of retrieval augmented generation


# Document
# Vector Stores
# Retrievers 

In [None]:
# Documents ->
# Langchain implements a document abstraction, which is intended to represent a unit of text and associated metadata
# It has two attributes: 
# page_content: a string representing the content
# metadata: a dictionary containing arbitrary metadata.
# The metadata attribute can capture information about the source of the document, its relationship to other documents
# and other information
# note that -> an individual document object often represent a chunk of a larger document

In [None]:
# How to create document 
from langchain_core.documents import Document

from langchain_core.documents import Document

# Sample corpus: each entry is (text, source) and becomes one Document whose
# metadata records the source it came from.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "Dogs are great companions, known for their loyalty and friendliness.",
            "mammal-pets-doc",
        ),
        (
            "Cats are independent pets that often enjoy their own space.",
            "mammal-pets-doc",
        ),
        (
            "Goldfish are popular pets for beginners, requiring relatively simple care.",
            "fish-pets-doc",
        ),
        (
            "Parrots are intelligent birds capable of mimicking human speech.",
            "bird-pets-doc",
        ),
        (
            "Rabbits are social animals that need plenty of space to hop around.",
            "mammal-pets-doc",
        ),
    )
]

In [None]:
# Display the list of Document objects built in the previous cell.
documents

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
# Groq API key for the chat model created below.
groq_api_key = os.getenv("GROQ_API_KEY")

# Re-export the Hugging Face token only when one is configured:
# os.environ rejects None, so the unconditional assignment raised TypeError
# whenever HF_TOKEN was missing from the environment.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

# Chat model used by the RAG chain at the end of the notebook.
llm = ChatGroq(groq_api_key=groq_api_key, model="Llama3-8b-8192")
llm

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model used to turn documents and queries into vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [None]:
## VectorStores
from langchain_chroma import Chroma

# Build a Chroma vector store from the sample documents, using `embeddings`.
vectorstore = Chroma.from_documents(
    documents,
    embedding=embeddings,
)
vectorstore


In [None]:
# Similarity search over the vector store for the query "cat".
vectorstore.similarity_search(
    "cat",
)

In [None]:
## Async query
await vectorstore.asimilarity_search("cat")

In [None]:
# Same query again, through the variant that also reports a score per result.
vectorstore.similarity_search_with_score(
    "cat",
)

In [None]:
### Retrievers
# LangChain VectorStore objects do not subclass Runnable, and so cannot immediately be integrated into LangChain Expression Language chains.

# LangChain Retrievers are Runnables, so they implement a standard set of methods 
# (e.g., synchronous and asynchronous invoke and batch operations) and are designed to be incorporated in LCEL chains.

# We can create a simple version of this ourselves, without subclassing Retriever. 
# If we choose what method we wish to use to retrieve documents, we can create a runnable easily. 
# Below we will build one around the similarity_search method:

In [None]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

# Wrap the plain search method in a Runnable and bind k=1 (top result only).
retriever = (
    RunnableLambda(vectorstore.similarity_search)
    .bind(k=1)
)

# Run the retriever over both queries in one batch call.
retriever.batch(
    ["cat", "dog"],
)

In [None]:
# Vectorstores implement an as_retriever method that will generate a Retriever, specifically a VectorStoreRetriever. 
# These retrievers include specific search_type and search_kwargs attributes that identify what methods of 
# the underlying vector store to call, and how to parameterize them. 
# For instance, we can replicate the above with the following:

In [None]:
# Same retrieval as above, built with the vector store's own as_retriever helper.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

retriever.batch(
    ["cat", "dog"],
)

In [None]:
## RAG
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt text with two slots: the user's question and the retrieved context.
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

# The whole template is sent as a single human message.
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", message),
    ]
)

# The input string goes to the retriever (filling "context") and is passed
# through unchanged as "question"; the filled prompt is then sent to the LLM.
rag_chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

response = rag_chain.invoke("tell me about dogs")
print(response.content)