# Libraries and Model

In [None]:
import os
import google.generativeai as genai
import pandas as pd
import pprint
# Configure the google.generativeai client with the key read from the
# GOOGLE_API_KEY environment variable (os.getenv returns None when it is unset).
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model shared by every call_model node function defined in this notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    temperature=0  # NOTE(review): presumably chosen for reproducible answers — confirm
)

# Simple chatbot with memory example

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# Graph builder that uses MessagesState as its state schema.
workflow = StateGraph(state_schema=MessagesState)


# Node function: put the system prompt in front of the history and query the model
def call_model(state: MessagesState):
    """Answer the conversation held in ``state["messages"]``.

    A fixed system prompt is placed ahead of the stored messages, the
    module-level ``model`` is invoked on the combined list, and its reply is
    returned under the ``"messages"`` key as the state update.
    """
    instructions = "".join(
        [
            "You are a helpful assistant. ",
            "Answer all questions to the best of your ability.",
        ]
    )
    conversation = [SystemMessage(content=instructions), *state["messages"]]
    return {"messages": model.invoke(conversation)}


# Define the node and edge: "model" is the only node and is wired directly to START
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Add simple in-memory checkpointer and compile the runnable graph with it
# (the invocations below select a conversation through the thread_id in their config)
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [None]:
# First turn of the conversation identified by thread_id "1".
app.invoke(
    {"messages": [HumanMessage(content="Translate to French: I love programming.")]},
    config={"configurable": {"thread_id": "1"}},
)

In [None]:
# Follow-up on the same thread_id ("1"): only this new message is sent, so the
# question can be answered only if the previous turn was kept by the checkpointer.
app.invoke(
    {"messages": [HumanMessage(content="What did I just ask you?")]},
    config={"configurable": {"thread_id": "1"}},
)

In [None]:
# Hand-written exchange used to seed conversations on new threads
demo_ephemeral_chat_history = [
    HumanMessage(content="Hey there! I'm Nemo."),
    AIMessage(content="Hello!"),
    HumanMessage(content="How are you today?"),
    AIMessage(content="Fine thanks!"),
]

# Send the seeded exchange followed by a new question on thread "2"
app.invoke(
    {
        "messages": [
            *demo_ephemeral_chat_history,
            HumanMessage(content="What's my name?"),
        ]
    },
    config={"configurable": {"thread_id": "2"}},
)

# Chatbot with memory and summary of previous messages

In [None]:
from langchain_core.messages import HumanMessage, RemoveMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Fresh graph builder for the summarising chatbot; rebinds the notebook-level `workflow`.
workflow = StateGraph(state_schema=MessagesState)


# Node function: answer the newest message, condensing older history once it grows
def call_model(state: MessagesState):
    """Reply to the newest message, summarising earlier turns when there are many.

    With fewer than four messages before the newest one, the model is invoked
    on the system prompt plus the full stored history and its reply is the
    update. Otherwise the model first condenses the earlier messages into a
    summary; the update then holds that summary, a new copy of the user's
    message, the reply, and a ``RemoveMessage`` for every message currently
    stored.
    """
    system_message = SystemMessage(
        content=(
            "You are a helpful assistant. "
            "Answer all questions to the best of your ability. "
            "The provided chat history includes a summary of the earlier conversation."
        )
    )
    stored = state["messages"]
    earlier = stored[:-1]  # everything before the most recent user input

    # Short history: answer directly from the complete message list
    if len(earlier) < 4:
        return {"messages": model.invoke([system_message] + stored)}

    newest = stored[-1]

    # Ask the model to condense the earlier turns into one message
    condense_request = HumanMessage(
        content=(
            "Distill the above chat messages into a single summary message. "
            "Include as many specific details as you can."
        )
    )
    summary_message = model.invoke(earlier + [condense_request])

    # Mark every stored message (the newest included) for removal
    removals = [RemoveMessage(id=old.id) for old in stored]
    # The user's message is re-created because the stored one is being removed
    human_message = HumanMessage(content=newest.content)
    # Answer from the summary plus the re-created question
    response = model.invoke([system_message, summary_message, human_message])

    return {"messages": [summary_message, human_message, response] + removals}


# Define the node and edge: the summarising call_model is the only node, wired to START
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Add simple in-memory checkpointer; `memory` and `app` are rebound, replacing
# the objects created for the previous example
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [None]:
# Four seeded messages precede the question, so call_model takes its summarising path
app.invoke(
    {
        "messages": [
            *demo_ephemeral_chat_history,
            HumanMessage("What did I say my name was?"),
        ]
    },
    config={"configurable": {"thread_id": "3"}},
)

# Chatbot with memory for the implemented basic RAG with Gemini

In [None]:
from FlagEmbedding import BGEM3FlagModel
from langchain_community.vectorstores import FAISS

# Load the BAAI/bge-m3 embedding model with the use_fp16 option enabled.
model_fp16 = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

class M3EmbeddingFP16:
    """Embedding adapter that exposes an encoder's ``'dense_vecs'`` output.

    Instances are passed to ``FAISS.load_local`` as the embedding object.
    The class offers ``embed_documents`` and ``embed_query`` and is also
    callable, so it can be used where a plain embedding function is expected.
    """

    def __init__(self, model=None):
        """Remember which encoder to use.

        Args:
            model: Object whose ``encode(texts)`` returns a mapping with a
                ``'dense_vecs'`` entry. When omitted, the module-level
                ``model_fp16`` is looked up each time an embedding is requested.
        """
        self._model = model

    def _encoder(self):
        # Resolved lazily so that M3EmbeddingFP16() keeps using the global model_fp16
        return model_fp16 if self._model is None else self._model

    def embed_documents(self, texts):
        """Return the encoder's dense vectors for *texts*."""
        return self._encoder().encode(texts)['dense_vecs']

    def embed_query(self, text):
        """Return the dense vector for the single string *text*."""
        # Encode as a one-element batch and take its only row
        return self.embed_documents([text])[0]

    def __call__(self, texts):
        """Alias for ``embed_documents`` so the instance works as a function."""
        return self.embed_documents(texts)
    
# Embedding object handed to FAISS.load_local when the index is loaded.
embd = M3EmbeddingFP16()

In [None]:
# Contains the documents without any data preprocessing steps
# SECURITY NOTE(review): allow_dangerous_deserialization=True opts in to
# unpickling data stored with the index; only load an index directory that
# comes from a trusted source.
vectorstore = FAISS.load_local("recursive_augmented_faiss_index", embd, allow_dangerous_deserialization=True)
# Notebook display: the store object and the ntotal count of its FAISS index
vectorstore, vectorstore.index.ntotal

In [None]:
# Retriever over the loaded index; search_kwargs asks for k=4 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [None]:
from langchain_core.messages import HumanMessage, RemoveMessage, SystemMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define the workflow: a new graph builder for the RAG chatbot, rebinding `workflow`
workflow = StateGraph(state_schema=MessagesState)

# Turn retrieved documents into one block of text
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    separator = "\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

# Function that retrieves relevant documents
def retrieve_documents(question):
    """Return the text of the documents retrieved for *question*.

    The module-level ``retriever`` is queried through ``invoke``
    (``get_relevant_documents`` is deprecated in current LangChain releases)
    and the resulting documents are merged into one string by ``format_docs``.

    Args:
        question: Query text passed to the retriever.

    Returns:
        The retrieved documents' contents as a single string.
    """
    docs = retriever.invoke(question)
    return format_docs(docs)

# Define the function that calls the model
def call_model(state: MessagesState):
    """Answer the latest user message using retrieved documents and chat memory.

    The newest entry of ``state["messages"]`` is treated as the user's
    question: documents are retrieved for it with ``retrieve_documents`` and
    both are substituted into the Italian system prompt. When at least four
    messages precede the question, the model is first asked to summarise them
    and the update replaces every stored message with the summary, a new copy
    of the question and the answer; otherwise the update is just the answer.

    Args:
        state: Graph state; ``state["messages"]`` must end with the user's
            question.

    Returns:
        A ``{"messages": [...]}`` state update.
    """
    # Define the system prompt with placeholders for context and question
    system_prompt = (
        """Comportati come un assistente che risponde alle domande del cliente.   
        Rispondi alla domanda basandoti solo sui seguenti documenti: {context}.
        Il contesto fornito può contenere anche il riassunto dei precedenti messaggi di questa conversazione.

        Rispondi in modo conciso e chiaro, spiegando passo passo al cliente le azioni necessarie da effettuare.   
        Se possibile, dai indicazioni dettagliate al cliente, su come risolvere il problema o effettuare l'azione desiderata. 
        Evita troppe ripetizioni nella risposta fornita.
        Quando spieghi che cosa è o cosa significa un certo elemento richiesto, non parlarne come se fosse un problema.

        In caso di più domande rispondi solo a quelle inerenti alla documentazione e rimani a disposizione per altre domande sull'argomento,
        specificando, invece, che le altre domande non sono state trovate pertinenti in questo contesto.

        Domanda relativa al software Panthera: {question} 
        """
    )

    # Retrieve the message history and the most recent user input
    message_history = state["messages"][:-1]  # exclude the most recent user input
    last_human_message = state["messages"][-1]

    # Retrieve documents based on the last user question
    context = retrieve_documents(last_human_message.content)

    # Construct the prompt
    prompt = system_prompt.format(context=context, question=last_human_message.content)
    system_message = SystemMessage(content=prompt)

    # Summarize the messages if the chat history reaches a certain size
    if len(message_history) >= 4:
        # The trailing space keeps the two sentences separated once the
        # adjacent string literals are concatenated.
        summary_prompt = (
            "Distilla i messaggi della chat sopra in un unico messaggio di riepilogo. "
            "Includi il maggior numero possibile di dettagli specifici."
        )
        summary_message = model.invoke(
            message_history + [HumanMessage(content=summary_prompt)]
        )
        # Remove every stored message, the latest question included ...
        delete_messages = [RemoveMessage(id=m.id) for m in state["messages"]]
        # ... and re-create the question as a new message so it stays in the state
        human_message = HumanMessage(content=last_human_message.content)
        response = model.invoke([system_message, summary_message, human_message])
        message_updates = [summary_message, human_message, response] + delete_messages
    else:
        response = model.invoke([system_message] + state["messages"])
        message_updates = [response]

    return {"messages": message_updates}

# Add the node to the workflow and make it the node reached from START
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

In [None]:
# In-memory checkpointer compiled into the RAG chatbot graph
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# First user question, recorded as the only entry of the local message log
user_input = "Quando mi conviene gestire un articolo a PSO rispetto a pianificazione?"
initial_state = {"messages": [{"role": "user", "content": user_input}]}

# Send just the newest logged message to the graph on thread "1"
output = app.invoke(
    {"messages": initial_state["messages"][-1]},
    config={"configurable": {"thread_id": "1"}},
)

In [None]:
# Show the full message list returned by the first invocation, then its last
# two entries: the user's question ([-2]) and the model's answer ([-1]).
pprint.pprint(output['messages'])
print("\n")
print("Question:")
pprint.pprint(output["messages"][-2].content)
print("Answer:")
pprint.pprint(output['messages'][-1].content)

In [None]:
# Add the user's message to the local log, then send only that newest message
# on the same thread ("1") as the first question
user_input = "Cosa ti ho appena chiesto?"
initial_state["messages"].append({"role": "user", "content": user_input})

app.invoke(
    {"messages": initial_state["messages"][-1]},
    config={"configurable": {"thread_id": "1"}},
)

In [None]:
# Add the user's message to the local log and send it on thread "1".
# NOTE(review): as the third question on this thread it should find four
# earlier messages and trigger call_model's summarising branch — confirm.
user_input = "Spiegami meglio come funziona la gestione degli articoli in Panthera"
initial_state["messages"].append({"role": "user", "content": user_input})

riassunto = app.invoke(
    {"messages": initial_state["messages"][-1]},
    config={"configurable": {"thread_id": "1"}},
)

In [None]:
# Show the returned message list, then its last three entries. When the
# summarising branch ran, call_model emits them in the order
# summary ([-3]), re-created question ([-2]), answer ([-1]).
pprint.pprint(riassunto['messages'])
print("\n")
print("Resume of previous conversation:")
pprint.pprint(riassunto["messages"][-3].content)
print("Question:")
pprint.pprint(riassunto["messages"][-2].content)
print("Answer:")
pprint.pprint(riassunto['messages'][-1].content)