In [1]:
%reload_ext watermark
%watermark -a "Edgar Gulay @ Flit" -v -n -t -m -w

Author: Edgar Gulay @ Flit

Python implementation: CPython
Python version       : 3.11.4
IPython version      : 8.14.0

Compiler    : MSC v.1934 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
CPU cores   : 4
Architecture: 64bit

Watermark: 2.4.3



In [4]:
# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai chromadb

Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.13.0 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.9.0 which is incompatible.


In [2]:
import os
os.environ['OPENAI_API_KEY'] = ""

In [5]:
from langchain import hub
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Prepare RAG

In [7]:
# read the txt file
with open('data/Flit Product Knowledge.txt', 'r') as file:
    data = file.read().replace('\n', '')


In [54]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1080
)
splits = text_splitter.split_text(data)
vectorstore = Chroma.from_texts(texts=splits, embedding=OpenAIEmbeddings(), persist_directory='embeddings/Flit_Product_Knowledge')

In [18]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Indexing: Store vectors [embeddings]

In [56]:
# load the vectorstore from disk
vectorstore_ = Chroma(persist_directory='embeddings/Flit_Product_Knowledge')

In [58]:
retriever_ = vectorstore_.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [59]:
retrieved_docs = retriever_.invoke("What is Flit?")
len(retrieved_docs)

ValueError: You must provide an embedding function to compute embeddings.https://docs.trychroma.com/embeddings

# retrieve and generate

In [13]:
from langchain_core.prompts import PromptTemplate

In [29]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=1)

In [31]:
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, tell the customer to tap the call button on top right to ask in person, don't try to make up an answer.
keep the answer as concise as possible. When assisting customers, always be polite and professional. Listen actively to their concerns and provide helpful information. Use the same language as question. When mentioning products mention it's name and size.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("Soda ipi ni nzuri kwa watu wenye kisukari, na ina bei ndogo?")

'Krest Bitter Lemon 24 RB ni nzuri kwa watu wenye kisukari na ina bei ndogo.'

# Chat History with contextualization

In [37]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

qa_system_prompt = """
    Use the following pieces of context to answer the question at the end.\
    If you don't know the answer tell the customer to tap the call button on top right to ask in person. \
    don't try to make up an answer.\
    keep the answer as concise as possible. \
    When assisting customers, always be polite and professional. \
    Listen actively to their concerns and provide helpful information. \
    Use the same language as question. When mentioning products mention it's name and size.
{context}"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]


rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)

In [38]:
chat_history = []

question = "Soda zipi ni nzuri kwa watu wanaopenda sukari kidogo na ladha ya matunda?"
, = rag_chain.invoke({"question": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question), ai_msg])

In [39]:
second_question = "Soda hizo zina bei gani?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})

AIMessage(content='Fanta Passion inauzwa kwa bei ya 9700 kwa ukubwa wa 250ML na 6800 kwa ukubwa wa 300ML.')

In [40]:
chat_history


[HumanMessage(content='Soda zipi ni nzuri kwa watu wanaopenda sukari kidogo na ladha ya matunda?'),
 AIMessage(content='Kwa watu wanaopenda sukari kidogo na ladha ya matunda, Fanta Passion ni chaguo zuri.')]