In [0]:
!pip install -qU langchain langchain-community langchain-huggingface langchain-chroma faiss-cpu pypdf

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
petastorm 0.12.1 requires pyspark>=2.1.0, which is not installed.
databricks-feature-engineering 0.2.1 requires pyspark<4,>=3.1.2, which is not installed.
ydata-profiling 4.2.0 requires numpy<1.24,>=1.16.0, but you have numpy 1.26.4 which is incompatible.
ydata-profiling 4.2.0 requires pydantic<2,>=1.8.1, but you have pydantic 2.10.3 which is incompatible.
tensorflow-cpu 2.14.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.29.1 which is incompatible.
tensorboard-plugin-profile 2.14.0 requires protobuf<5.0.0dev,>=3.19.6, but you have protobuf 5.29.1 which is incompatible.
numba 0.56.4 requires numpy<1.24,>=1.18, but you have numpy 1.26.

In [0]:
# Restart the Python process so the packages installed in the previous cell are
# picked up (the pip output above asks for exactly this call).
dbutils.library.restartPython()

In [0]:
from typing import List
from getpass import getpass
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    PyPDFLoader,
)
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

import faiss
import os

In [0]:
# Read the Hugging Face token interactively (input is not echoed) and publish it
# both as a notebook variable and as the environment variable of the same name.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN = getpass()

 [REDACTED]

In [0]:
def load_data(file_loc: str) -> List[Document]:
    """Load a PDF into a list of Documents.

    Args:
        file_loc: location of the PDF, passed unchanged to ``PyPDFLoader``.

    Returns:
        The list of Documents returned by ``PyPDFLoader(file_loc).load()``.

    Raises:
        SystemExit: if anything goes wrong while loading; the original
            exception is attached as ``__cause__``.
    """
    try:
        return PyPDFLoader(file_loc).load()
    except Exception as e:
        # Keep the notebook's "stop on failure" contract, but chain the original
        # exception so its type and traceback are not lost.
        raise SystemExit(f"Exiting due to the error: {str(e)}") from e

In [0]:
def prepare_chunk(
    data: list, chunk_size: int = 100, chunk_overlap: int = 10
) -> tuple[List[Document], int]:
    """Split Documents into chunks.

    The splitter is built with
    ``RecursiveCharacterTextSplitter.from_tiktoken_encoder`` and tries the
    separators ``"\\n\\n"``, ``"\\n"``, ``" "`` and ``""`` in that order.

    Args:
        data: list of Documents to split.
        chunk_size: ``chunk_size`` handed to the splitter (default 100).
        chunk_overlap: ``chunk_overlap`` handed to the splitter (default 10).

    Returns:
        A ``(chunks, count)`` tuple: the list of chunked Documents and the
        number of chunks in it.

    Raises:
        SystemExit: if splitting fails; the original exception is attached
            as ``__cause__``.
    """
    try:
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", " ", ""],
        )
        documents = text_splitter.split_documents(data)

        # Callers unpack two values, so the count is returned next to the chunks.
        return documents, len(documents)

    except Exception as e:
        # Chain the cause so the real error is still inspectable.
        raise SystemExit(f"Exiting due to the error: {str(e)}") from e

In [0]:
def create_vstores(documents: list, faiss, docstore, index_path: str = "online_guide_index"):
    """Embed Documents into a FAISS vector store and save it locally.

    Args:
        documents: non-empty list of Documents to embed and index.
        faiss: the imported ``faiss`` module; its ``IndexFlatL2`` is used to
            create the index.
        docstore: docstore that will hold the Documents. When ``None`` is
            passed a fresh ``InMemoryDocstore`` is created.
        index_path: folder name passed to ``save_local``
            (default ``"online_guide_index"``).

    Returns:
        The populated FAISS vector store.

    Raises:
        SystemExit: if ``documents`` is empty or any step fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        # Fail fast with a clear message instead of an IndexError later on.
        if not documents:
            raise ValueError("no documents to add to the vector store")

        print("****Loading to Vectorstore, Please wait...****")
        print(f"Adding {len(documents)} to FAISS Local")

        # Build the embedding model once; it is reused for sizing and indexing.
        embeddings = HuggingFaceEmbeddings()

        # Only the vector dimension is needed to size the index, so embed one
        # short probe string rather than the whole corpus (add_documents below
        # embeds the documents itself).
        dimension = len(embeddings.embed_query("dimension probe"))

        vector_stores = FAISS(
            embedding_function=embeddings,
            index=faiss.IndexFlatL2(dimension),
            # Honour the caller-supplied docstore instead of silently ignoring it.
            docstore=docstore if docstore is not None else InMemoryDocstore(),
            index_to_docstore_id={},
        )

        # Ids are passed as strings ("1".."n"), the type the vector store API declares.
        doc_ids = [str(i) for i in range(1, len(documents) + 1)]
        vector_stores.add_documents(documents=documents, ids=doc_ids)
        print("****Loading to Vectorstore, Done!****")

        vector_stores.save_local(index_path)

        return vector_stores

    except Exception as e:
        # Chain the cause so the real error is still inspectable.
        raise SystemExit(f"Exiting due to the error: {str(e)}") from e

In [0]:
# Chat histories keyed by session id. Kept at module level so that successive
# run_llm() calls with the same session_id continue one conversation; re-running
# this cell resets it.
_SESSION_HISTORIES = {}


def run_llm(model_name: str, user_question: str, session_id: str, vstore_connection):
    """Answer a question with a history-aware RAG chain.

    Builds a ``HuggingFaceEndpoint`` model, a history-aware retriever on top of
    ``vstore_connection`` and a stuff-documents answer chain, wraps them in
    ``RunnableWithMessageHistory`` and invokes the result once.

    Args:
        model_name: repo id passed to ``HuggingFaceEndpoint``.
        user_question: the question to ask; must contain non-whitespace text.
        session_id: key under which the chat history is stored and reused
            across calls.
        vstore_connection: object exposing ``as_retriever()`` (the notebook
            passes the FAISS vector store).

    Returns:
        Whatever the chain's ``invoke`` returns; the notebook reads its
        ``'input'`` and ``'answer'`` keys.

    Raises:
        SystemExit: if the question is empty or any step fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        # Reject empty questions before any model or endpoint is set up.
        if not user_question or not user_question.strip():
            raise ValueError("user_question must be a non-empty string")

        print(f"****Setting up {model_name}, Please wait...****")
        # NOTE(review): the raw text-generation endpoint is handed to the chains
        # as-is; wrapping it in ChatHuggingFace may yield cleaner chat-formatted
        # answers - confirm before changing.
        model = HuggingFaceEndpoint(
                        repo_id=model_name,
                        task="text-generation",
                        max_new_tokens=512,
                        top_k=10,
                        top_p=0.95,
                        temperature=0.01,
                        do_sample=False,
                        repetition_penalty=1.03,
                    )

        print("****Connecting to VectorStore****")
        retriever = vstore_connection.as_retriever()

        print("****Setting up RAG Prompt****")
        # The output of this prompt is used as the retrieval query once there
        # is chat history, so it must rewrite the question, not answer it.
        context_system_prompt = (
            "Given a chat history and the latest user question which might "
            "reference context in the chat history, formulate a standalone "
            "question which can be understood without the chat history. "
            "Do NOT answer the question, just reformulate it if needed and "
            "otherwise return it as is."
        )

        context_prompt = ChatPromptTemplate.from_messages(
            [
                ("system",context_system_prompt),
                MessagesPlaceholder(variable_name="chat_history"),
                ("human", "{input}"),
            ]
        )

        context_retriever = create_history_aware_retriever(llm=model, retriever=retriever, prompt=context_prompt)

        conversation_system_prompt = """Answer any user questions based solely on the context below:<context>\n\n{context}</context>
                    If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise."""

        conversation_prompt = ChatPromptTemplate.from_messages(
            [
                ("system",conversation_system_prompt),
                MessagesPlaceholder(variable_name="chat_history"),
                ("human", "{input}"),
            ]
        )

        print("****Building the Chains with Chat History****")
        context_chain = create_stuff_documents_chain(model,conversation_prompt)

        rag_chain = create_retrieval_chain(context_retriever,context_chain)

        def get_session_history(session_id: str) -> BaseChatMessageHistory:
            # Look the history up in the shared store so it outlives this call.
            if session_id not in _SESSION_HISTORIES:
                _SESSION_HISTORIES[session_id] = ChatMessageHistory()
            return _SESSION_HISTORIES[session_id]

        conversational_rag_chain = RunnableWithMessageHistory(
            rag_chain,
            get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )

        print("****Invoking the Chain with User Question****")
        return conversational_rag_chain.invoke(
            {"input": user_question},
            config={"configurable": {"session_id": session_id}},
        )

    except Exception as e:
        # Chain the cause so the real error is still inspectable.
        raise SystemExit(f"Exiting due to the error: {str(e)}") from e

In [0]:
# Function calls to create the vector store
# 1. Load the PDF from the /dbfs path into a list of Documents.
data = load_data("/dbfs/mnt/data/input_data/How_to_Buy_Products_Online_1.pdf")
# 2. Split the Documents into chunks; `length` receives the number of chunks.
documents, length = prepare_chunk(data)
# 3. Embed the chunks into a FAISS store; `vc` is what the query cell passes to run_llm.
vc = create_vstores(documents, faiss, docstore=InMemoryDocstore())

****Loading to Vectorstore, Please wait...****
Adding 2 to FAISS Local
****Loading to Vectorstore, Done!****


In [0]:
# Ask a single question in session "uid1" against the vector store `vc`.
data = run_llm(
    "microsoft/Phi-3-mini-4k-instruct",
    "How to create an account, where I can buy products?",
    "uid1",
    vc,
)

# The result holds the question under 'input' and the generated text under 'answer'.
latest_question = data['input']
conversation = data['answer']

# Keep only the text after the final "Assistant:" marker
# (the whole text when the marker is absent).
latest_answer = conversation.strip().rpartition("Assistant:")[2].strip()

print(f"Question: {latest_question}")
print(f"Answer: {latest_answer}")

****Setting up microsoft/Phi-3-mini-4k-instruct, Please wait...****
****Connecting to VectorStore****
****Setting up RAG Prompt****
****Building the Chains with Chat History****
****Invoking the Chain with User Question****
Question: How to create an account, where I can buy products?
Answer: To create an account on an e-commerce website, follow these steps:

1. Choose your preferred platform (e.g., Amazon, eBay, Walmart).
2. Sign up by providing your email address and creating a secure password.
3. Enter personal information such as your name and shipping address accurately.
4. You can now browse products, add them to your cart, and proceed to checkout to make purchases.
