#### Create langchain docs from folder path

In [None]:
from langchain.document_loaders import DirectoryLoader

# Folder the documents are loaded from (example path; point it at your own data).
directory = '/app/dir_path'

def load_docs(directory):
    """Load the contents of `directory` as langchain documents.

    Builds a `DirectoryLoader` for the given path and returns whatever its
    `load()` call produces.
    """
    return DirectoryLoader(directory).load()

# Load everything found under `directory`.
documents = load_docs(directory)
# Bare expression: the notebook shows the number of loaded documents.
len(documents)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents,chunk_size=1024,chunk_overlap=30):
    """Split `documents` into smaller chunks.

    Configures a `RecursiveCharacterTextSplitter` with the given `chunk_size`
    and `chunk_overlap` and returns the result of its `split_documents` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Chunk the loaded documents using the default chunk size and overlap.
docs = split_docs(documents)
# Bare expression: the notebook shows the number of chunks.
len(docs)

#### Create custom langchain docs with metadata

In [None]:
from langchain.docstore.document import Document

# Question -> answer pairs used to build documents by hand.
d = {'how are you?':'I am fine',
     'what is your name?':'My name is smith'}

# The question becomes the document text; the answer is kept in metadata.
docs = [
    Document(page_content=question, metadata={"answer": answer})
    for question, answer in d.items()
]

len(docs)

#### Create retriever using chromadb
* For more information visit [chroma langchain](https://python.langchain.com/docs/integrations/vectorstores/chroma)

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# 'xxx' is presumably a placeholder; supply a real OpenAI API key.
openai_api_key = 'xxx'
openai_embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Build a Chroma vectorstore from `docs` using the OpenAI embeddings.
vectorstore = Chroma.from_documents(documents=docs, embedding=openai_embedding)
# Default retriever, configured with k=50 in its search arguments.
retriever = vectorstore.as_retriever(search_kwargs={"k": 50})
# Alternative retriever configurations; uncomment one to use it instead:
# retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",search_kwargs={"score_threshold": .5})
# retriever = vectorstore.as_retriever(search_type="mmr")

#### Create retriever using FAISS
* For more information visit [FAISS langchain](https://python.langchain.com/docs/integrations/vectorstores/FAISS)

In [None]:
from langchain.vectorstores import FAISS

# 'xxx' is presumably a placeholder; supply a real OpenAI API key.
openai_api_key = 'xxx'
openai_embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Build a FAISS vectorstore from `docs` using the OpenAI embeddings.
vectorstore = FAISS.from_documents(documents=docs, embedding=openai_embedding)
# Default retriever, configured with k=50 in its search arguments.
retriever = vectorstore.as_retriever(search_kwargs={"k": 50})
# Alternative retriever configurations; uncomment one to use it instead:
# retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",search_kwargs={"score_threshold": .5})
# retriever = vectorstore.as_retriever(search_type="mmr")

In [None]:
# Save the FAISS index under the local name 'faiss_index'.
vectorstore.save_local('faiss_index')

# Load the saved index again; the same embedding object is passed in so the
# reloaded store can embed queries.
# NOTE(review): newer langchain releases reportedly require
# allow_dangerous_deserialization=True here; confirm against the installed version.
openai_api_key = 'xxx'
openai_embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
vectorstore = FAISS.load_local("faiss_index", openai_embedding)

##### FAISS vectorstore to dataframe

In [None]:
import pandas as pd

def get_vectorstore_df(vectorstore):
    """Return the documents stored in a vectorstore as a pandas DataFrame.

    One row is produced per entry of ``vectorstore.docstore._dict``:
    ``chunk_id`` holds the docstore key, ``content`` the stripped
    ``page_content``, and every metadata key becomes its own column.

    Args:
        vectorstore: object exposing ``docstore._dict``, a mapping of id to
            document. ``_dict`` is a private attribute (presumably of
            langchain's in-memory docstore); verify after library upgrades.

    Returns:
        pandas.DataFrame that always has ``chunk_id`` and ``content``
        columns, including when the store holds no documents.
    """
    reserved = ("chunk_id", "content")
    data_rows = []
    for chunk_id, document in vectorstore.docstore._dict.items():
        row = {"chunk_id": chunk_id,
               "content": document.page_content.strip()}
        for key, value in (document.metadata or {}).items():
            # A metadata key named like a reserved column must not overwrite
            # the real id/content (the id is what `delete` needs), so it is
            # kept under a prefixed column name instead.
            column = "metadata_" + str(key) if key in reserved else key
            row[column] = value
        data_rows.append(row)

    if not data_rows:
        # Keep the schema stable so callers can still select `chunk_id`.
        return pd.DataFrame(columns=list(reserved))
    return pd.DataFrame(data_rows)

##### FAISS vectorstore: delete records

In [None]:
# Note: the chunk ids to delete can be read from the `chunk_id` column of the
# dataframe built by get_vectorstore_df; the values below are example ids.
chunk_id_list = ['abcd','xxxx']
vectorstore.delete(ids=chunk_id_list)

##### FAISS vectorstore: add new records

In [None]:
from langchain.docstore.document import Document

# Question -> answer pairs to add to the vectorstore.
d = {'how are you?':'I am fine',
     'what is your name?':'My name is smith'}

# The question becomes the document text; the answer is kept in metadata.
docs = [
    Document(page_content=question, metadata={"answer": answer})
    for question, answer in d.items()
]

vectorstore.add_documents(docs)

In [None]:
# After adding or deleting records, save the index again so the changes
# are written to the local 'faiss_index' copy.
vectorstore.save_local('faiss_index')

#### Finding top k similar docs

In [None]:
# Example query; the retriever returns the documents it considers relevant.
query = 'how many awards did messi won?'
similar_docs = retriever.get_relevant_documents(query)
# Bare expression: the notebook displays the retrieved documents.
similar_docs

#### Using retrievers as chatbot

In [None]:
from langchain.vectorstores import FAISS

# 'xxx' is presumably a placeholder; supply a real OpenAI API key.
openai_api_key = 'xxx'
openai_embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Rebuild the FAISS store from the current `docs` and expose it as a retriever.
vectorstore = FAISS.from_documents(documents=docs, embedding=openai_embedding)
retriever = vectorstore.as_retriever(search_kwargs={"k": 50})

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

# 'xxx' is presumably a placeholder; supply a real OpenAI API key.
openai_api_key = 'xxx'
llm = ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0.0, openai_api_key=openai_api_key)
# Summary memory driven by the same LLM and stored under the "chat_history" key.
memory = ConversationSummaryMemory(llm=llm,memory_key="chat_history",return_messages=True)
# Conversational chain wired to the LLM, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory,verbose=True)

# Ask one question through the chain.
qa.run({'question':'tell me about goglocal?'})

#### Custom langchain response generation using retrievers

In [None]:
import os
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# Chat model used to generate the email reply.
openai_api_key = 'xxx'
model_name = 'gpt-3.5-turbo-16k'
chat_model = ChatOpenAI(temperature=0.0, model_name=model_name, openai_api_key=openai_api_key)

# Fields the structured output parser should extract from the reply.
response_schemas = [
    ResponseSchema(name="email_subject", description="subject of email based on context"),
    ResponseSchema(name="email_body", description="body of email based on context"),
]

In [None]:
# Divider line placed in front of every section of the prompt.
s = '##' * 30

# Task description that opens the prompt.
_instructions = "".join([
    "You are a marketing specialist at FoodForGood. Your responsibility is to respond to the sender's ",
    "email using the company context provided below. Ensure that your reply is professional and ",
    "incorporates the specified end template for concluding the email response. The sender's email, ",
    "company context, and end template are provided, so be sure to utilize them to craft a professional ",
    "subject and body for the email.It is crucial to precisely respond to each inquiry in the email by ",
    "leveraging the company context provided below.\n",
])

# Template sections in the order they appear; each one follows a divider.
_sections = (
    '\n\nSender\'s Email:\n{input_email}\n\n',
    '\n\nEnd Template:\n{end_template}\n\n',
    '\n\nCompany Context:\n{context}\n\n',
    "\n\n{format_instructions}\n",
)

input_prompt_query = _instructions + "".join(s + section for section in _sections)

In [None]:
# Build the parser from the response schemas, then fetch the instruction text
# that fills the {format_instructions} slot of the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [None]:
# Example incoming email; it is also used as the retrieval query.
full_email = """Hi, I am parent from haximar school. can you tell me steps to signup for the program for my child?"""
similar_docs = retriever.get_relevant_documents(full_email)
# Bare expression: the notebook displays the retrieved documents.
similar_docs

In [None]:
# Flatten each retrieved document into one text snippet for the prompt context.
# The two formats used to run back to back, so the second pass received plain
# strings and failed on `.page_content`; the format is now chosen per document.
def _doc_to_text(doc):
    """Render one retrieved item as prompt text.

    Strings are returned unchanged so the cell can be re-run safely. Documents
    whose metadata holds an 'answer' become 'Q.<content>' and 'A.<answer>' on
    two lines; any other document contributes its stripped page content only.
    """
    if isinstance(doc, str):
        return doc
    content = doc.page_content.strip()
    answer = (doc.metadata or {}).get('answer')
    if answer is None:
        return content
    return 'Q.' + content + '\nA.' + str(answer).strip()


similar_docs = [_doc_to_text(i) for i in similar_docs]
len(similar_docs)

In [None]:
# Closing signature that fills the {end_template} slot of the prompt.
end_template = """Warmest regards,
Customer Service Team
support@foodforgood.ca"""

In [None]:
# Join the retrieved snippets, one per line, into the context text for the prompt.
input_context = '\n'.join(similar_docs)
print(input_context)

In [None]:
# Variables supplied at format time; format_instructions is bound up front
# through partial_variables.
input_variables = ["input_email", "context", "end_template"]
_human_message = HumanMessagePromptTemplate.from_template(input_prompt_query)
prompt = ChatPromptTemplate(
    messages=[_human_message],
    input_variables=input_variables,
    partial_variables={"format_instructions": format_instructions},
)

# Fill in the email, the retrieved context and the closing signature.
input_prompt = prompt.format_prompt(
    input_email=full_email,
    context=input_context,
    end_template=end_template,
)

print(input_prompt.messages[0].content)

In [None]:
# Send the formatted messages to the chat model, then parse the reply text
# with the parser built from the response schemas.
gpt_response = chat_model(input_prompt.to_messages())
parse_response = output_parser.parse(gpt_response.content)
# Bare expression: the notebook displays the parsed result.
parse_response