In [None]:
import os
from dotenv import load_dotenv

# Resolve the .env file that lives one level above the current working
# directory and load its variables into the process environment.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
dotenv_path = os.path.join(parent_dir, ".env")
_ = load_dotenv(dotenv_path=dotenv_path)

# None when the variable is not defined in the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [None]:
# load document
from langchain.document_loaders.csv_loader import CSVLoader

# Read the events CSV through LangChain's CSV loader.
loader = CSVLoader(
    file_path="../documents/events.csv",
)
data = loader.load()

# Number of documents produced by the loader.
len(data)



In [None]:
# split documents (not needed for single csv file as it is already split into lines)
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# text_splitter = RecursiveCharacterTextSplitter(
# # Chunk size is one line when the newline separator is set
#     # chunk_size =
#     chunk_overlap  = 2,
#     separators="\n"
# )
# docs = text_splitter.split_documents(data)
# print(len(data))
# print(len(docs))


In [None]:
# embedding
import os
from dotenv import load_dotenv

# OPENAI_API_KEY is already read from ../.env by the first cell of this
# notebook, so it is reused here rather than repeating the dotenv lookup.
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model used both to build and to query the vector store.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:

# vector store
import shutil

from langchain.vectorstores import FAISS

output_directory = "../documents/faiss_db"

# Remove old database files, if any. The cleanup must target the same
# directory that save_local() writes to below; ignore_errors=True makes this
# a no-op when the directory does not exist yet.
shutil.rmtree(output_directory, ignore_errors=True)

# Embed the loaded documents and build the FAISS index from them.
vectordb = FAISS.from_documents(
    data,
    embeddings,
)

# save vector store
vectordb.save_local(output_directory)


In [None]:
# load the previously saved vector store back from disk
events_db = FAISS.load_local(
    folder_path=output_directory,
    embeddings=embeddings,
)

In [None]:
# similarity search: retrieve the 5 closest documents for the query
query = "What photography events are happening in August?"
docs = events_db.similarity_search(query=query, k=5)
print(len(docs))

# Preview only the first 50 characters of each hit to keep the output short.
for doc in docs:
    snippet = doc.page_content[:50]
    print(snippet)

In [None]:
# maximal marginal relevance (MMR) search - balances relevance to the query
# (semantic similarity) against diversity among the returned documents
docs = events_db.max_marginal_relevance_search(query=query, k=5, fetch_k=20)

# Preview the first 50 characters of each result.
for doc in docs:
    snippet = doc.page_content[:50]
    print(snippet)


In [None]:
# vector store-backed retriever: MMR search returning 5 documents per query
retriever = events_db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)
docs = retriever.get_relevant_documents(
    "What photography events are happening in August?"
)

# Preview the first 50 characters of each retrieved document.
for doc in docs:
    snippet = doc.page_content[:50]
    print(snippet)

In [None]:
# RetrievalQA - question answering with prompt
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Chat model and MMR retriever (5 documents per query) that feed the QA chain.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo")
retriever = events_db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)

# Prompt with two placeholders: {context} and {question}.
prompt_template = """You are a help assistant at www.artrabbit.com. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
{context}
Question: {question}
Answer:"""
PROMPT = PromptTemplate.from_template(prompt_template)

# Assemble the chain with the custom prompt; source documents are returned
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

# Ask a question and display the answer text as the cell output.
question = "What photography exhibitions are happening in August?"
result = qa_chain({"query": question})
result["result"]
# result["source_documents"][0]



In [23]:
# RetrievalQA - question answering with refine chain type
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Chat model and MMR retriever (5 documents per query) that feed the QA chain.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo")
retriever = events_db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)

# Same RetrievalQA setup as before, but with the "refine" chain type and the
# library's default prompts.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=retriever,
    return_source_documents=True,
)

# Ask a question and display the answer text as the cell output.
question = "What photography exhibitions are happening in August?"
result = qa_chain({"query": question})
result["result"]
# result["source_documents"][0]



tags=None metadata=None vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x15f5d3690> search_type='mmr' search_kwargs={'k': 5}


In [None]:
# Conversational retrieval chain
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

# Plain similarity retriever (5 documents per query) and the chat model.
retriever = events_db.as_retriever(search_kwargs={"k": 5})
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)

# build prompt
# The answering prompt needs a {context} placeholder so the retrieved
# documents are actually inserted; {chat_history} and {question} are supplied
# by the conversational chain.
prompt_template = """You are a help assistant at www.artrabbit.com having a conversation with a person who is looking for something creative and cultural to do.
Use the following pieces of context to provide a concise answer to the question.
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
{context}
Chat History: {chat_history}
Question: {question}
Answer:"""
PROMPT = PromptTemplate.from_template(prompt_template)

# Conversation memory exposed to the chain under the "chat_history" key.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# run chain
# Pass the custom prompt to the document-combining step; without this the
# chain silently falls back to its default prompt and PROMPT is unused.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": PROMPT},
)

question = "What photography exhibitions are happening in July?"
result = qa_chain({"question": question})
result['answer']

