In [None]:
import os
from dotenv import load_dotenv

# The .env file is expected one level above the current working directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
dotenv_path = os.path.join(parent_dir, ".env")

# Load the variables from that file; the return value is deliberately discarded.
_ = load_dotenv(dotenv_path)

# None if the variable is not set after loading.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [None]:
# Load the events CSV into LangChain documents and show how many were read.
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(
    file_path="../documents/events.csv",
)
data = loader.load()
len(data)



In [None]:
# split documents (not needed for single csv file as it is already split into lines)
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# text_splitter = RecursiveCharacterTextSplitter(
# # Chunk size is new line with separator set
#     # chunk_size =
#     chunk_overlap  = 2,
#     separators="\n"
# )
# docs = text_splitter.split_documents(data)
# print(len(data))
# print(len(docs))


In [None]:
# embedding
import os
from dotenv import load_dotenv

# OPENAI_API_KEY is already read from the parent directory's .env file by the
# first cell of this notebook, so that lookup is not repeated here.
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model used both to build and to query the vector store.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:

# vector store
import shutil

from langchain.vectorstores import FAISS

output_directory = "../documents/faiss_db"

# Remove old database files, if any. The path is taken from output_directory so
# that the directory being cleaned is the same one written to below (the
# previous shell command removed ./documents/faiss_db, a different location).
# ignore_errors=True keeps the "no error if it does not exist" behaviour of rm -rf.
shutil.rmtree(output_directory, ignore_errors=True)

# Embed every loaded document and index the vectors with FAISS.
vectordb = FAISS.from_documents(
    data,
    embeddings,
)

# save vector store
vectordb.save_local(output_directory)


In [59]:
# Reload the saved index from disk using the same embeddings object.
events_db = FAISS.load_local(
    folder_path=output_directory,
    embeddings=embeddings,
)

In [None]:
# Similarity search: ask for the top 5 matches and preview each one.
query = "What photography events are happening in August?"
docs = events_db.similarity_search(query, k=5)
print(len(docs))
for doc in docs:
    # Only the first 50 characters are shown to keep the output short.
    preview = doc.page_content[:50]
    print(preview)

In [None]:
# Maximal marginal relevance (MMR) search: aims for results that are both
# relevant to the query and diverse. Requests 5 results with fetch_k=20.
docs = events_db.max_marginal_relevance_search(
    query,
    k=5,
    fetch_k=20,
)
for doc in docs:
    preview = doc.page_content[:50]
    print(preview)


In [None]:
# Wrap the vector store in a retriever configured for MMR search with k=5.
retriever = events_db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)
docs = retriever.get_relevant_documents(
    "What photography events are happening in August?"
)

for doc in docs:
    preview = doc.page_content[:50]
    print(preview)

In [64]:
# RetrievalQA: answer a question using documents fetched by the retriever.
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# MMR retriever over the events index, returning 5 documents per query.
retriever = events_db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)

qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

# The chain returns a dict; the answer text is under the "result" key.
result = qa_chain(
    {"query": "What photography exhibitions are happening in August?"}
)
result["result"]



"There are several photography exhibitions happening in August in London, United Kingdom. Here are some of them:\n\n1. Title: Gallery Summer Show\n   Date: 07 Jul 2023 – 12 Aug 2023\n   Venue: Frith Street Gallery, Golden Square\n   Description: Frith Street Gallery presents Portrait, an exhibition of works by gallery artists considering the contemporary portrait through a range of perspectives and mediums.\n\n2. Title: Paul McCartney. Photographs\n   Date: 28 Jun 2023 – 01 Oct 2023\n   Venue: National Portrait Gallery\n   Description: An unprecedented exhibition, revealing extraordinary photographs taken by Paul McCartney. This show focuses on portraits captured by McCartney, using his own camera, between December 1963 and February 1964.\n\n3. Title: Evelyn Hofer\n   Date: 23 Jun 2023 – 24 Sep 2023\n   Venue: The Photographers' Gallery\n   Description: The first UK solo exhibition of German-American photographer Evelyn Hofer.\n\n4. Title: Beyond Brutal!\n   Date: 19 Jul 2023 – 23 Aug 