In [None]:
import os
from dotenv import load_dotenv

# Build the path to the .env file that sits one directory above the
# current working directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
dotenv_path = os.path.join(parent_dir, ".env")

# Load the variables from that file; the return value is not needed.
_ = load_dotenv(dotenv_path=dotenv_path)

# None if the variable is still absent from the environment after loading.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [None]:
# load document
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import WebBaseLoader

# Alternative source kept for reference: load events from a local CSV file.
# loader = CSVLoader(file_path="../documents/events.csv")
loader = WebBaseLoader(["https://www.artrabbit.com/all-listings/united-kingdom/birmingham"])

data = loader.load()

# Only the last expression of a cell is rendered, so a bare `len(data)` in the
# middle of the cell would be silently discarded; print the count instead.
print(f"Loaded {len(data)} document(s)")

# Display the first loaded document for inspection.
data[0]



In [None]:
# split documents (not needed for single csv file as it is already split into lines)
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# text_splitter = RecursiveCharacterTextSplitter(
# # Chunk size is a new line, with the separator set
#     # chunk_size =
#     chunk_overlap  = 2,
#     separators="\n"
# )
# docs = text_splitter.split_documents(data)
# print(len(data))
# print(len(docs))


In [None]:
# embedding
import os
from dotenv import load_dotenv

# Locate the .env file one level above the working directory and load it,
# so this cell can obtain the API key on its own.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
dotenv_path = os.path.join(parent_dir, ".env")
_ = load_dotenv(dotenv_path=dotenv_path)

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model handle that is passed to the vector store cells below.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:

# vector store
import shutil

from langchain.vectorstores import FAISS

output_directory = "../documents/mock_db"

# Remove any previously saved index before rebuilding. The earlier shell
# command deleted "./documents/faiss_db", which is not the directory this cell
# writes to, so stale files in `output_directory` were never cleared.
# `ignore_errors=True` makes this a no-op when the directory does not exist.
shutil.rmtree(output_directory, ignore_errors=True)

# Embed the loaded documents and build the FAISS index from them.
vectordb = FAISS.from_documents(
    data,
    embeddings,
)

# save vector store
vectordb.save_local(output_directory)


In [None]:
# Reload the vector store from `output_directory`, pairing it with the
# `embeddings` object defined earlier in the notebook.
events_db = FAISS.load_local(
    folder_path=output_directory,
    embeddings=embeddings,
)

In [None]:
# Plain similarity search: fetch the 5 documents closest to the query.
query = "I want to go to an art event, what's happening this month?"
docs = events_db.similarity_search(query, k=5)

# Report how many documents came back, then print each one's full text.
print(len(docs))
for page_text in (match.page_content for match in docs):
    print(page_text)

In [None]:
# Maximal marginal relevance (MMR) search - aims for results that are both
# relevant (semantically similar) to the query and diverse.
# Reuses `query` from the similarity-search cell.
docs = events_db.max_marginal_relevance_search(query, fetch_k=20, k=5)

# Print a 50-character preview of every returned document.
previews = [match.page_content[:50] for match in docs]
for preview in previews:
    print(preview)


In [None]:
# Wrap the vector store in a retriever configured for MMR search with k=5.
retriever = events_db.as_retriever(search_kwargs=dict(k=5), search_type="mmr")
docs = retriever.get_relevant_documents(
    "What photography events are happening in August?"
)

# Show the first 50 characters of each retrieved document.
for index in range(len(docs)):
    print(docs[index].page_content[:50])

In [8]:
# RetrievalQA - question answering with a custom prompt
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Chat model and MMR retriever (k=5) that the chain is assembled from.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo")
retriever = events_db.as_retriever(search_kwargs=dict(k=5), search_type="mmr")

# Prompt text, assembled line by line; `{context}` and `{question}` are the
# two template variables.
prompt_template = (
    "You are a help assistant at www.artrabbit.com. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "{context}\n"
    "Question: {question}\n"
    "Answer:"
)
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Assemble the chain with the custom prompt; source documents are requested
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs=dict(prompt=PROMPT),
    return_source_documents=True,
    retriever=retriever,
)

question = "I want to go to an art event, what's happening this month?"
result = qa_chain(dict(query=question))

# Last expression of the cell: display the answer text.
result["result"]
# result["source_documents"][0]



'Here are some contemporary art exhibitions happening in Birmingham this month:\n\n1. Karl Blossfeldt: Art Forms in Nature at Midlands Arts Centre, running from July 8th to August 20th, 2023.\n2. Made at MAC: Digital Zines at Midlands Arts Centre, running from July 7th, 2023 to January 7th, 2024.\n3. Watershed at Midlands Arts Centre, running from June 29th to November 5th, 2023.\n4. RBSA Summer Show at RBSA Gallery, running from June 15th to July 22nd, 2023.\n5. Mary Williams ARBSA at RBSA Gallery, running from June 15th to July 22nd, 2023.\n6. Charles Weston ARBSA at RBSA Gallery, running from June 15th to July 22nd, 2023.\n7. Ed Isaacs RBSA at RBSA Gallery, running from June 15th to July 22nd, 2023.\n8. Melati Suryodarmo: Passionate Pilgrim at Ikon, running from May 17th to September 3rd, 2023.\n9. The 1970s - Artists Born In And Art Created In One Fantastic Decade at Colley Ison Gallery, running from May 3rd to August 12th, 2023.\n\nPlease note that these are just a few of the exhi

In [None]:
# RetrievalQA - question answering using the "refine" chain type
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Chat model and MMR retriever (k=5) that the chain is assembled from.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo")
retriever = events_db.as_retriever(search_kwargs=dict(k=5), search_type="mmr")

# No custom prompt here; only the chain type differs from the default.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=retriever,
    return_source_documents=True,
)

question = "What photography exhibitions are happening in August?"
result = qa_chain(dict(query=question))

# Last expression of the cell: display the answer text.
result["result"]
# result["source_documents"][0]



In [9]:
# Conversational retrieval chain
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

retriever = events_db.as_retriever(search_kwargs={"k": 5})
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)

# build prompt
# The template needs a {context} placeholder so the retrieved documents are
# actually inserted; previously it referred to "the following pieces of
# context" without providing a slot for them.
prompt_template = """You are a help assistant at www.artrabbit.com having a conversation with a person who is looking for something creative and cultural to do.
Use the following pieces of context to provide a concise answer to the question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Chat History: {chat_history}
Question: {question}
Answer:"""
PROMPT = PromptTemplate.from_template(prompt_template)

# Conversation memory stored under the "chat_history" key used by the chain.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# run chain
# Pass PROMPT to the answer-generating (combine-documents) step; before this,
# PROMPT was built but never handed to the chain, so the default prompt was
# used instead.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": PROMPT},
)

question = "I want to go to an art event, what's happening this month?"
result = qa_chain({"question": question})

# Last expression of the cell: display the answer text.
result['answer']



"Here are some contemporary art exhibitions happening in Birmingham this month:\n\n1. Karl Blossfeldt: Art Forms in Nature\n   Dates: 8 Jul 2023 – 20 Aug 2023\n   Location: Midlands Arts Centre\n\n2. Made at MAC: Digital Zines\n   Dates: 7 Jul 2023 – 7 Jan 2024\n   Location: Midlands Arts Centre\n\n3. Watershed\n   Dates: 29 Jun 2023 – 5 Nov 2023\n   Location: Midlands Arts Centre\n\n4. RBSA Summer Show\n   Dates: 15 Jun 2023 – 22 Jul 2023\n   Location: RBSA Gallery\n\n5. Mary Williams ARBSA\n   Dates: 15 Jun 2023 – 22 Jul 2023\n   Location: RBSA Gallery\n\n6. Charles Weston ARBSA\n   Dates: 15 Jun 2023 – 22 Jul 2023\n   Location: RBSA Gallery\n\n7. Ed Isaacs RBSA\n   Dates: 15 Jun 2023 – 22 Jul 2023\n   Location: RBSA Gallery\n\n8. Melati Suryodarmo: Passionate Pilgrim\n   Dates: 17 May 2023 – 3 Sep 2023\n   Location: Ikon\n\n9. The 1970s - Artists Born In And Art Created In One Fantastic Decade\n   Dates: 3 May 2023 – 12 Aug 2023\n   Location: Colley Ison Gallery\n\nPlease note that 