In [7]:
import os
from dotenv import find_dotenv, load_dotenv

# Find the .env file and load its entries into the process environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path=dotenv_path)

# Read both API keys from the environment; a key that is not set yields None.
# NOTE(review): these names use hyphens rather than the more common
# underscore form (e.g. OPENAI_API_KEY) — confirm they match the .env entries.
openai_api_key, langchain_api_key = (
    os.getenv(key_name) for key_name in ('OPENAI-API-KEY', 'LANGCHAIN-API-KEY')
)

In [41]:
import bs4
from langchain import hub
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [11]:
# Chat model used to generate answers; the API key was read from the
# environment in an earlier cell.
llm = ChatOpenAI(
    api_key=openai_api_key,
    model="gpt-4o-mini",
)

In [32]:
# Fetch the page at the given URL as LangChain documents.
# TODO: consider passing a loader argument that removes irrelevant text.
loader = WebBaseLoader(web_paths=["https://www.fcm.dk/billetter/"])
docs = loader.load()

# Break the loaded documents into overlapping chunks (chunk_size=500,
# chunk_overlap=100) so they can be embedded and retrieved individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)
splits = text_splitter.split_documents(docs)

In [33]:
# Print only the first few splits to inspect their content; printing the
# whole list floods the cell output.
print(splits[:3])


[Document(metadata={'source': 'https://www.fcm.dk/billetter/', 'title': 'Billetter', 'language': 'da-DK'}, page_content='Billetter\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nFans\n\n\n\n\nSøg\n\n\n\n\n\n\n\n\n\nSøg efter:\n\n\n\n \n\n\n\n\n \n\n\n\n\n\nMenu\n \n\n\n\nVision 2025\nNyheder\nHerreholdet\n\nTruppen\nStaben\nKampprogram\nStilling\nKalender\n\n\nKvindeholdet\n\nTruppen\nStaben\nKampprogram\nStilling\n\n\nFans\n\nBilletter\nSæsonkort / BilletFlex\nUdebaneture\nUdebanefans\nEvents\nFCM-app\n\n\nFCM Shop\n\n\n\nKlubben\nDrengeakademiet\nPigeakademiet\nGuldminen\nFCM Klubsamarbejdet\nFCM Samfund\nSponsor\nKontakt'), Document(metadata={'source': 'https://www.fcm.dk/billetter/', 'title': 'Billetter', 'language': 'da-DK'}, page_content='Klubben\nDrengeakademiet\nPigeakademiet\nGuldminen\nFCM Klubsamarbejdet\nFCM Samfund\nSponsor\nKontakt\n\n\n\nEng

In [34]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma vector
# store, then expose that store through a retriever.
vectorstore = Chroma.from_documents(
    embedding=OpenAIEmbeddings(api_key=openai_api_key),
    documents=splits,
)
retriever = vectorstore.as_retriever()

In [35]:
# Build the prompt for the question-answering step. The system message holds
# the assistant instructions plus a {context} placeholder; the human message
# carries the user's question via {input}.
# NOTE(review): "fodbolklubben" in the first sentence looks like a typo for
# "fodboldklubben" — left unchanged here because it is runtime prompt text.
system_prompt = "".join(
    (
        "Du er en assistent der skal svare på spørgsmål fra fans af fodbolklubben FC Midtjylland. ",
        "Brug følgende kontekst til at besvare spørgsmålet. ",
        "Hvis du ikke kender svaret, henvis til mailen billetsalg@fcm.dk. ",
        "Brug maks tre sætninger og svar præcist. ",
        "\n\n",
        "{context}",
    )
)

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)



In [36]:
# Chain that passes the documents it is given to the LLM using the prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain: combines the retriever with the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [40]:
# Run a sample question through the RAG chain; the trailing bare expression
# makes the notebook display the generated answer.
response = rag_chain.invoke(
    {"input": "Hvordan refunderer jeg min billet?"}
)
response["answer"]

'Du kan desværre ikke få refunderet din billet, da vi ikke refunderer billetter i henhold til vores handelsbetingelser.'