In [27]:
import os

# Read the OpenAI key from the environment and fail fast when it is missing or empty
api_key = os.environ.get('OPENAI_API_KEY')
if api_key is None or api_key == "":
    raise ValueError("API key not set. Please set the OPENAI_API_KEY environment variable.")

# Importing necessary libraries from the langchain package
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Address of the study-programme page whose content the chatbot answers from
PROGRAM_PAGE_URL = "https://www.srh-hochschule-heidelberg.de/en/master/applied-data-science-and-analytics/"

# Point a WebBaseLoader at that page and fetch it into `docs`
loader = WebBaseLoader(PROGRAM_PAGE_URL)
docs = loader.load()

# Function to split the loaded document into smaller text chunks for processing
def get_text_chunks(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> list:
    """Split *text* into chunks on newline boundaries.

    Args:
        text: The raw text to split.
        chunk_size: Chunk size handed to ``CharacterTextSplitter``; lengths
            are measured with ``len``. Defaults to 1000.
        chunk_overlap: Overlap setting handed to ``CharacterTextSplitter``.
            Defaults to 200.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for *text*
        (the chunks later passed to the vector store).

    Note:
        The only separator is a newline, so the splitter can emit a chunk
        larger than ``chunk_size`` (it reports "Created a chunk of size ...,
        which is longer than the specified ..." when that happens).
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)

# Chunk the page content of the first loaded document
first_document = docs[0]
text_chunks = get_text_chunks(first_document.page_content)

# Function to create a vector store using FAISS from the text chunks with embeddings from OpenAI
def get_vectorstore(text_chunks):
    """Build and return a FAISS vector store from *text_chunks*.

    The chunks are embedded with ``OpenAIEmbeddings``, which is configured
    with the module-level ``api_key``.
    """
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=OpenAIEmbeddings(api_key=api_key),
    )

# Embed the chunks and index them in the vector store used for retrieval
vectorstore = get_vectorstore(text_chunks=text_chunks)

# Function to set up the conversational chain using the vector store, a chat model, and memory management
def get_conversation_chain(vectorstore):
    """Assemble and return a ``ConversationalRetrievalChain`` over *vectorstore*.

    The chain combines three parts:
      * a ``ChatOpenAI`` model configured with the module-level ``api_key``,
      * the retriever obtained from ``vectorstore.as_retriever()``,
      * a ``ConversationBufferMemory`` stored under the ``'chat_history'`` key
        with ``return_messages=True``.
    """
    chat_model = ChatOpenAI(api_key=api_key)
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=retriever,
        memory=history,
    )

# Build the retrieval chain that answers the user's questions
conversation_chain = get_conversation_chain(vectorstore=vectorstore)

# Function to pass user questions to the conversation chain and extract responses
def ask_question(conversation_chain, question):
    """Send *question* to the chain and return only the answer.

    Args:
        conversation_chain: A callable that takes the question and returns a
            mapping containing an ``'answer'`` entry.
        question: The user's question.

    Returns:
        The value stored under ``'answer'`` in the chain's response.

    Raises:
        KeyError: If the response has no ``'answer'`` entry.
    """
    # Call the chain object itself instead of spelling out its __call__ dunder.
    response = conversation_chain(question)
    return response['answer']

# Main interaction loop: read questions until the user exits, printing each answer
if __name__ == "__main__":
    print("Welcome to the Applied Data Science & Analytics Chatbot. Ask me anything!")
    while True:
        try:
            # Strip surrounding whitespace so inputs like " exit " are still recognised
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or an interrupt ends the session cleanly
            # instead of surfacing a traceback
            print("\nGoodbye!")
            break
        if not user_input:
            # Nothing was typed: prompt again rather than querying the chain
            continue
        if user_input.lower() in ('exit', 'quit'):
            print("Goodbye!")
            break
        response = ask_question(conversation_chain, user_input)
        print("Bot:", response)  # Display the response from the bot


Created a chunk of size 1080, which is longer than the specified 1000


Welcome to the Applied Data Science & Analytics Chatbot. Ask me anything!
You: hi
Bot: Hello! How can I assist you today?
You: what is the fees?
Bot: The tuition fees for students from the European Economic Area are 790 EUR per month full-time, with a one-time registration fee of 750 EUR. For applicants from Non-EEA countries without a permanent residence permit, the tuition fee is 6,450 EUR per semester, along with a one-time registration fee of 1,000 EUR. They must also pay the tuition fees for one semester in advance.
You: give me contact details
Bot: Sure, you can contact SRH Hochschule Heidelberg through the following email address: studyinheidelberg@srh.de or by phone at +49 (0)6221-6799-799. Their address is Ludwig-Guttmann-Straße 6, 69123 Heidelberg.
You: how many semesters?
Bot: The program mentioned in the context typically consists of 4 semesters.
You: which are the subjects?
Bot: The Master's program in Applied Data Science and Analytics includes subjects such as Data Visua