## Importing libraries 

In [1]:
# needed packages
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# for openai access 
import openai
from langchain_community.llms import OpenAI 
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# work with URL laoder as example 
from langchain_community.document_loaders import UnstructuredURLLoader

# store data in vector database
from langchain_chroma import Chroma

# history of chat
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

# for loading keys
from dotenv import load_dotenv
import os
from uuid_extensions import uuid7, uuid7str

# for gradio interface 
import gradio as gr

## Loading Enviroment Variables 

In [2]:
load_dotenv("keys.env")

# Access the secret key from environment variables
openai.api_key = os.getenv('OPENAI_API_KEY')

## Intializing LLM 

In [3]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

### Collect data from websites URLs 

In [4]:
greek_compus_url = 'https://www.thegreekcampus.com/'
MQR_url = 'https://www.mqrspaces.com/'

places_urls = [greek_compus_url, MQR_url]

## Creating a Vectorestore from URLs Data 

In [5]:
data=[]
for url in  places_urls : 
    loaders = UnstructuredURLLoader(urls=[url])
    data.append(loaders.load()[0])

In [6]:
# check the gathered data 
data

[Document(metadata={'source': 'https://www.thegreekcampus.com/'}, page_content='Where groundbreaking\n\nstartups and tech companies\n\nconnect, collaborate and create.\n\nAt A\xa0Glance\n\n12\n\nYEARS FOSTERING\n\nINNOVATION\n\nENTREPRENEURSHIP HUBS IN CAIRO\n\n250+\n\nSTARTUPS & CORPORATES\n\n2,800\n\nMEMBERS IN HIGH-IMPACT JOBS\n\n200\n\nINDUSTRY-LEADING EVENTS HOSTED\n\n25\n\nINVESTMENT ROUNDS\n\nA hub for homegrown innovation where businesses at all stages join together to develop and grow new ideas.\n\nThe essence of The GrEEK Campus that formulates its unique DNA lies in three essential elements: its connected support network, its collaborative spaces, and the real heart of the campus — its creative community.\n\nOur Community\n\nThis is the ideal space to grow your community.\n\nCommunity is why we\'re here and it\'s what we\'re most proud of.\n\nOur Journey So Far\n\n2013\n\nTHE COMMUNITY BUILT FROM THE GROUND UP\n\nOur Chairman & Founder, Mr. Ahmed El Alfi, kickstarts the tran

In [7]:
# instead of creating data just read it uncomment if you want to create it

# this part for creating v_db
vectorstore = Chroma.from_documents(documents=data, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

### RAG application with system prompt only ! 

In [9]:
from langchain.chains import RetrievalQA
# Define the RAG chain
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)

# Ask a question and get a response
query = "How many startups are in the greek compus?"
response = rag_chain.run(query)

print(response)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


There are 250+ startups at The GrEEK Campus.


## Creating History Aware Retriever ( RAG + chat history)

In [23]:
contextualize_q_system_prompt = (
    """Given a chat history and the latest user question
    which might reference context in the chat history,
    formulate a response which can be understood and clear
    without the chat history. Do NOT answer the question,
    """
)

# contextualize
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# add chat history
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

## System prompt retrieveral 

In [24]:
def read_system_prompt(file_path):
    with open(file_path, 'r') as file:
        prompt_content = file.read()

    context = "{context}"

    system_prompt = f'("""\n{prompt_content.strip()}\n"""\n"{context}")'

    return system_prompt


file_path = 'add your path'  
system_prompt = read_system_prompt(file_path)

FileNotFoundError: [Errno 2] No such file or directory: 'bot_system_prompts/ICN system prompt.txt'

## Q_A_chain Creation and invoking it in RAG Chain 

In [9]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [10]:
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

In [11]:
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

## Session History Retrieveral Function

In [12]:
store = {}
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

## Creating Conversational RAG chain 

In [13]:
# final conversational RAG chain
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
    max_tokens_limit=500,
    top_n=5
)


## Running Chat Function 

In [14]:
def Runchat(user_input, session_id):
    print("ICNBot: ", end="")
    # Use the session_id dynamically
    response = conversational_rag_chain.invoke({"input": user_input}, config={"configurable": {"session_id": session_id}})["answer"]
    return response, ""

## Creating User Interface 

In [18]:
def create_gradio_interface():
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(height=240)
        msg = gr.Textbox(label="Prompt")
        btn = gr.Button("Send")
        clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

        # Generate a unique session ID for each user session
        session_id = str(uuid7())  # Generates a unique session ID

        # Function to handle responses
        def respond(user_input, chatbot):
            response, _ = Runchat(user_input, session_id)  # Pass session_id to Runchat
            chatbot.append((user_input, response))
            return "", chatbot

        # Button click and message submit events
        btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
        msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])  # Press enter to submit

    gr.close_all()
    demo.queue().launch(share=True, server_port=8082)

# Run the interface creation function
if __name__ == "__main__":
    create_gradio_interface()

Running on local URL:  http://127.0.0.1:8082
Running on public URL: https://23ebbb0af5bcb90b15.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Parent run ebbdbcef-6883-41e9-bab0-35eacdf2cc43 not found for run 44065f3b-9afe-461d-865c-113b7d24281f. Treating as a root run.


ICNBot: 

In [16]:
gr.close_all()