In [1]:
import os
import getpass
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import gradio as gr


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Topic label -> folder that holds that topic's source documents.
# Topics are processed in this insertion order when the chains are built.
TOPIC_FOLDERS = dict(
    [
        ("Python Programming", "Data/Python"),
        ("Machine Learning_AI", "Data/AI_ML"),
        ("UAE Information", "Data/UAE"),
    ]
)

In [3]:

def load_documents(folder_path):
    """Load every supported document found directly inside ``folder_path``.

    Supported types are chosen by file extension, compared case-insensitively:
    ``.txt`` is read with ``TextLoader`` and ``.pdf`` with ``PyPDFLoader``.
    Entries whose name starts with ``.`` are skipped silently; anything else
    that is not a supported regular file is reported and skipped.

    Args:
        folder_path: Path of the directory to scan (not recursive).

    Returns:
        A list with the items returned by each loader's ``load()`` call, in
        sorted filename order. The list is empty when the folder is missing
        or nothing could be loaded.

    Loading is best-effort: an exception raised while loading one file is
    printed and does not stop the remaining files from being processed.
    """
    documents = []
    # isdir (rather than exists) so a path pointing at a plain file is reported
    # here instead of crashing inside os.listdir.
    if not os.path.isdir(folder_path):
        print(f"Warning: Folder not found: {folder_path}")
        return documents

    loader_by_extension = {".txt": TextLoader, ".pdf": PyPDFLoader}

    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for filename in sorted(os.listdir(folder_path)):
        if filename.startswith('.'):
            continue

        file_path = os.path.join(folder_path, filename)
        # Lower-case the extension so "REPORT.PDF" / "notes.TXT" are accepted too.
        extension = os.path.splitext(filename)[1].lower()
        loader_cls = loader_by_extension.get(extension)

        if loader_cls is None or not os.path.isfile(file_path):
            print(f"Unsupported file type: {file_path}")
            continue

        try:
            documents.extend(loader_cls(file_path).load())
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the whole topic.
            print(f"Error loading {file_path}: {e}")
    return documents


In [4]:
def initialize_chains():
    """Build one conversational retrieval chain per entry of ``TOPIC_FOLDERS``.

    For each topic the documents in its folder are loaded with
    ``load_documents``, split into 1000-character chunks with a 200-character
    overlap, embedded into a Chroma collection named after the topic
    (lower-cased, spaces replaced by underscores), and wrapped in a
    ``ConversationalRetrievalChain`` backed by a ``gpt-4`` chat model at
    temperature 0.7 with its own ``ConversationBufferMemory``.

    Topics for which no documents were loaded are reported and left out.

    Returns:
        dict mapping topic label -> initialized chain.
    """
    # One embedding client and one splitter are shared by every topic.
    embedder = OpenAIEmbeddings()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    chains_by_topic = {}

    for topic, folder in TOPIC_FOLDERS.items():
        print(f"Processing {topic}...")

        raw_docs = load_documents(folder)
        if not raw_docs:
            print(f"No documents found for {topic}")
            continue

        # Chunk the documents and index them in a per-topic collection.
        store = Chroma.from_documents(
            documents=splitter.split_documents(raw_docs),
            embedding=embedder,
            collection_name=topic.lower().replace(" ", "_"),
        )

        # Each topic gets its own retriever and its own conversation memory.
        topic_retriever = store.as_retriever()
        topic_memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )

        chains_by_topic[topic] = ConversationalRetrievalChain.from_llm(
            llm=ChatOpenAI(model_name="gpt-4", temperature=0.7),
            retriever=topic_retriever,
            memory=topic_memory,
            verbose=True,
        )
        print(f"{topic}: Chain initialized")

    return chains_by_topic

In [5]:
def chat_function(user_input, selected_topic, history):
    """Answer one user message with the chain of the selected topic.

    Args:
        user_input: Text typed by the user.
        selected_topic: Key into the module-level ``conversational_chains``
            mapping; falsy when nothing is selected.
        history: List of ``(user_message, bot_message)`` tuples for the
            conversation so far. It is not modified; a copy is extended.

    Returns:
        A ``(chat_display, state)`` pair. Both elements are the same updated
        history list, so what is displayed and what is stored never diverge.

    Behaviour:
        * No topic selected, or blank input: history is returned unchanged.
        * Unknown topic: a "Please select a valid topic" reply is appended.
        * Any exception raised by the chain is caught and appended as an
          ``Error: ...`` reply instead of propagating to the UI.
    """
    # Work on a copy so the caller's list (the previous UI state) is never
    # mutated in place; tolerate a None history as an empty conversation.
    updated = list(history or [])

    if not selected_topic:
        return updated, updated

    # Do not spend an LLM call on empty / whitespace-only messages.
    if not user_input or not user_input.strip():
        return updated, updated

    chain = conversational_chains.get(selected_topic)
    if not chain:
        updated.append((user_input, "Please select a valid topic"))
        return updated, updated

    try:
        # NOTE(review): chains built by initialize_chains carry their own
        # memory, which presumably supersedes the "chat_history" passed here;
        # it is kept for chains without memory. Confirm.
        response = chain.invoke({"question": user_input, "chat_history": updated})
        updated.append((user_input, response['answer']))
    except Exception as e:
        # Deliberately broad: surface the failure in the chat window.
        updated.append((user_input, f"Error: {e}"))

    return updated, updated

In [6]:
def create_gradio_interface(chains):
    """Assemble the Gradio Blocks UI for the multi-topic chatbot.

    Args:
        chains: Mapping of topic label -> chain. Only its keys are used here,
            to populate the topic dropdown; the first key is preselected.
            ``chat_function`` looks the chain up by the selected label.

    Returns:
        The ``gr.Blocks`` object, not yet launched.

    Both ways of sending a message (the Send button and pressing Enter in the
    textbox) run ``chat_function`` and then clear the textbox.
    """
    topic_names = list(chains.keys())

    with gr.Blocks() as demo:
        gr.Markdown("## Multi-Topic RAG-Enhanced Chatbot")

        selected_topic = gr.Dropdown(
            choices=topic_names,
            label="Choose a Topic",
            value=topic_names[0] if topic_names else None
        )

        chatbot = gr.Chatbot()
        state = gr.State([])

        with gr.Row():
            user_input = gr.Textbox(
                show_label=False,
                placeholder="Type your question here...",
                scale=4
            )
            submit_btn = gr.Button("Send", scale=1)

        # Same wiring for both triggers, declared once to keep them in sync.
        handler_io = dict(
            inputs=[user_input, selected_topic, state],
            outputs=[chatbot, state]
        )
        click_event = submit_btn.click(chat_function, **handler_io)
        enter_event = user_input.submit(chat_function, **handler_io)

        # Clear the textbox after either trigger; previously only the button
        # click did this, so pressing Enter left the old message in the box.
        for sent_event in (click_event, enter_event):
            sent_event.then(lambda: "", None, user_input)

    return demo

In [12]:
if __name__ == "__main__":
    # Ask for the OpenAI key interactively only when the environment does not
    # already provide one; the key is never written into the notebook.
    if not os.getenv("OPENAI_API_KEY"):
        api_key = getpass.getpass("Enter your OpenAI API key: ")
        os.environ["OPENAI_API_KEY"] = api_key

    # Build one chain per topic. The result must stay in the module-level name
    # `conversational_chains`, because chat_function reads it from there.
    print("Initializing chains...")
    conversational_chains = initialize_chains()

    if not conversational_chains:
        print("Error: No chains were initialized. Check your data folders and files.")
        # Raise SystemExit rather than calling the interactive `exit` helper:
        # that helper is injected by site/IPython and is not guaranteed to stop
        # execution at this line inside a notebook, in which case the UI below
        # would still be built with no chains.
        raise SystemExit(1)

    # Create and launch Gradio interface.
    # NOTE: share=True publishes a public URL for the app.
    demo = create_gradio_interface(conversational_chains)
    demo.launch(share=True)

Initializing chains...


Ignoring wrong pointing object 7 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 338 0 (offset 0)


Processing Python Programming...


Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


Python Programming: Chain initialized
Processing Machine Learning_AI...


Ignoring wrong pointing object 7 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 325 0 (offset 0)


Machine Learning_AI: Chain initialized
Processing UAE Information...
UAE Information: Chain initialized


Python(3816) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


* Running on local URL:  http://127.0.0.1:7861


Python(3823) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


* Running on public URL: https://875627278d9993e811.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
