In [None]:
import os
import glob
from dotenv import load_dotenv
from flask import Flask, request
from twilio.twiml.messaging_response import MessagingResponse
import threading
from pyngrok import ngrok
import time
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyMuPDFLoader, UnstructuredPowerPointLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [None]:


# Read settings from a local .env file into the process environment
load_dotenv()
MODEL = "gpt-4o-mini"
# Keep an already-set key untouched; otherwise install the placeholder value
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')

# Flask application that hosts the webhook routes
app = Flask(__name__)

# Registry of per-user conversation chains, keyed by user id
user_memories = dict()

# Load documents and prepare vector database
def setup_vectorstore(source_pattern='C:/Users/COCOCE/Desktop/SRH'):
    """Load documents, embed them and return a FAISS-backed retriever.

    Every ``.txt``, ``.pptx`` and ``.pdf`` file found (recursively) under the
    folders matched by ``source_pattern`` is loaded, tagged with a
    ``doc_type`` metadata entry (the name of its top-level folder), split
    into chunks of 1000 characters with 200 characters of overlap, embedded
    with ``OpenAIEmbeddings`` and indexed in FAISS.

    Args:
        source_pattern: Path or glob pattern selecting the top-level
            folder(s) to scan. A plain path selects only that folder; a
            pattern such as ``'knowledge-base/*'`` selects one folder per
            match. Defaults to the location originally hard-coded here.

    Returns:
        A retriever over the FAISS index that returns the 25 nearest chunks.

    Raises:
        ValueError: If no document could be loaded, or the loaded documents
            produced no text chunks. Without this check the failure would
            only surface later, inside the vector-store construction.
    """
    print("Loading documents and setting up vector database...")
    # glob.glob() on a path without wildcards returns just that path (if it
    # exists), so the default scans a single folder rather than its children.
    folders = glob.glob(source_pattern)
    documents = []

    def get_loader(file_path):
        """Selects the appropriate loader based on file type."""
        ext = os.path.splitext(file_path)[-1].lower()
        if ext == ".txt":
            return TextLoader(file_path, encoding="utf-8")
        elif ext == ".pptx":
            return UnstructuredPowerPointLoader(file_path)
        elif ext == ".pdf":
            return PyMuPDFLoader(file_path)
        else:
            raise ValueError(f"Unsupported file type: {ext}")

    # Loop through each folder
    for folder in folders:
        # normpath drops a trailing separator, which would otherwise make
        # basename() return an empty doc_type.
        doc_type = os.path.basename(os.path.normpath(folder))

        # Get all files inside the folder, including nested sub-folders
        files = glob.glob(os.path.join(folder, '**/*'), recursive=True)

        for file_path in files:
            if not os.path.isfile(file_path):  # Skip directories
                continue
            # Best effort: one unreadable or unsupported file must not abort
            # the whole indexing run, so failures are reported and skipped.
            try:
                loader = get_loader(file_path)
                file_docs = loader.load()
            except Exception as e:
                print(f"Error loading {file_path}: {e}")
                continue
            # Add metadata and store documents
            for doc in file_docs:
                doc.metadata['doc_type'] = doc_type
            documents.extend(file_docs)
            print(f"Loaded {len(file_docs)} pages from {file_path}")

    print(f"\nTotal documents loaded: {len(documents)}")

    if not documents:
        raise ValueError(
            f"No supported documents (.txt, .pptx, .pdf) could be loaded from {source_pattern!r}"
        )

    # Split the documents into chunks
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)

    if not chunks:
        raise ValueError(
            f"Documents loaded from {source_pattern!r} contain no text to index"
        )

    # Create OpenAI embeddings
    embeddings = OpenAIEmbeddings()

    # Initialize the FAISS vector store
    faiss_vectorstore = FAISS.from_documents(chunks, embedding=embeddings)

    # Create the retriever
    retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 25})

    return retriever

print("Setting up the LangChain components...")
# Build the document retriever once, at start-up
retriever = setup_vectorstore()

# Chat model used when building each conversation chain
llm = ChatOpenAI(model_name=MODEL, temperature=0.7)

# Look up, or build on first use, the conversation chain for one user
def get_conversation_chain(user_id):
    """Return the conversation chain stored for ``user_id``, creating it if absent.

    A newly created chain gets its own ``ConversationBufferMemory`` and is
    stored in ``user_memories`` under ``user_id`` so later calls reuse it.
    """
    try:
        return user_memories[user_id]
    except KeyError:
        pass

    # Nothing stored for this user yet: start with an empty chat history
    history_buffer = ConversationBufferMemory(return_messages=True, memory_key='chat_history')
    new_chain = ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=retriever, memory=history_buffer
    )
    user_memories[user_id] = new_chain
    return new_chain

# Chat helper: answer one message using the caller's conversation chain
def chat(message, history, user_id=None):
    """Answer ``message`` with the conversation chain belonging to one user.

    Args:
        message: The user's question.
        history: Chat history object supplied by the caller. It is used only
            to derive a fallback user id when ``user_id`` is not given.
        user_id: Stable identifier for the user (for example the sender's
            phone number). Pass this whenever one is available.

    Returns:
        The ``"answer"`` entry of the chain's result.
    """
    if user_id is None:
        # Legacy fallback, kept for existing two-argument callers. id() is
        # the identity of the ``history`` object, not of the user: a caller
        # that passes a fresh object on every call gets a fresh chain every
        # time (no memory, and one more entry left in user_memories).
        user_id = str(id(history))

    # Get the conversation chain for this user
    conversation_chain = get_conversation_chain(user_id)

    # Get response from LangChain
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

@app.route("/bot", methods=["POST"])
def bot():
    """Webhook for incoming messages: answer the sender and reply with TwiML.

    Reads the ``Body`` and ``From`` request values, runs the message through
    the sender's conversation chain and returns the answer wrapped in a
    Twilio ``MessagingResponse``. Any failure while building or invoking the
    chain is reported to the sender as a generic error message instead of
    surfacing as an HTTP error.
    """
    # Get the incoming message and sender's phone number
    incoming_msg = request.values.get("Body", "").strip()
    # NOTE(review): requests without a "From" value all share the "" key,
    # and therefore one conversation memory — confirm this cannot happen.
    sender = request.values.get("From", "")

    # NOTE(review): this writes the sender id and message text to stdout —
    # confirm that is acceptable for the data this bot handles.
    print(f"📩 Received message from {sender}: {incoming_msg}")

    if not incoming_msg:
        # Nothing to ask the model (e.g. a message with no text): reply
        # directly instead of sending an empty question through the chain.
        response = "Sorry, I can only read text messages. Please send your question as text."
    else:
        try:
            # Chain creation is inside the try as well, so a failure while
            # building it is answered gracefully rather than raised.
            conversation_chain = get_conversation_chain(sender)
            result = conversation_chain.invoke({"question": incoming_msg})
            response = result["answer"]
        except Exception as e:
            print(f"❌ Error processing message: {e}")
            response = "Sorry, I encountered an error processing your message. Please try again."

    # Create a Twilio response
    twilio_response = MessagingResponse()
    twilio_response.message(response)

    # Log at most the first 100 characters of the reply
    preview = f"{response[:100]}..." if len(response) > 100 else response
    print(f"✉️ Sent response: {preview}")

    return str(twilio_response)

# Root route, useful for checking that the server is reachable
@app.route("/", methods=["GET"])
def home():
    """Return a fixed status string."""
    status_message = "WhatsApp RAG Chatbot is running!"
    return status_message

# Target for the background server thread
def run_flask():
    """Run the Flask server on all interfaces, port 5000, with debug and the reloader off."""
    server_options = {
        "host": '0.0.0.0',
        "port": 5000,
        "debug": False,
        "use_reloader": False,
    }
    app.run(**server_options)

print("Starting the server...")
# Run Flask in a daemon thread so it does not keep the process alive on exit
flask_thread = threading.Thread(target=run_flask, daemon=True)
flask_thread.start()
print("Flask server running in background at http://localhost:5000")

# Open an ngrok tunnel to port 5000 so Twilio can reach the local server
try:
    public_url = ngrok.connect(5000).public_url
    print(f"Public URL: {public_url}")
    print(f"Twilio Webhook URL: {public_url}/bot")
    print("\nConfigure this URL in your Twilio WhatsApp Sandbox.")
    print("Keep this notebook or script running to maintain the connection.")
except Exception as e:
    # The server still runs locally; only the public tunnel is missing
    print(f"Error setting up ngrok: {e}")
    print("You'll need to manually set up a way to expose your local server.")

# Block here until interrupted so the server thread keeps running
try:
    while True:
        time.sleep(1)  # Idle between checks for an interrupt
except KeyboardInterrupt:
    print("Server shutting down...")

Setting up the LangChain components...
Loading documents and setting up vector database...
Loaded 5 pages from C:/Users/COCOCE/Desktop/SRH\1.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\10.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\11.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\12.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\13.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\14.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\15.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\16.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\17.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\18.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\19.pdf
Loaded 4 pages from C:/Users/COCOCE/Desktop/SRH\2.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\21.pdf
Loaded 3 pages from C:/Users/COCOCE/Desktop/SRH\23.pdf
Loaded 2 pages from C:/Users/COCOCE/Desktop/SRH\24.pdf
Loaded 1 pages from C:/Users/COCOCE/Desktop/SRH\25.pdf
Loaded 2 pages from C:/Users/CO

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://10.93.144.101:5000
Press CTRL+C to quit


Public URL: https://fd94234b1745.ngrok-free.app
Twilio Webhook URL: https://fd94234b1745.ngrok-free.app/bot

Configure this URL in your Twilio WhatsApp Sandbox.
Keep this notebook or script running to maintain the connection.
📩 Received message from whatsapp:+250787168817: Muraho


  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
127.0.0.1 - - [09/Jul/2025 16:08:51] "POST /bot HTTP/1.1" 200 -


✉️ Sent response: Muraho neza! Nitwa Umujyanama w’Ubuzima bw’Imyororokere. Niteguye kugufasha. Mbwira ikibazo cyangwa ...
📩 Received message from whatsapp:+250787168817: Mbwira uko nakwirinda gusama


127.0.0.1 - - [09/Jul/2025 16:09:32] "POST /bot HTTP/1.1" 200 -


✉️ Sent response: Umukobwa ashobora kwirinda gusama mu buryo bukurikira: 

1. Kwifata (kudakora imibonano mpuzabitsina...
