# RAG Chatbot with Conversational Memory

## Project Overview
This project implements a Retrieval Augmented Generation (RAG) system with:
- **LangChain** for orchestration
- **OpenRouter** for LLM (free API)
- **HuggingFace** sentence-transformers for embeddings (free, model runs locally)
- **ChromaDB** for vector storage
- **Streamlit** for UI
- **Conversational Memory** for chat history



In [1]:
!pip uninstall -y langchain langchain-community langchain-core langchain-text-splitters 2>/dev/null
!pip install --quiet langchain-community chromadb sentence-transformers streamlit pyngrok requests pypdf python-docx

print("✅ Packages installed!")

Found existing installation: langchain 1.2.3
Uninstalling langchain-1.2.3:
  Successfully uninstalled langchain-1.2.3
Found existing installation: langchain-core 1.2.6
Uninstalling langchain-core-1.2.6:
  Successfully uninstalled langchain-core-1.2.6
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m93.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m101.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

API Key Configuration

In [2]:
import os
from getpass import getpass

# Prompt for the API keys and publish them through os.environ, which is where
# config.py looks up OPENROUTER_API_KEY.
print("=" * 50)
print("API KEY CONFIGURATION")
print("=" * 50)
print("\nGet free API keys from:")
print("- OpenRouter: https://openrouter.ai/keys")
print("- HuggingFace: https://huggingface.co/settings/tokens")
print()

# Strip surrounding whitespace: a pasted key that carries a stray space or
# newline would otherwise be sent verbatim in the Authorization header.
OPENROUTER_API_KEY = getpass("Enter your OpenRouter API Key: ").strip()
HUGGINGFACE_API_KEY = getpass("Enter your HuggingFace API Key: ").strip()

os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
# NOTE(review): no code visible in this notebook reads HUGGINGFACE_API_KEY.
os.environ["HUGGINGFACE_API_KEY"] = HUGGINGFACE_API_KEY

# Only report success when the key the LLM client needs was actually entered.
if OPENROUTER_API_KEY:
    print("\n✅ API Keys configured successfully!")
else:
    print("\n⚠️ No OpenRouter API key entered - chat requests will fail until one is set.")

API KEY CONFIGURATION

Get free API keys from:
- OpenRouter: https://openrouter.ai/keys
- HuggingFace: https://huggingface.co/settings/tokens

Enter your OpenRouter API Key: ··········
Enter your HuggingFace API Key: ··········

✅ API Keys configured successfully!


## 📁 Create Project Structure

We'll create all necessary files for our RAG application.

In [3]:
import os

# Root folder of the generated app, followed by the sub-folders used for the
# sample data and for the persisted vector store.
project_dir = "/content/rag_chatbot"
for folder in (project_dir, f"{project_dir}/data", f"{project_dir}/vectorstore"):
    os.makedirs(folder, exist_ok=True)

print(f"✅ Project directory created at: {project_dir}")

✅ Project directory created at: /content/rag_chatbot


## ⚙️ Configuration File

This file contains all configuration settings for the RAG system.

In [5]:
%%writefile /content/rag_chatbot/config.py
import os

class Config:
    """Static settings shared by the RAG engine: LLM endpoint, embeddings, retrieval and prompt."""

    # --- LLM (OpenRouter) ---
    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
    # Looked up once, when this class body runs; empty string when the variable is unset.
    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")

    # Display name -> OpenRouter model id.
    AVAILABLE_MODELS = {
        "Llama 3.1 8B": "meta-llama/llama-3.1-8b-instruct:free",
        "Gemma 3 27B": "google/gemma-3-27b-it:free",
        "Mistral 7B": "mistralai/mistral-7b-instruct:free",
    }
    DEFAULT_MODEL = AVAILABLE_MODELS["Gemma 3 27B"]

    # --- Embeddings and vector store ---
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    COLLECTION_NAME = "documents"
    VECTORSTORE_PATH = "/content/rag_chatbot/vectorstore"

    # --- Chunking, retrieval and memory ---
    CHUNK_SIZE, CHUNK_OVERLAP = 500, 50
    TOP_K = 3          # number of chunks requested per similarity search
    MAX_HISTORY = 5    # number of most recent Q/A turns placed in the prompt

    # Filled in with str.format(context=..., chat_history=..., question=...).
    SYSTEM_PROMPT = """You are a helpful assistant. Use the context below to answer questions.
If you don't know, say "I don't have that information."

Context:
{context}

Previous conversation:
{chat_history}

Question: {question}

Answer:"""

Writing /content/rag_chatbot/config.py


## 🧠 RAG Engine

This is the core RAG logic with:
- Custom OpenRouter LLM integration
- HuggingFace embeddings
- ChromaDB vector store
- Conversational memory

In [6]:
%%writefile /content/rag_chatbot/rag_engine.py
import os
import requests
from typing import List, Dict, Any, Optional

# Imports
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

from config import Config


class SimpleLLM:
    """Minimal chat-completions client for the OpenRouter HTTP API.

    Failures are reported as a string starting with "Error: " instead of being
    raised, so callers can display the returned value directly.
    """

    def __init__(self, model_name: Optional[str] = None):
        """Remember the model id and API key.

        Args:
            model_name: OpenRouter model id; falls back to Config.DEFAULT_MODEL
                when omitted or empty.
        """
        self.model = model_name or Config.DEFAULT_MODEL
        self.api_key = Config.OPENROUTER_API_KEY

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Full prompt text to send.

        Returns:
            The content of the first choice, or an "Error: ..." string when the
            key is missing, the request fails, or the response is not usable.
        """
        # Fail fast with a clear message instead of sending an unauthenticated request.
        if not self.api_key:
            return "Error: OPENROUTER_API_KEY is not set."

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 1024,
        }

        try:
            response = requests.post(
                f"{Config.OPENROUTER_BASE_URL}/chat/completions",
                headers=headers,
                json=data,
                timeout=30
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            return f"Error: {str(e)}"

        # A successful HTTP status may still carry an error object in the body;
        # surface its message rather than failing on the missing "choices" key.
        error = payload.get("error") if isinstance(payload, dict) else None
        if error:
            message = error.get("message", error) if isinstance(error, dict) else error
            return f"Error: {message}"

        try:
            content = payload["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as e:
            return f"Error: unexpected response format ({e!r})"

        # Honour the declared ``str`` return type even when the content is null.
        return "" if content is None else str(content)


class RAGEngine:
    """Retrieval-augmented chat engine with an in-memory question/answer history.

    Combines HuggingFace embeddings, a persistent Chroma collection and the
    SimpleLLM client; all settings come from Config.
    """

    def __init__(self, model_name: Optional[str] = None):
        """Load the embeddings, the LLM client and the vector store.

        Args:
            model_name: Optional OpenRouter model id forwarded to SimpleLLM;
                when omitted, SimpleLLM uses Config.DEFAULT_MODEL.
        """
        print("Loading embeddings...")
        self.embeddings = HuggingFaceEmbeddings(
            model_name=Config.EMBEDDING_MODEL,
            model_kwargs={"device": "cpu"}
        )

        print("Setting up LLM...")
        self.llm = SimpleLLM(model_name)

        print("Loading vector store...")
        self.vectorstore = Chroma(
            persist_directory=Config.VECTORSTORE_PATH,
            embedding_function=self.embeddings,
            collection_name=Config.COLLECTION_NAME
        )

        # List of {"q": question, "a": answer} dicts, oldest first.
        self.chat_history = []
        print("✅ RAG Engine ready!")

    def add_text(self, text: str, source: str = "user") -> int:
        """Split ``text`` into chunks and add them to the knowledge base.

        Args:
            text: Raw text to index.
            source: Label stored in each chunk's ``source`` metadata.

        Returns:
            Number of chunks added (0 when the splitter produced none).
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=Config.CHUNK_SIZE,
            chunk_overlap=Config.CHUNK_OVERLAP
        )

        doc = Document(page_content=text, metadata={"source": source})
        chunks = splitter.split_documents([doc])

        # Nothing to index (e.g. blank input): do not hand the store an empty batch.
        if not chunks:
            return 0

        self.vectorstore.add_documents(chunks)
        return len(chunks)

    def search(self, query: str) -> str:
        """Return the top Config.TOP_K matching chunks joined by blank lines.

        Returns an empty string when nothing matches or the lookup fails.
        """
        try:
            docs = self.vectorstore.similarity_search(query, k=Config.TOP_K)
            if docs:
                return "\n\n".join([d.page_content for d in docs])
            return ""
        except Exception:
            # Best-effort retrieval: fall back to "no context" on failure, but
            # let KeyboardInterrupt/SystemExit propagate (a bare except would not).
            return ""

    def get_history_str(self) -> str:
        """Format the most recent Config.MAX_HISTORY turns as "Q: ... / A: ..." lines.

        Returns the literal string "None" when there is no history to include.
        """
        # A limit of 0 must mean "no history": slicing with [-0:] would return
        # the entire list instead of nothing.
        if not self.chat_history or Config.MAX_HISTORY <= 0:
            return "None"

        history = self.chat_history[-Config.MAX_HISTORY:]
        return "\n".join([f"Q: {h['q']}\nA: {h['a']}" for h in history])

    def chat(self, question: str) -> str:
        """Answer ``question`` using retrieved context and recent history.

        The question/answer pair is appended to ``chat_history`` before returning.
        """
        context = self.search(question)
        history = self.get_history_str()

        prompt = Config.SYSTEM_PROMPT.format(
            context=context or "No relevant information found.",
            chat_history=history,
            question=question
        )

        answer = self.llm.generate(prompt)

        self.chat_history.append({"q": question, "a": answer})

        return answer

    def clear_history(self):
        """Forget all stored question/answer turns."""
        self.chat_history = []

    def doc_count(self) -> int:
        """Return the number of entries in the collection, or 0 if it cannot be read."""
        try:
            return self.vectorstore._collection.count()
        except Exception:
            return 0

Writing /content/rag_chatbot/rag_engine.py


## 🎨 Streamlit UI Application

Creating a beautiful and functional chat interface with:
- Document upload
- Chat interface with memory
- Model selection
- Knowledge base management

In [7]:
%%writefile /content/rag_chatbot/app.py
import streamlit as st
import sys
import os

sys.path.insert(0, "/content/rag_chatbot")
os.chdir("/content/rag_chatbot")

from rag_engine import RAGEngine

# Page config
st.set_page_config(page_title="RAG Chatbot", page_icon="🤖", layout="wide")

# Initialize session state (kept across Streamlit reruns of this script)
if "engine" not in st.session_state:
    st.session_state.engine = None
if "messages" not in st.session_state:
    st.session_state.messages = []

# Title
st.title("🤖 RAG Chatbot with Memory")

# Sidebar
with st.sidebar:
    st.header("⚙️ Settings")

    if st.button("🚀 Initialize Engine", type="primary"):
        with st.spinner("Loading..."):
            st.session_state.engine = RAGEngine()
        st.success("Ready!")

    st.divider()

    # Add knowledge
    st.subheader("📚 Add Knowledge")
    text_input = st.text_area("Enter text:", height=150)

    if st.button("➕ Add to Knowledge Base"):
        if not st.session_state.engine:
            st.error("Initialize engine first!")
        elif not text_input.strip():
            # Previously a click with empty/blank text did nothing and gave no feedback.
            st.warning("Enter some text first!")
        else:
            chunks = st.session_state.engine.add_text(text_input)
            st.success(f"Added {chunks} chunks!")

    st.divider()

    # Stats
    if st.session_state.engine:
        st.metric("📄 Documents", st.session_state.engine.doc_count())
        st.metric("💬 Messages", len(st.session_state.messages))

    # Clear buttons
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🗑️ Clear Chat"):
            # Clear both the displayed messages and the engine's own history.
            st.session_state.messages = []
            if st.session_state.engine:
                st.session_state.engine.clear_history()
            st.rerun()
    with col2:
        if st.button("🔄 Reset"):
            # Drops the engine object and the displayed messages.
            st.session_state.engine = None
            st.session_state.messages = []
            st.rerun()

# Main chat area: nothing below runs until an engine exists
if not st.session_state.engine:
    st.info("👈 Click 'Initialize Engine' to start!")
    st.stop()

# Display chat messages
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

# Chat input
if prompt := st.chat_input("Ask a question..."):
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Get response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = st.session_state.engine.chat(prompt)
        st.write(response)

    st.session_state.messages.append({"role": "assistant", "content": response})

Writing /content/rag_chatbot/app.py


## 📝 Create Sample Documents

Let's add some sample documents to test the RAG system.

In [8]:
# Small knowledge-base text used by the smoke test further down.
sample_text = """
Machine Learning is a type of artificial intelligence that allows computers to learn from data.

There are three main types of machine learning:
1. Supervised Learning - Uses labeled data to train models
2. Unsupervised Learning - Finds patterns in unlabeled data
3. Reinforcement Learning - Learns through trial and error

Python is the most popular language for machine learning because of libraries like:
- NumPy for numerical computing
- Pandas for data analysis
- Scikit-learn for machine learning algorithms
- TensorFlow and PyTorch for deep learning

RAG (Retrieval Augmented Generation) is a technique that combines:
- Information retrieval from a knowledge base
- Text generation using large language models
This helps reduce hallucinations and provide accurate answers.
"""

# Save the text into the project's data folder.
with open("/content/rag_chatbot/data/sample.txt", "w") as f:
    f.write(sample_text)

print("✅ Sample data created!")

✅ Sample data created!


Testing

In [9]:
import sys
import os

sys.path.insert(0, "/content/rag_chatbot")
os.chdir("/content/rag_chatbot")

from rag_engine import RAGEngine

print("Testing RAG Engine...")
print("="*50)

# Initialize
engine = RAGEngine()

# Add sample data
with open("/content/rag_chatbot/data/sample.txt", "r") as f:
    text = f.read()

chunks = engine.add_text(text, source="sample.txt")
print(f"\n📄 Added {chunks} chunks to knowledge base")
print(f"📚 Total documents: {engine.doc_count()}")

# Test questions
print("\n" + "="*50)
print("Testing Questions:")
print("="*50)

# The last question only makes sense if the engine remembers the previous answer.
questions = [
    "What is machine learning?",
    "What are the types of machine learning?",
    "Tell me more about the first type you mentioned"
]

for q in questions:
    print(f"\n❓ {q}")
    answer = engine.chat(q)
    # Show at most 300 characters and add the ellipsis only when text was
    # actually cut off, so complete answers are not shown as truncated.
    preview = answer.strip()
    suffix = "..." if len(preview) > 300 else ""
    print(f"💬 {preview[:300]}{suffix}")

print("\n✅ Test complete!")



Testing RAG Engine...
Loading embeddings...


  self.embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Setting up LLM...
Loading vector store...


  self.vectorstore = Chroma(


✅ RAG Engine ready!

📄 Added 2 chunks to knowledge base
📚 Total documents: 2

Testing Questions:

❓ What is machine learning?
💬 
Machine Learning is a type of artificial intelligence that allows computers to learn from data.



...

❓ What are the types of machine learning?
💬 
There are three main types of machine learning:
1. Supervised Learning - Uses labeled data to train models
2. Unsupervised Learning - Finds patterns in unlabeled data  
3. Reinforcement Learning - Learns through trial and error
...

❓ Tell me more about the first type you mentioned
💬 
Supervised Learning uses labeled data to train models.



...

✅ Test complete!


Cloudflare Tunnel Integration and Public Link Generation

In [10]:
# Install cloudflared
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb > /dev/null 2>&1

print("✅ Cloudflared installed!")

✅ Cloudflared installed!


In [11]:
import subprocess
import time
import re

# Kill any existing processes. pkill is run directly (no intermediate shell) and its
# exit status is ignored: a non-zero status just means nothing matched.
for stale_name in ("streamlit", "cloudflared"):
    subprocess.run(["pkill", "-f", stale_name], stderr=subprocess.DEVNULL, check=False)
time.sleep(2)

# Start Streamlit in background
streamlit_process = subprocess.Popen([
    "streamlit", "run", "/content/rag_chatbot/app.py",
    "--server.port", "8501",
    "--server.headless", "true",
    "--server.address", "localhost"
], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

print("⏳ Starting Streamlit server...")
time.sleep(5)

# Start Cloudflare tunnel
print("⏳ Creating Cloudflare tunnel...")

# Send cloudflared's log to a file rather than a pipe: a pipe that is never drained
# can fill up and stall the child, and readline() on it can block this cell.
tunnel_log_path = "/content/cloudflared.log"
with open(tunnel_log_path, "w") as tunnel_log:
    tunnel_process = subprocess.Popen(
        ["cloudflared", "tunnel", "--url", "http://localhost:8501"],
        stdout=subprocess.DEVNULL,
        stderr=tunnel_log
    )

# Poll the log for the public URL, for at most 30 seconds
url_pattern = re.compile(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com')
url = None
log_text = ""
deadline = time.monotonic() + 30

while url is None and time.monotonic() < deadline:
    with open(tunnel_log_path, "r", errors="replace") as log_file:
        log_text = log_file.read()

    for candidate in url_pattern.findall(log_text):
        # Skip the service endpoint, which may show up in cloudflared error messages.
        if candidate != "https://api.trycloudflare.com":
            url = candidate
            break

    if url is None:
        if tunnel_process.poll() is not None:
            break  # cloudflared exited; no URL will appear
        time.sleep(1)

url_found = url is not None

if url_found:
    print("\n" + "="*60)
    print("🎉 APP IS RUNNING!")
    print("="*60)
    print(f"\n🌐 Your URL: {url}\n")
    print("="*60)
    print("\n📝 Instructions:")
    print("   1. Click the URL above to open the app")
    print("   2. Click '🚀 Initialize Engine' in sidebar")
    print("   3. Add knowledge or start chatting!")
    print("\n⚠️  Keep this cell running to maintain the server")
    print("="*60)
else:
    # Show the tail of the log instead of starting a second tunnel.
    print("❌ Could not get URL. Last cloudflared log lines:")
    print("\n".join(log_text.splitlines()[-10:]))

^C
^C
⏳ Starting Streamlit server...
⏳ Creating Cloudflare tunnel...

🎉 APP IS RUNNING!

🌐 Your URL: https://secretariat-ones-reunion-stock.trycloudflare.com


📝 Instructions:
   1. Click the URL above to open the app
   2. Click '🚀 Initialize Engine' in sidebar
   3. Add knowledge or start chatting!

⚠️  Keep this cell running to maintain the server


Stopping the App After Use

In [12]:

import subprocess

# Stop the background Streamlit server and Cloudflare tunnel.
# pkill exits with 0 when at least one process matched, so the exit status
# tells us whether anything was actually stopped.
stopped = [
    name
    for name in ("streamlit", "cloudflared")
    if subprocess.run(["pkill", "-f", name], check=False).returncode == 0
]

if stopped:
    print("✅ All processes stopped!")
else:
    print("ℹ️ No running streamlit or cloudflared processes were found.")

✅ All processes stopped!
