<a href="https://colab.research.google.com/github/2bahaa/OmniRAG-General-Purpose-LLM-Chatbot-for-Any-File/blob/main/Ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# STEP 1: Install dependencies (versions are not pinned, so each run installs whatever is current)
!pip install -q langchain faiss-cpu pypdf fastapi uvicorn python-multipart ollama langchain-community
!pip install -q aiofiles nest_asyncio

# STEP 2: Install and start Ollama server
# The server is started in the background with stdout/stderr redirected to ollama.log;
# the fixed 10-second sleep is the only wait before the model pulls below.
!curl -fsSL https://ollama.ai/install.sh | sh
!nohup ollama serve > ollama.log 2>&1 &
!sleep 10

# STEP 3: Pull the chat model (Ollama tag `llama3`) and the embedding model
!ollama pull llama3
!ollama pull mxbai-embed-large

# STEP 4: Upload PDFs
from google.colab import files
uploaded = files.upload()  # Opens the Colab upload prompt; select one or more PDF files
# The keys of the returned mapping are treated as file paths by the loading step.
# NOTE(review): the captured output shows a re-upload being saved under a new name
# ("Thesis Draft (1).pdf") — confirm the keys match the saved file names.
pdf_paths = list(uploaded.keys())

# STEP 5: Load and process PDFs
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS

# Collect the documents loaded from every uploaded file into one flat list.
all_docs = []
for pdf_file in pdf_paths:
    all_docs += PyPDFLoader(pdf_file).load()

# Split the loaded documents (chunk_size=500, chunk_overlap=50).
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(all_docs)

# Embed the chunks with the Ollama embedding model and index them in FAISS.
embedding = OllamaEmbeddings(model="mxbai-embed-large")
db = FAISS.from_documents(chunks, embedding)

# `retriever` is the object the question-answering chain is built on.
retriever = db.as_retriever()

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


Saving Thesis Draft.pdf to Thesis Draft (1).pdf


  embedding = OllamaEmbeddings(model="mxbai-embed-large")


In [2]:
# STEP 6: FastAPI app with modern UI
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
import nest_asyncio
# Patch asyncio via nest_asyncio before any server is started
# (presumably so uvicorn can run inside the notebook kernel — TODO confirm).
nest_asyncio.apply()

# Chat model, addressed by its Ollama tag.
llm = Ollama(model="llama3")
# Question-answering chain built on `retriever`, which is not defined in this cell:
# the PDF-processing cell must have been run first or this line raises NameError.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Application object that the route handlers below register on.
app = FastAPI()

@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Serve the landing page: a single question form that POSTs to ``/ask``."""
    # Static markup; nothing is interpolated into it.
    landing_page = """
    <html>
    <head>
        <title>🌱 PDF RAG Chatbot</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f4;
                padding: 40px;
                max-width: 800px;
                margin: auto;
            }
            h2 {
                color: #2c3e50;
            }
            input[type="text"] {
                width: 100%;
                padding: 12px;
                margin-top: 8px;
                margin-bottom: 12px;
                box-sizing: border-box;
                border: 2px solid #ccc;
                border-radius: 4px;
            }
            button {
                background-color: #4CAF50;
                color: white;
                padding: 10px 20px;
                border: none;
                border-radius: 4px;
                cursor: pointer;
            }
            button:hover {
                background-color: #45a049;
            }
        </style>
    </head>
    <body>
        <h2>🌾 Ask me anything about your PDFs!</h2>
        <form action="/ask" method="post">
            <input type="text" name="query" placeholder="Type your question here..." />
            <button type="submit">💬 Submit</button>
        </form>
    </body>
    </html>
    """
    return landing_page

@app.post("/ask", response_class=HTMLResponse)
def ask(query: str = Form(...)):
    """Answer a submitted question and render the result as an HTML page.

    Args:
        query: Question text taken from the ``query`` form field.

    Returns:
        HTML markup showing the question, the chain's answer, and a link back to ``/``.
    """
    # Imported here so the handler carries its own dependency.
    import html

    answer = qa_chain.run(query)

    # Both strings are untrusted (raw user input and model output). Escape them so
    # characters such as < > & and quotes are shown literally instead of being
    # parsed as markup or script by the browser.
    safe_query = html.escape(query)
    safe_answer = html.escape(str(answer))

    return f"""
    <html>
    <head>
        <title>🌱 PDF RAG Chatbot - Answer</title>
        <style>
            body {{
                font-family: Arial, sans-serif;
                background-color: #f9f9f9;
                padding: 40px;
                max-width: 800px;
                margin: auto;
            }}
            p {{
                font-size: 18px;
                line-height: 1.6;
            }}
            a {{
                display: inline-block;
                margin-top: 20px;
                color: #3498db;
                text-decoration: none;
            }}
            a:hover {{
                text-decoration: underline;
            }}
        </style>
    </head>
    <body>
        <h2>📄 Answer</h2>
        <p><strong>Question:</strong> {safe_query}</p>
        <p><strong>Answer:</strong> {safe_answer}</p>
        <a href='/'>🔁 Ask another question</a>
    </body>
    </html>
    """

  llm = Ollama(model="llama3")


In [3]:
# STEP 7: Run FastAPI server & expose it via Colab proxy
!fuser -k 8000/tcp  # Free the port

import uvicorn
import threading

def start():
    uvicorn.run(app, host="0.0.0.0", port=8000)

threading.Thread(target=start).start()

# STEP 8: Get public URL (Colab-native)
from google.colab.output import eval_js
proxy_url = eval_js("google.colab.kernel.proxyPort(8000)")
print(f"🔗 Your chatbot is live at: {proxy_url}")

INFO:     Started server process [22044]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


🔗 Your chatbot is live at: https://8000-gpu-t4-s-1s96v42z7vfki-b.us-west1-1.prod.colab.dev


In [1]:
# STEP 1: Install dependencies (versions are not pinned, so each run installs whatever is current)
!pip install -q langchain faiss-cpu pypdf fastapi uvicorn python-multipart aiofiles nest_asyncio ollama langchain-community

# STEP 2: Install & start Ollama server
# The server is started in the background with stdout/stderr redirected to ollama.log;
# the fixed 10-second sleep is the only wait before the model pulls below.
!curl -fsSL https://ollama.ai/install.sh | sh
!nohup ollama serve > ollama.log 2>&1 &
!sleep 10

# STEP 3: Pull the chat model (Ollama tag `llama3`) and the embedding model
!ollama pull llama3
!ollama pull mxbai-embed-large


>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


In [2]:
import os
import nest_asyncio
from fastapi import FastAPI, UploadFile, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Ollama
from langchain.chains import RetrievalQA

# Patch asyncio via nest_asyncio before any server is started
# (presumably so uvicorn can run inside the notebook kernel — TODO confirm).
nest_asyncio.apply()

app = FastAPI()
# CORS is fully open: every origin, method, and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Every upload is written to this single path, so a new upload overwrites the previous file.
pdf_path = "uploaded.pdf"
qa_chain = None  # Will hold the RetrievalQA chain once a PDF is uploaded

@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Serve the landing page with two forms: PDF upload (``/upload``) and question (``/ask``)."""
    # Static markup; nothing is interpolated into it.
    landing_page = """
    <html>
    <head>
        <title>Llama 3.2 using RAG</title>
        <style>
            body { font-family: Arial; background-color: #f4f4f4; padding: 40px; max-width: 700px; margin: auto; }
            h2 { color: #2c3e50; }
            form { margin-top: 30px; }
            input[type="file"], input[type="text"] {
                width: 100%; padding: 12px; margin: 10px 0; border: 2px solid #ccc; border-radius: 4px;
            }
            button {
                background-color: #4CAF50; color: white; padding: 10px 20px;
                border: none; border-radius: 4px; cursor: pointer;
            }
            button:hover { background-color: #45a049; }
        </style>
    </head>
    <body>
        <h2>📄 Llama 3.2 using RAG</h2>
        <form action="/upload" method="post" enctype="multipart/form-data">
            <label><strong>Upload a PDF to start:</strong></label><br/>
            <input type="file" name="pdffile" accept=".pdf" required />
            <button type="submit">📤 Upload</button>
        </form>
        <br/>
        <form action="/ask" method="post">
            <label><strong>Ask your question:</strong></label><br/>
            <input type="text" name="query" placeholder="Type your question here..." required />
            <button type="submit">💬 Ask</button>
        </form>
    </body>
    </html>
    """
    return landing_page

@app.post("/upload", response_class=HTMLResponse)
async def upload_pdf(pdffile: UploadFile):
    """Store the uploaded PDF, index its text, and (re)build the global QA chain.

    Args:
        pdffile: The file sent in the ``pdffile`` field of the multipart form.

    Returns:
        HTML markup: a success page, or an error message when the PDF yields no
        text chunks to index.

    Side effects:
        Overwrites the file at ``pdf_path`` and, on success, rebinds the
        module-level ``qa_chain``.
    """
    global qa_chain
    with open(pdf_path, "wb") as f:
        f.write(await pdffile.read())

    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    # A PDF with no extractable text (e.g. scanned images) produces zero chunks, and
    # building a FAISS index from an empty list fails. Report that to the user instead
    # of returning a server error, and keep any previously built chain in place.
    if not chunks:
        return "<p style='color:red;'>⚠️ No readable text was found in that PDF.</p><a href='/'>Back</a>"

    embedding = OllamaEmbeddings(model="mxbai-embed-large")
    db = FAISS.from_documents(chunks, embedding)
    retriever = db.as_retriever()
    llm = Ollama(model="llama3")
    # Rebind only after every step succeeded, so a failed upload never leaves a half-built chain.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    return """
    <html><body>
    <h3>✅ PDF uploaded and processed successfully.</h3>
    <a href='/'>⬅️ Back to ask questions</a>
    </body></html>
    """

@app.post("/ask", response_class=HTMLResponse)
async def ask_question(query: str = Form(...)):
    """Answer a question against the most recently uploaded PDF.

    Args:
        query: Question text taken from the ``query`` form field.

    Returns:
        HTML markup with the question and answer, or a prompt to upload a PDF
        when no chain has been built yet.
    """
    # Imported here so the handler carries its own dependency.
    import html

    # `qa_chain` is only read here, so no `global` declaration is needed.
    if qa_chain is None:
        return "<p style='color:red;'>⚠️ Please upload a PDF first.</p><a href='/'>Back</a>"

    answer = qa_chain.run(query)

    # Both strings are untrusted (raw user input and model output). Escape them so
    # characters such as < > & and quotes are shown literally instead of being
    # parsed as markup or script by the browser.
    safe_query = html.escape(query)
    safe_answer = html.escape(str(answer))

    return f"""
    <html><body>
    <h3>📄 Llama 3.2 Answer</h3>
    <p><strong>Question:</strong> {safe_query}</p>
    <p><strong>Answer:</strong> {safe_answer}</p>
    <a href="/">🔁 Ask another or upload new PDF</a>
    </body></html>
    """


In [3]:
# Free the port
!fuser -k 8000/tcp

import uvicorn
import threading

def start():
    uvicorn.run(app, host="0.0.0.0", port=8000)

threading.Thread(target=start).start()

# Use Colab proxy
from google.colab.output import eval_js
proxy_url = eval_js("google.colab.kernel.proxyPort(8000)")
print(f"🔗 Your chatbot is live at: {proxy_url}")


INFO:     Started server process [31433]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


🔗 Your chatbot is live at: https://8000-gpu-t4-s-1s96v42z7vfki-b.us-west1-1.prod.colab.dev
