In [None]:

# Install/upgrade the Python packages this notebook imports (LangChain stack, PDF parsing,
# Chroma vector store, sentence-transformers embeddings, Gradio UI).
!pip install -U langchain langchain-community langchain-huggingface pypdf chromadb sentence-transformers langchain-text-splitters gradio

# zstd is installed right before the Ollama installer runs -- presumably a prerequisite of it; TODO confirm.
!apt-get update && apt-get install -y zstd
# Download the Ollama install script and pipe it to the shell.
!curl -fsSL https://ollama.com/install.sh | sh

Hit:1 https://cli.github.com/packages stable InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:6 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:10 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading packag

In [None]:
import subprocess
import time
import requests

def start_ollama():
    """Make sure a local Ollama server is up, then pull the ``llama3`` model.

    The server is probed at ``http://localhost:11434``. If the probe fails with
    a ``requests`` error, ``ollama serve`` is launched in the background and the
    endpoint is polled until it answers. ``ollama pull llama3`` is then run in
    both cases.

    Raises:
        RuntimeError: the freshly launched server never answered the probe.
        subprocess.CalledProcessError: ``ollama pull`` exited with a non-zero status.
    """
    server_url = "http://localhost:11434"
    max_probes = 30  # one probe per second after launching the server

    try:
        # Check whether the server is already running.
        requests.get(server_url, timeout=2)
        print("‚úÖ Ollama is already running.")
    except requests.exceptions.RequestException:
        # Only request-level failures mean "not running"; anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        print("üîÑ Starting Ollama Server...")
        subprocess.Popen(['/usr/local/bin/ollama', 'serve'])

        # Poll for readiness instead of sleeping for a fixed time: continues as
        # soon as the server answers and fails loudly if it never does.
        for _ in range(max_probes):
            try:
                requests.get(server_url, timeout=2)
                break
            except requests.exceptions.RequestException:
                time.sleep(1)
        else:
            raise RuntimeError(
                f"Ollama server did not respond at {server_url} after {max_probes} probes."
            )

    # Download the llama3 model.
    print("ü¶ô Pulling Llama3 model (this may take a few minutes)...")
    subprocess.run(['/usr/local/bin/ollama', 'pull', 'llama3'], check=True)
    print("‚úÖ Model is ready!")

# Run the Ollama setup defined above (server check/start, then model pull).
start_ollama()

üîÑ Starting Ollama Server...
ü¶ô Pulling Llama3 model (this may take a few minutes)...
‚úÖ Model is ready!


In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_message_histories import ChatMessageHistory

# Set up the embeddings model (supports Arabic and English).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
# Module-level state shared by process_docs, generate_summary and chat_logic.
chat_history_db = ChatMessageHistory()  # user/AI turns appended by chat_logic
vectorstore = None  # Chroma index; assigned by process_docs
all_splits = []  # document chunks from the last process_docs run

def process_docs(files):
    """Load the uploaded PDFs, split them into chunks and (re)build the Chroma index.

    Args:
        files: Uploaded files from the Gradio file component. Each entry may be
            an object exposing the path as ``.name`` or a plain path
            (``str`` / ``os.PathLike``).

    Returns:
        A status string for the UI: a warning when nothing was uploaded or no
        text could be extracted, otherwise the number of pages indexed.

    Side effects:
        On success replaces the module-level ``vectorstore`` and ``all_splits``.
        When no text can be extracted the previous index is left untouched.
    """
    global vectorstore, all_splits
    if not files:
        return "‚ö†Ô∏è Please upload PDFs."

    pages = []
    for file in files:
        # Accept both file-like objects carrying the path in `.name` and plain paths.
        pdf_path = getattr(file, "name", file)
        pages.extend(PyPDFLoader(str(pdf_path)).load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=150)
    chunks = splitter.split_documents(pages)
    if not chunks:
        # Nothing to embed (e.g. image-only PDFs); do not hand Chroma an empty list.
        return "No extractable text found in the uploaded PDFs."

    if vectorstore is not None:
        # Drop the previous collection so a re-run replaces the index instead
        # of appending duplicate chunks to it.
        vectorstore.delete_collection()
        vectorstore = None
        all_splits = []

    vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)
    all_splits = chunks
    return f"‚úÖ Indexed {len(pages)} pages."

def generate_summary():
    """Ask the LLM for an executive summary of the first indexed chunks.

    Returns:
        The model's text answer, or a reminder to upload documents when
        ``all_splits`` is empty.
    """
    if not all_splits:
        return "Please upload docs first."

    # Only the first 15 chunks are sent to the model.
    leading_chunks = all_splits[:15]
    summary_context = "\n".join(chunk.page_content for chunk in leading_chunks)

    template = ChatPromptTemplate.from_template("Provide an Executive Summary for:\n{context}")
    model = Ollama(model="llama3", temperature=0.1)
    chain = template | model | StrOutputParser()
    return chain.invoke({"context": summary_context})

def chat_logic(message, history):
    """Answer a chat message from retrieved PDF context plus earlier turns.

    Args:
        message: The user's question.
        history: The conversation so far as passed by the Gradio chat UI. Only
            its emptiness is inspected: an empty value is treated as the start
            of a new (or cleared) conversation.

    Returns:
        The model's answer, or a reminder to upload PDFs when no index exists.

    Side effects:
        Appends the question and the answer to the module-level
        ``chat_history_db`` and clears it first when ``history`` is empty.
    """
    if vectorstore is None:
        return "Upload PDFs first."

    # The UI starts over with an empty history (first turn or after "clear");
    # without this reset the model would still be fed turns from the previous
    # conversation.
    # NOTE(review): chat_history_db is a single module-level object, so
    # concurrent users of the shared link still share one history.
    if not history:
        chat_history_db.clear()

    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    docs = retriever.invoke(message)
    # Stored page numbers are shifted by one for display; a missing 'page' key counts as 0.
    context_text = "\n".join(
        f"[Page {d.metadata.get('page', 0) + 1}]: {d.page_content}" for d in docs
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer based on context. Cite sources."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "Context:\n{context}\n\nQuestion: {question}")
    ])
    llm = Ollama(model="llama3", temperature=0.3)
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({
        "context": context_text,
        "question": message,
        "chat_history": chat_history_db.messages,
    })

    # Record the turn only after a successful answer.
    chat_history_db.add_user_message(message)
    chat_history_db.add_ai_message(response)
    return response

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
import gradio as gr

# Build the Gradio UI: a left column for uploading/analysing PDFs and requesting a
# summary, and a wider right column hosting the chat interface.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üöÄ Professional RAG Assistant")
    with gr.Row():
        with gr.Column(scale=1):
            # Upload + indexing controls; status_box receives process_docs' return string.
            file_uploader = gr.File(label="Upload PDFs", file_count="multiple")
            btn_analyze = gr.Button("üî® Analyze", variant="primary")
            btn_summary = gr.Button("üìã Executive Summary")
            status_box = gr.Textbox(label="Status")
            summary_display = gr.Markdown()
        with gr.Column(scale=2):
            # chat_logic is called with (message, history) for every user turn.
            chat_ui = gr.ChatInterface(fn=chat_logic)

    # Wire the buttons to the backend functions.
    btn_analyze.click(process_docs, inputs=file_uploader, outputs=status_box)
    btn_summary.click(generate_summary, outputs=summary_display)

# share=True requests a public share link; debug=True keeps the cell running so
# errors and logs stay visible (see the launch output below this cell).
demo.launch(share=True, debug=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0fd8e676058063111c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  llm = Ollama(model="llama3", temperature=0.1)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://0fd8e676058063111c.gradio.live


