<a href="https://colab.research.google.com/github/ShreejayShakya28/ASR-LLM-Pipeline/blob/feat%2FUse-Requirements.txt/RAG/notebook/RAG_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cell 0 — Mount Google Drive

In [None]:
# Mount Google Drive — run this first in every Colab session.
# The index directory is created on Drive so that it persists between sessions.
import os

from google.colab import drive

# Single source of truth for the on-Drive index location used in this cell.
DRIVE_INDEX_DIR = '/drive/MyDrive/nepal_rag_index'

drive.mount('/drive')

# exist_ok=True keeps the cell idempotent across re-runs.
os.makedirs(DRIVE_INDEX_DIR, exist_ok=True)
print(f"✅ Drive mounted — index will persist at {DRIVE_INDEX_DIR}")

## Cell 2 — Clone Repo

In [None]:
import os
import subprocess
import sys

# ── Config — change this line only ───────────────────────────
BRANCH = "feat/Use-Requirements.txt"        # "main" | "develop" | any branch name
# ─────────────────────────────────────────────────────────────

REPO_URL = "https://github.com/ShreejayShakya28/ASR-LLM-Pipeline.git"
REPO_DIR = "/content/ASR-LLM-Pipeline"
RAG_DIR  = f"{REPO_DIR}/RAG"


def _git(*args):
    """Run one git command, echo its output, and fail loudly on a non-zero exit.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status, so
            the success messages below are only printed after real success.
    """
    result = subprocess.run(["git", *args], capture_output=True, text=True)
    # git writes progress to stderr; show both streams in the cell output.
    output = (result.stdout + result.stderr).strip()
    if output:
        print(output)
    result.check_returncode()


if os.path.exists(REPO_DIR):
    # Existing checkout: refresh it and switch to the requested branch.
    _git("-C", REPO_DIR, "fetch", "--all")
    _git("-C", REPO_DIR, "checkout", BRANCH)
    _git("-C", REPO_DIR, "pull", "origin", BRANCH)
    print(f"✅ Repo updated — branch: {BRANCH}")
else:
    # Fresh runtime: clone the requested branch directly.
    _git("clone", "-b", BRANCH, REPO_URL, REPO_DIR)
    print(f"✅ Repo cloned — branch: {BRANCH}")

# Make the `rag` package importable; the guard avoids duplicate entries on re-run.
if RAG_DIR not in sys.path:
    sys.path.insert(0, RAG_DIR)
print(f"✅ sys.path → {RAG_DIR}")

In [None]:
# Cell 1 â€” Install
!pip install -q -r /content/ASR-LLM-Pipeline/RAG/requirements.txt
print("âœ… Dependencies installed")

## Cell 3 — Load Models
*Takes ~2 min on first run. Weights are cached in `/root/.cache` for the session.*

In [None]:
# Bring the embedding model, reranker, tokenizer and LLM into the notebook
# namespace. Presumably the import itself triggers model loading — confirm
# against rag/models.py.
from rag.models import embedding_model, reranker, tokenizer, llm

print("\n✅ All models ready — proceed to refresh.")

## Cell 4 — Daily Refresh
*Run once per session. Scrapes new articles, skips already-indexed URLs.*

> **No background scheduler** — Colab disconnects too fast. Just run this on connect.

In [None]:
# Refresh the index for this session using the pipeline's default limits.
# `daily_refresh` stays in the namespace; the feed-management cell reuses it.
from rag.pipeline import daily_refresh

daily_refresh()

# Optional: pass a different per-feed limit instead, e.g.
# daily_refresh(max_per_feed=30)

## Cell 5 — Ask Questions

In [None]:
# `ask` is imported once here and reused by the question cells that follow.
from rag.inference import ask

# Query typo fixed ("causality" -> "casualties") so retrieval matches the intended topic.
ask("Recent Nepali flood casualties?")

In [None]:
# Re-import keeps this cell runnable on its own.
from rag.inference import ask

# District name spelled correctly ("Rupandehi") so the query can match indexed articles.
ask("who is running from Rupandehi-2 for elections")

In [None]:
# Example question about a named person; relies on `ask` imported in an earlier cell.
pm_question = "What is Prime Minister Karki doing?"
ask(pm_question)

In [None]:
# Example of a broad, open-ended question; relies on `ask` imported in an earlier cell.
economy_question = "What is the economic situation in Nepal?"
ask(economy_question)

In [None]:
# Nepali (Devanagari) works too.
# The query below means "What is happening in Nepal?". The literal was
# mis-encoded (UTF-8 bytes shown as Latin-1/cp1252) and is restored here.
ask("नेपालमा के भइरहेको छ?")

## Cell 6 — Tune Retrieval (optional)

| Symptom | Fix |
|---|---|
| No results | Lower `min_cosine` or raise `days_filter` |
| Off-topic answers | Raise `min_cosine` or lower `days_filter` |
| Missing older news | Raise `days_filter` |

In [None]:
# Widen the net — useful for topics with thin coverage.
# NOTE(review): this "widen" example passes a higher min_cosine (0.35) than the
# "tighten" example in the next cell (0.25), although the tuning table says that
# lowering min_cosine is what widens results. Confirm the intended values
# against the default in the rag package.
ask(
    "Nepal foreign policy",
    min_cosine=0.35,
    days_filter=60,
)

In [None]:
# Tighten for very recent, specific news (short look-back window).
# NOTE(review): min_cosine here (0.25) is lower than in the "widen" example in
# the previous cell (0.35), although the tuning table says that raising
# min_cosine is what tightens results. Confirm the intended values against
# the default in the rag package.
ask(
    "Kathmandu flood",
    min_cosine=0.25,
    days_filter=7,
)

In [None]:
# Put more retrieved sources into the context (default top_k=3).
ask(
    "Nepal economy",
    top_k=5,
)

## Cell 7 — Feed Management (optional)
Check which feeds are alive, or add a one-off feed without editing `config.py`.

In [None]:
# Check the configured candidate feeds.
# Presumably test_feeds returns the subset that responded — confirm in rag/scraper.py.
# `working_feeds` is kept as a notebook variable; the next cell extends it.
from rag.scraper import test_feeds
from rag.config  import ALL_CANDIDATE_FEEDS

working_feeds = test_feeds(ALL_CANDIDATE_FEEDS)

# Report how many feeds came back.
print(f"\n{len(working_feeds)} feeds active")

In [None]:
# Session-only extra feed: extend the working list without editing config.py.
# Relies on `working_feeds` and `daily_refresh` from the earlier cells.
session_feed_url = "https://some-new-feed.com/rss"  # placeholder — replace with a real RSS URL

extra_feeds = working_feeds + [session_feed_url]
daily_refresh(feed_urls=extra_feeds)

## Cell 8 — Index Stats (optional)
Confirm how much is in the index.

In [None]:
import sqlite3
from contextlib import closing

import faiss
from rag.config import DB_PATH, INDEX_PATH

# One aggregate query replaces three separate ones. closing() guarantees the
# connection is released even if the query raises (e.g. the table is missing);
# a bare `with conn:` would only manage the transaction, not close it.
with closing(sqlite3.connect(DB_PATH)) as conn:
    n_chunks, n_articles, first_date, last_date = conn.execute(
        "SELECT COUNT(*), COUNT(DISTINCT url), MIN(date), MAX(date) FROM chunks"
    ).fetchone()

# MIN/MAX return NULL (None) on an empty table, so both entries may be None.
date_range = (first_date, last_date)

# Load the FAISS index to compare its vector count with the chunk count.
index = faiss.read_index(INDEX_PATH)

print("📊 Index stats")
print(f"   Articles  : {n_articles}")
print(f"   Chunks    : {n_chunks}")
print(f"   Vectors   : {index.ntotal}")
print(f"   Date range: {date_range[0]} → {date_range[1]}")