# ADHD RAG Workbench

Use this Google Colab notebook to ingest new documents, rebuild the FAISS vector store kept in the repo, and prototype retrieval-augmented chats against `https://kb.terpedia.com/v1/api/chat`.

**Workflow**
1. Clone or pull the repo into the Colab runtime.
2. Optionally upload new files; they are written into `data/uploads/`.
3. Re-run the ingestion helper to regenerate `data/rag/`.
4. Provide your Terpedia API key and issue chat prompts with fresh context.

After you are done, download or commit the updated `data/` directory from the Colab session back into this repository.

In [None]:
import os
import pathlib
import subprocess

# Remote to fetch and the fixed location of the checkout inside the Colab runtime.
REPO_URL = "https://github.com/Terpedia/ADHD.git"
REPO_PATH = pathlib.Path("/content/ADHD")

# Clone only on the first run; re-running the cell reuses the existing checkout.
if not REPO_PATH.exists():
    print("Cloning repository…")
    subprocess.run(["git", "clone", REPO_URL, str(REPO_PATH)], check=True)
else:
    print("Repository already present at", REPO_PATH)

# Make the checkout the working directory so the git commands below (and any
# repo-relative paths) resolve against it.
os.chdir(REPO_PATH)
print("Working directory:", os.getcwd())

# Bring the checkout up to date; check=True raises CalledProcessError if git fails.
subprocess.run(["git", "pull"], check=True)
print("HEAD:", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().strip())

In [None]:
!pip install -q langchain langchain-community sentence-transformers faiss-cpu requests python-dotenv tqdm

In [None]:
from pathlib import Path

# Repo-relative folders: one receives uploaded files, the other is the RAG output folder.
UPLOADS_DIR, RAG_DIR = Path("data/uploads"), Path("data/rag")

# Create both folders (including parents); a no-op when they already exist.
for folder in (UPLOADS_DIR, RAG_DIR):
    folder.mkdir(parents=True, exist_ok=True)

# Report the absolute locations so the user can find them in the runtime.
for label, folder in (("Uploads directory:", UPLOADS_DIR), ("RAG directory:", RAG_DIR)):
    print(label, folder.resolve())

In [None]:
# The upload helper only exists inside Google Colab; fall back to None elsewhere.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Each entry of the upload result is (file name, file contents); every file
    # is written into UPLOADS_DIR under its uploaded name.
    uploaded = files.upload()
    for filename, payload in uploaded.items():
        target = UPLOADS_DIR / filename
        target.write_bytes(payload)
        print("Saved", target)
else:
    print("google.colab.files not available in this environment.")
