In [1]:
# utils/scraping.py

from playwright.sync_api import sync_playwright
import os

def scrape_and_screenshot(url, output_folder="screenshots", html_save_path="data/chapter1.html",
                          screenshot_name="chapter1_screenshot.png"):
    """Load ``url`` in headless Chromium, then save a full-page screenshot and the page HTML.

    Args:
        url: Address passed to ``page.goto``.
        output_folder: Directory that receives the screenshot; created if missing.
            An empty string means the current working directory.
        html_save_path: File path the page HTML is written to (UTF-8). Its parent
            directory is created if missing; a bare filename is written to the
            current working directory.
        screenshot_name: File name of the screenshot inside ``output_folder``.

    Returns:
        None. The two output paths are reported on stdout.

    Raises:
        Whatever Playwright or the filesystem raises; the browser is closed first.
    """
    # os.makedirs("") raises FileNotFoundError, so only create directories that
    # actually have a name (a bare filename has an empty dirname).
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    html_dir = os.path.dirname(html_save_path)
    if html_dir:
        os.makedirs(html_dir, exist_ok=True)

    screenshot_path = os.path.join(output_folder, screenshot_name)

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(url)

            # Save screenshot
            page.screenshot(path=screenshot_path, full_page=True)

            # Fetch the HTML before opening the file so a failure here does not
            # leave an empty file behind.
            html = page.content()
            with open(html_save_path, "w", encoding="utf-8") as f:
                f.write(html)
        finally:
            # Always release the browser, even when navigation or saving fails.
            browser.close()

    print(f"[✓] Screenshot saved to: {screenshot_path}")
    print(f"[✓] HTML content saved to: {html_save_path}")


In [9]:
from gui.reviewer import review_and_edit

# Smoke test of the review step: pass a short placeholder string to
# review_and_edit together with the path the reviewed text should go to.
# NOTE(review): review_and_edit is defined in gui.reviewer and is not visible
# here; presumably it opens an interactive editor — confirm. The recorded cell
# output reports the final version being saved to output/test_final.txt.
sample_text = "This is AI-spun content. Please review before finalizing."
review_and_edit(sample_text, "output/test_final.txt")


✅ Final version saved to output/test_final.txt


In [10]:
import os
from pathlib import Path

# Report where the per-user ChromaDB cache would live and, if it is present,
# list every file and directory underneath it.
cache_path = Path.home().joinpath(".cache", "chroma")
print("🧭 ChromaDB cache path:", cache_path)

if not cache_path.exists():
    print("⚠️ ChromaDB cache directory not found.")
else:
    print("\n📁 Contents:")
    # rglob("*") walks the whole tree, equivalent to glob("**/*").
    for entry in cache_path.rglob("*"):
        print("-", entry)


🧭 ChromaDB cache path: C:\Users\jarif\.cache\chroma

📁 Contents:
- C:\Users\jarif\.cache\chroma\onnx_models
- C:\Users\jarif\.cache\chroma\telemetry_user_id
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\config.json
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\model.onnx
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\special_tokens_map.json
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\tokenizer.json
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\tokenizer_config.json
- C:\Users\jarif\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx\vocab.txt
