In [1]:
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup  # Make sure to install: pip install beautifulsoup4
import gradio as gr 

async def scrape_chapter(url: str, screenshot_path: str = "chapter.png") -> str:
    """Fetch a chapter page in headless Chromium and return its paragraph text.

    Side effect: a full-page screenshot of the rendered page is written to
    ``screenshot_path``.

    Args:
        url: Address of the page to load.
        screenshot_path: File the screenshot is saved to. Defaults to
            ``"chapter.png"``, the path this function has always used.

    Returns:
        The text of every non-empty ``<p>`` inside the
        ``<div id="mw-content-text">`` container, joined by blank lines, or
        the string ``"Error: Content not found."`` when that container is
        missing from the page.
    """
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url)

            # Grab the rendered HTML before taking the screenshot.
            html = await page.content()
            await page.screenshot(path=screenshot_path, full_page=True)
        finally:
            # Close the browser even when navigation or the screenshot fails.
            await browser.close()

    soup = BeautifulSoup(html, "html.parser")
    content_div = soup.find("div", {"id": "mw-content-text"})

    if not content_div:
        return "Error: Content not found."

    # get_text(strip=True) strips every individual text node and joins them
    # with no separator, which glues words together around inline tags
    # (e.g. "<i>paraka</i> patches" -> "parakapatches"). Taking the raw text
    # and trimming only the paragraph's outer whitespace keeps inner spacing.
    paragraph_texts = (
        paragraph.get_text().strip() for paragraph in content_div.find_all("p")
    )

    # Skip empty paragraphs so they do not produce runs of blank lines.
    return "\n\n".join(text for text in paragraph_texts if text)

# Chapter page (Wikisource mobile site) that the rest of the notebook works on.
url = "https://en.m.wikisource.org/wiki/The_Gates_of_Morning/Book_1/Chapter_1"
# Top-level `await`: only valid where awaiting outside a function is allowed,
# as in a notebook cell. The call also writes a screenshot as a side effect.
chapter = await scrape_chapter(url)
print(chapter[:500])  # Preview only: show the first 500 characters of the scraped text


"The Gates of Morning"

CHAPTER I

THE CANOE BUILDER

DICK standing on a ledge of coral cast his eyes to the South.

Behind him the breakers of the outer sea thundered and the spindrift scattered on the wind; before him stretched an ocean calm as a lake, infinite, blue, and flown about by the fishing gulls—the lagoon of Karolin.

Clipped by its forty-mile ring of coral this great pond was a sea in itself, a sea of storm in heavy winds, a lake of azure, in light airs—and it was his—he who had lan


In [2]:
import os
from getpass import getpass

import google.generativeai as genai

# Never hard-code credentials in a notebook: the key is read from the
# GOOGLE_API_KEY environment variable, with an interactive prompt as fallback.
# The key that used to be committed on this line must be treated as leaked and
# should be revoked/rotated.
genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")
)

# Shared Gemini model handle used by rewrite_text() and review_text().
model = genai.GenerativeModel(model_name = "gemini-1.5-flash")  # This works on free tier

# NOTE(review): this module-level dropdown is not referenced by any code
# visible in this notebook (launch_ui builds its own); it is kept only so the
# global name `style` continues to exist. Consider deleting it.
style = gr.Dropdown(["Modern", "Poetic", "Shakespearean", "Gen Z Slang"], label="Choose Style")

def rewrite_text(text: str, style: str = "Modern") -> str:
    """Ask the shared generative model to restyle ``text``.

    Args:
        text: Passage to rewrite; it is embedded verbatim in the prompt.
        style: Name of the target style. It is lower-cased before being put
            into the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    prompt = f"Rewrite this in a {style.lower()} style while keeping headings/formatting:\n\n{text}"
    return model.generate_content(prompt).text





In [3]:
def review_text(original, rewritten):
    """Ask the shared generative model to polish a rewritten passage.

    Both the original and the rewritten text are embedded in an "editor"
    prompt that requests an improved version with no commentary.

    Args:
        original: The source text, included in the prompt for reference.
        rewritten: The rewritten text the model is asked to improve.

    Returns:
        The ``text`` attribute of the model's response.
    """
    prompt = f"""You're a professional editor.

Here is the original text:
{original}

Here is the rewritten version:
{rewritten}

Improve the rewritten version further for clarity, style, and readability.
Only output the improved version text. Do not explain or review anything."""

    return model.generate_content(prompt).text


In [4]:

import gradio as gr

def full_pipeline(text, style):
    """Rewrite ``text`` in ``style``, then run the review pass on the result.

    Returns:
        A ``(rewritten, reviewed)`` tuple: the output of ``rewrite_text`` and
        the output of ``review_text`` applied to the original text and that
        rewrite.
    """
    draft = rewrite_text(text, style)
    return draft, review_text(text, draft)

def launch_ui(original_text):
    """Build and launch the Gradio app for rewriting and reviewing a chapter.

    The app shows a style dropdown, an editable textbox pre-filled with
    ``original_text``, and two output textboxes. Clicking the button runs
    ``full_pipeline`` on the current text and style and fills both outputs.

    Args:
        original_text: Initial content of the "Original Text" textbox.

    Returns:
        None. The app is started with ``demo.launch()``.
    """
    # Each style appears exactly once ("Poetic" used to be listed twice).
    style_choices = [
        "Modern",
        "Poetic",
        "Shakespearean",
        "Gen Z Slang",
        "Academic",
        "Simplified for Kids",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("## AI-Powered Chapter Rewriter")

        style = gr.Dropdown(
            style_choices,
            label="Choose Rewrite Style",
            value="Modern",
        )

        original = gr.Textbox(value=original_text, label="Original Text", lines=15)
        rewritten = gr.Textbox(label="Rewritten", lines=15)
        reviewed = gr.Textbox(label="Reviewed", lines=15)

        rewrite_btn = gr.Button("Rewrite & Review")

        # full_pipeline already takes (text, style) and returns
        # (rewritten, reviewed), so it is wired in directly.
        rewrite_btn.click(
            fn=full_pipeline,
            inputs=[original, style],
            outputs=[rewritten, reviewed],
        )

    demo.launch()



In [5]:
import chromadb
from sentence_transformers import SentenceTransformer

# Default Chroma client for this kernel session.
chroma_client = chromadb.Client()

# create_collection() raises when a collection named "chapters" already
# exists, which breaks re-running this cell in a live kernel;
# get_or_create_collection() returns the existing collection instead.
collection = chroma_client.get_or_create_collection(name="chapters")

# Sentence-embedding model used by store_versions() and search_best_version().
# NOTE(review): the name `mode` is easy to confuse with the Gemini `model`
# defined earlier in the notebook; rename both here and in its users together.
mode = SentenceTransformer("all-MiniLM-L6-v2")

def store_versions(versions: list[str]):
    """Embed each text version and store it in the shared collection.

    Entries are keyed ``version_0``, ``version_1``, ... by position in
    ``versions``. They are written with ``upsert`` so that calling this
    function again replaces the stored versions, rather than colliding with
    ids left over from a previous call.

    Args:
        versions: Texts to store. An empty list is a no-op.
    """
    texts = list(versions)
    if not texts:
        # Nothing to embed or write; avoid handing empty batches to the store.
        return

    # Encode the whole batch in one call instead of once per text.
    embeddings = mode.encode(texts).tolist()

    collection.upsert(
        documents=texts,
        embeddings=embeddings,
        ids=[f"version_{i}" for i in range(len(texts))],
    )

def search_best_version(query: str) -> str:
    """Return the stored document that the collection ranks first for ``query``.

    The query is embedded with the shared encoder and the collection is asked
    for a single result.

    Args:
        query: Free-text description of the version being looked for.

    Returns:
        The first document of the first (and only) query's result list.
    """
    vector = mode.encode(query).tolist()
    hits = collection.query(query_embeddings=[vector], n_results=1)
    documents_for_query = hits["documents"][0]
    return documents_for_query[0]



In [6]:
url = "https://en.m.wikisource.org/wiki/The_Gates_of_Morning/Book_1/Chapter_1"
chapter = await scrape_chapter(url)
rewritten = rewrite_text(chapter)
reviewed = review_text(chapter,rewritten)
store_versions([rewritten, reviewed])
launch_ui(chapter)
best = search_best_version("best summary of the chapter")
print("Best version:", best)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


Best version: The Gates of Morning

### Chapter I: The Canoe Builder

Dick, perched on a coral ledge, gazed south. Behind him, the ocean thundered, its spray windblown; before him, the Karolin lagoon stretched calm and vast, a blue mirror reflecting the sky, punctuated by fishing gulls.

Ringed by a forty-mile coral reef, this immense lagoon was a sea unto itself—stormy in high winds, tranquil in gentler breezes—and it was his. He, a newcomer of only yesterday.

The beach teemed with activity: women, children, and youths toiled in the sun, fishing, playing, or tending *paraka* patches.  They were his people. The beached canoes and empty canoe houses, once filled with war canoes, were his as well.

His gaze shifted from lagoon to canoe houses, a frown etching his brow. He turned from the calm lagoon to face the turbulent northern sea. Beyond the horizon, invisible yet present, lay Palm Tree Island—a paradise in his dreams, a devil's lair in reality.

Nearby, young Tari, son of Le Taioi 