In [1]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

GOOGLE_API_KEY = "AIzaSyAew05mCoO2eCTRn0uPeBS1yZHwp_b8bi4"
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"


def fetch_text(url):
    try:
        res = requests.get(url, timeout=10)
        soup = BeautifulSoup(res.text, "html.parser")
        for tag in soup(["script", "style"]):
            tag.decompose()
        return soup.get_text(separator=" ", strip=True)[:6000]
    except Exception as e:
        return f"[Error fetching text from {url}]: {e}"


def find_internal_links(url, limit=3):
    try:
        base = "{0.scheme}://{0.netloc}".format(urlparse(url))
        res = requests.get(url)
        soup = BeautifulSoup(res.text, "html.parser")
        links = set()
        for a in soup.find_all("a", href=True):
            full_url = urljoin(base, a['href'])
            if base in full_url:
                links.add(full_url)
            if len(links) >= limit:
                break
        return list(links)
    except Exception:
        return []


def summarize(text):
    headers = { "Content-Type": "application/json" }
    data = {
        "contents": [{
            "parts": [{"text": f"Summarize this:\n{text}"}]
        }]
    }
    response = requests.post(f"{GEMINI_URL}?key={GOOGLE_API_KEY}", headers=headers, json=data)
    if response.status_code == 200:
        return response.json()['candidates'][0]['content']['parts'][0]['text']
    else:
        return f"[Gemini API Error]: {response.text}"


def summarize_website(url, filename="summary.txt"):
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"🔗 Main URL: {url}\n")
        print(f"Fetching main page content from {url}...")
        main_text = fetch_text(url)
        main_summary = summarize(main_text)
        f.write("\n📄 Summary of Main Page:\n")
        f.write(main_summary + "\n\n")

        internal_links = find_internal_links(url)
        for i, link in enumerate(internal_links, 1):
            print(f"Fetching internal link {i}: {link}...")
            link_text = fetch_text(link)
            link_summary = summarize(link_text)
            f.write(f"\n🔗 Internal Link {i}: {link}\n")
            f.write(f"\n📄 Summary of Link {i}:\n")
            f.write(link_summary + "\n\n")

    print(f"\n✅ All summaries saved to '{filename}'")


if __name__ == "__main__":
    user_url = input("Enter a website URL: ").strip()
    summarize_website(user_url)

Fetching main page content from https://www.gndec.ac.in...
Fetching internal link 1: https://www.gndec.ac.in#main-content...
Fetching internal link 2: https://www.gndec.ac.in/?q=node/2...
Fetching internal link 3: https://www.gndec.ac.in/...

✅ All summaries saved to 'summary.txt'


In [2]:
import time
import requests
import textwrap

GOOGLE_API_KEY = "AIzaSyAew05mCoO2eCTRn0uPeBS1yZHwp_b8bi4"
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

def typewriter(text, delay=0.01):
    for char in text:
        print(char, end='', flush=True)
        time.sleep(delay)

def rephrase_text(text):
    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [{
            "parts": [{"text": f"Rephrase the following summary in clear paragraph format:\n{text}"}]
        }]
    }
    response = requests.post(f"{GEMINI_URL}?key={GOOGLE_API_KEY}", headers=headers, json=payload)
    if response.status_code == 200:
        return response.json()['candidates'][0]['content']['parts'][0]['text']
    else:
        return f"[Gemini API Error]: {response.text}"

def format_to_paragraphs(text, width=100):
    # Split by double newlines or try sentence ends
    paragraphs = text.split('\n\n')
    if len(paragraphs) == 1:
        paragraphs = textwrap.wrap(text, width)
    return "\n\n".join(textwrap.fill(p.strip(), width) for p in paragraphs if p.strip())

def process_summary(input_file="summary.txt", output_file="rephrased_summary.txt"):
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            original_text = f.read()

        print("\n📄 Original Summary (Formatted as Paragraphs):\n")
        formatted_original = format_to_paragraphs(original_text)
        print(formatted_original)

        print("\n🎯 Rephrasing the summary...\n")
        rephrased = rephrase_text(formatted_original)

        formatted_rephrased = format_to_paragraphs(rephrased)

        print("📝 Rephrased Summary (Typewriter Style):\n")
        typewriter(formatted_rephrased)

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(formatted_rephrased)

        print(f"\n\n✅ Rephrased summary saved to '{output_file}'")

    except FileNotFoundError:
        print(f"❌ Error: '{input_file}' not found.")
    except Exception as e:
        print(f"❌ Unexpected error: {e}")


if __name__ == "__main__":
    process_summary()


📄 Original Summary (Formatted as Paragraphs):

🔗 Main URL: https://www.gndec.ac.in

📄 Summary of Main Page: Guru Nanak Dev Engineering College (GNDEC), Ludhiana is an engineering
college offering a variety of undergraduate and postgraduate programs across departments like Civil,
Electrical, Mechanical, Computer Science, and Architecture. The college is affiliated with PTU, has
NBA and ISO certifications, and NAAC accreditation. It emphasizes research, consultancy, and quality
work. The college has various facilities including hostels, a computer center, a library, and sports
facilities. The college also has a strong alumni network with graduates holding prominent positions
in various industries and government sectors worldwide, including engineers, directors, scientists,
and civil servants.

🔗 Internal Link 1: https://www.gndec.ac.in#main-content

📄 Summary of Link 1: Guru Nanak Dev Engineering College (GNDEC), Ludhiana, is an engineering
institution offering a range of undergraduate 