In [1]:
pip install beautifulsoup4 requests google-generativeai

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install --upgrade google-generativeai


Note: you may need to restart the kernel to use updated packages.


In [4]:
import json
import os
import sys
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup

# Gemini REST configuration.
# The API key is read from the environment (GEMINI_API_KEY, falling back to
# GOOGLE_API_KEY) so that no credential is stored in the notebook itself.
# If neither variable is set the key is empty and the API will reject requests.
# NOTE(review): a literal key used to be committed on this line; treat it as
# compromised and revoke/rotate it.
API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY", "")
MODEL = "gemini-2.0-flash"
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={API_KEY}"

def extract_links_and_text(url):
    """Download a web page and collect its hyperlinks and readable text.

    Every ``<a href=...>`` is resolved to an absolute URL. Only ``http`` and
    ``https`` targets are kept, so ``mailto:``, ``tel:``, ``javascript:`` and
    similar pseudo-links are not reported as external links. A link is
    "internal" when its network location equals that of the page that was
    actually served (i.e. after any HTTP redirects).

    Args:
        url: Address of the page to fetch.

    Returns:
        A tuple ``(internal_links, external_links, text)`` where the first two
        items are sorted lists of unique absolute URLs and ``text`` is the
        stripped text of all ``p``/``h1``/``h2``/``h3``/``li`` elements joined
        with newlines. If anything fails, returns ``([], [], message)`` where
        ``message`` starts with ``"Error while fetching or parsing the
        webpage: "``; no exception is propagated.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Relative hrefs must be resolved against the URL that was finally
        # served, which differs from `url` when the request was redirected.
        page_url = response.url or url
        page_netloc = urllib.parse.urlparse(page_url).netloc

        internal_links = set()
        external_links = set()

        for a_tag in soup.find_all('a', href=True):
            href = urllib.parse.urljoin(page_url, a_tag['href'])
            parsed_href = urllib.parse.urlparse(href)
            # Skip mailto:, tel:, javascript:, data:, ... -- they are not web links.
            if parsed_href.scheme not in ('http', 'https'):
                continue
            if parsed_href.netloc == page_netloc:
                internal_links.add(href)
            else:
                external_links.add(href)

        all_text = []
        for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'li']):
            text = tag.get_text(strip=True)
            if text:
                all_text.append(text)

        # Sorted so that repeated runs on the same page give the same output.
        return sorted(internal_links), sorted(external_links), '\n'.join(all_text)

    except Exception as e:
        # Deliberately broad: callers rely on the error-string return value
        # instead of exceptions.
        return [], [], f"Error while fetching or parsing the webpage: {str(e)}"

def call_gemini(prompt_text, timeout=60):
    """Send a single text prompt to the Gemini ``generateContent`` endpoint.

    Args:
        prompt_text: The prompt to send as the only text part of the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the first part of the first candidate on success.
        On any failure (network error, non-200 status, or a response body that
        does not contain the expected candidate text) a string starting with
        ``"❌ Error from Gemini API:"`` is returned instead; no exception is
        propagated.
    """
    headers = {
        "Content-Type": "application/json"
    }

    data = {
        "contents": [
            {
                "parts": [{"text": prompt_text}]
            }
        ]
    }

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            GEMINI_API_URL,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Only the exception type is reported: the exception message can
        # contain the request URL, which carries the API key as a query
        # parameter.
        return f"❌ Error from Gemini API: request failed - {type(e).__name__}"

    if response.status_code != 200:
        return f"❌ Error from Gemini API: {response.status_code} - {response.text}"

    try:
        result = response.json()
        return result["candidates"][0]["content"]["parts"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response may still lack candidate text (for example when the
        # body has no "candidates" entry) or may not be valid JSON.
        return f"❌ Error from Gemini API: unexpected response format - {response.text}"

def typewriter_effect(text, delay=0.02):
    """Write ``text`` to standard output one character at a time.

    Each character is flushed immediately and followed by a pause of
    ``delay`` seconds. After the last character two newlines are written,
    leaving one blank line after the text.
    """
    out = sys.stdout
    for ch in text:
        out.write(ch)
        out.flush()
        time.sleep(delay)
    # Terminate the line and leave one blank line after it.
    out.write("\n\n")

def main():
    """Interactive entry point: summarize a web page and rephrase the summary.

    Prompts for a URL, extracts the page's links and text, asks Gemini for a
    summary of the first 12000 characters of text, and writes the summary and
    both link lists to ``website_summary.txt``. It then asks Gemini to
    rephrase the summary, prints the result with a typewriter effect, and
    writes it to ``rephrased_typewriter_summary.txt``.

    The function stops early, printing the error and writing nothing further,
    when the page cannot be fetched or when a Gemini call reports an error.
    """
    fetch_error_prefix = "Error while fetching or parsing the webpage:"
    gemini_error_prefix = "❌ Error from Gemini API:"

    url = input("Enter the website URL: ").strip()

    internal_links, external_links, page_text = extract_links_and_text(url)

    # A failed fetch is signalled by empty link lists plus the error message.
    # Matching the full prefix (rather than just "Error") keeps pages whose
    # own text happens to begin with the word "Error" from being rejected.
    if not internal_links and not external_links and page_text.startswith(fetch_error_prefix):
        print(page_text)
        return

    print(f"\n✅ Extracted {len(internal_links)} internal links and {len(external_links)} external links.")
    print("⏳ Summarizing content...")

    # The prompt is capped at 12000 characters of page text.
    summary_prompt = f"Summarize the following webpage content:\n\n{page_text[:12000]}"
    summary = call_gemini(summary_prompt)

    # Do not store an API error message as if it were the summary.
    if summary.startswith(gemini_error_prefix):
        print(summary)
        return

    with open("website_summary.txt", "w", encoding="utf-8") as f:
        f.write("📄 Website Summary\n")
        f.write("=" * 60 + "\n\n")
        f.write(summary)
        f.write("\n\n🔗 Internal Links:\n")
        f.write("\n".join(internal_links))
        f.write("\n\n🌐 External Links:\n")
        f.write("\n".join(external_links))

    print("✅ Summary saved to 'website_summary.txt'")

    print("\n🔁 Rephrasing the summary...")
    rephrase_prompt = f"Rephrase this summary in a more human, vivid, and engaging way:\n\n{summary}"
    rephrased = call_gemini(rephrase_prompt)

    if rephrased.startswith(gemini_error_prefix):
        print(rephrased)
        return

    print("\n🎬 Rephrased Summary (Typewriter Effect):\n")
    typewriter_effect(rephrased, delay=0.01)

    with open("rephrased_typewriter_summary.txt", "w", encoding="utf-8") as f:
        f.write("📃 Rephrased Summary (Simulated Typewriter Output)\n")
        f.write("=" * 60 + "\n\n")
        f.write(rephrased)

    print("\n✅ Rephrased summary saved to 'rephrased_typewriter_summary.txt'")

if __name__ == "__main__":
    main()



✅ Extracted 181 internal links and 9 external links.
⏳ Summarizing content...
✅ Summary saved to 'website_summary.txt'

🔁 Rephrasing the summary...

🎬 Rephrased Summary (Typewriter Effect):

Imagine a digital playground where you can level up your tech skills and land your dream job! This website is like a superpower generator for anyone looking to conquer the tech world.

Need to become a coding ninja? They've got you covered with everything from classic languages like C++ and Java to the hottest trends like Python, JavaScript, and GoLang. Want to unlock the secrets of Data Science and Machine Learning? Dive into Python, Pandas, and even the magic of Deep Learning!

Craving a career in Web Development? Build stunning websites with HTML, CSS, JavaScript, and frameworks like ReactJS and NextJS. Feeling overwhelmed by coding interviews? Sharpen your DSA skills with tons of practice problems and proven strategies.

And it doesn't stop there! Explore the wild world of DevOps and Cloud wit