In [16]:
!pip install openai requests beautifulsoup4 python-dotenv



In [17]:
# ============================================
# WEBSITE SUMMARIZER WITH OPENAI
# ============================================
# Import libraries
from openai import OpenAI  # Updated import
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one exists
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Check the key BEFORE constructing the client. With no key available the
# OpenAI() constructor is expected to raise, which would abort this cell
# before the friendly message below could be printed.
if not api_key:
    # No usable client: summarize_text() catches the resulting failure and
    # returns an "Error summarizing: ..." string instead of calling the API.
    client = None
    print("‚ùå ERROR: API key not found!")
    print("Please make sure .env file exists with your API key")
else:
    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)
    print("‚úì API key loaded successfully!")
    print("‚úì Ready to summarize websites!\n")

# ============================================
# FUNCTIONS
# ============================================
def get_website_content(url):
    """Download a web page and return its visible text as a single string.

    <script> and <style> elements are removed, each line of the remaining
    text is stripped and split on double spaces, and the non-empty
    fragments are joined with single spaces.

    Failures are not raised: any exception is caught and reported through
    the return value as "Error fetching website: <details>".
    """
    # Browser-like User-Agent sent with the request
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        print(f"üì° Fetching content from {url}...")

        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()  # HTTP 4xx/5xx -> exception -> error string

        document = BeautifulSoup(page.content, 'html.parser')

        # Drop elements whose text is code/CSS rather than page content
        for tag in document(["script", "style"]):
            tag.decompose()

        # Collapse the page text into non-empty, whitespace-trimmed fragments
        fragments = []
        for raw_line in document.get_text().splitlines():
            for piece in raw_line.strip().split("  "):
                piece = piece.strip()
                if piece:
                    fragments.append(piece)
        cleaned = ' '.join(fragments)

        print(f"‚úì Fetched {len(cleaned)} characters\n")
        return cleaned
    except Exception as exc:
        return "Error fetching website: " + str(exc)

def summarize_text(text, max_length=3000, model="gpt-3.5-turbo"):
    """Summarize text using the OpenAI chat completions API.

    Args:
        text: Text to summarize. Only the first `max_length` characters
            are sent to the model.
        max_length: Maximum number of characters of `text` to include in
            the prompt.
        model: Name of the chat model passed to the API.

    Returns:
        The summary string on success. On any failure (blank input, API
        error, empty reply) a string starting with "Error summarizing: ".
        Errors are returned rather than raised because
        website_summarizer() detects failures by that prefix.
    """
    try:
        # Nothing to summarize: fail fast instead of spending an API call
        if not text or not text.strip():
            return "Error summarizing: no text content to summarize"

        # Truncate if too long
        if len(text) > max_length:
            text = text[:max_length]
            print(f"‚ö†Ô∏è Text truncated to {max_length} characters")

        print("ü§ñ Generating summary with OpenAI...")

        # OpenAI API call for version 1.0.0+
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that summarizes website content concisely."
                },
                {
                    "role": "user",
                    "content": f"Please summarize the following website content:\n\n{text}"
                }
            ],
            max_tokens=300,
            temperature=0.5
        )

        summary = response.choices[0].message.content
        # NOTE(review): the message content is presumably optional in the API
        # response; callers call .startswith() on the result, so never hand
        # back None or an empty string as if it were a summary.
        if not summary:
            return "Error summarizing: the model returned an empty response"

        print("‚úì Summary generated!\n")
        return summary

    except Exception as e:
        return f"Error summarizing: {str(e)}"

def website_summarizer(url):
    """Fetch a web page, summarize it, and print the result.

    Args:
        url: Address of the page to summarize.

    Returns:
        The summary string, or None if fetching or summarizing failed
        (the failure message is printed in that case).
    """
    banner = "=" * 70
    print(banner)
    print(" " * 20 + "WEBSITE SUMMARIZER")
    print(banner)
    print()

    # Get content
    content = get_website_content(url)

    # Match the helper's exact failure prefix. A bare startswith("Error")
    # check would reject legitimate pages whose text begins with "Error".
    if content.startswith("Error fetching website: "):
        print(f"‚ùå {content}")
        return None

    # Generate summary
    summary = summarize_text(content)

    # Guard against a non-string result before calling .startswith() on it
    if not isinstance(summary, str):
        print("‚ùå Error summarizing: no summary was returned")
        return None

    # Same reasoning as above: a real summary may begin with the word "Error"
    if summary.startswith("Error summarizing: "):
        print(f"‚ùå {summary}")
        return None

    # Display summary
    print(banner)
    print(" " * 28 + "SUMMARY")
    print(banner)
    print()
    print(summary)
    print()
    print(banner)

    return summary

# ============================================
# READY TO USE!
# ============================================
# Usage hint followed by a 70-character divider line.
print(
    "üöÄ System ready! Use: website_summarizer('your-url-here')",
    "-" * 70,
    sep="\n",
)

‚úì API key loaded successfully!
‚úì Ready to summarize websites!

üöÄ System ready! Use: website_summarizer('your-url-here')
----------------------------------------------------------------------


In [18]:
# Smoke test: summarize a single Wikipedia article.
# website_summarizer() prints its progress and the summary, and returns the
# summary text, or None when fetching or summarizing fails.
url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
summary = website_summarizer(url)

                    WEBSITE SUMMARIZER

üì° Fetching content from https://en.wikipedia.org/wiki/Artificial_intelligence...
‚úì Fetched 212374 characters

‚ö†Ô∏è Text truncated to 3000 characters
ü§ñ Generating summary with OpenAI...
‚ùå Error summarizing: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


In [21]:
# Summarize multiple websites
websites = [
    "https://en.wikipedia.org/wiki/Python_(programming_language)",
    "https://en.wikipedia.org/wiki/Machine_learning",
    "https://en.wikipedia.org/wiki/Data_science"
]

# Keep each result (summary text, or None on failure) keyed by URL so the
# outcomes can be inspected after the loop instead of being discarded.
summaries = {}
for url in websites:
    summaries[url] = website_summarizer(url)
    print("\n\n")

                    WEBSITE SUMMARIZER

üì° Fetching content from https://en.wikipedia.org/wiki/Python_(programming_language)...
‚úì Fetched 88188 characters

‚ö†Ô∏è Text truncated to 3000 characters
ü§ñ Generating summary with OpenAI...
‚ùå Error summarizing: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}



                    WEBSITE SUMMARIZER

üì° Fetching content from https://en.wikipedia.org/wiki/Machine_learning...
‚úì Fetched 124119 characters

‚ö†Ô∏è Text truncated to 3000 characters
ü§ñ Generating summary with OpenAI...
‚ùå Error summarizing: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: htt