<a href="https://colab.research.google.com/github/Coltcult/fantastic-computing-machine/blob/main/Copy_of_SEO_Comparison_and_Suggestion_Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import re
import string
import random
import os

def sanitize_filename(filename):
    """Return *filename* reduced to a conservative, filesystem-safe form.

    Only ASCII letters, digits, hyphens, underscores and spaces survive the
    filter; every surviving space is then converted to an underscore.
    """
    allowed = "-_ " + string.ascii_letters + string.digits
    kept = [ch for ch in filename if ch in allowed]
    return ''.join(kept).replace(' ', '_')

def fetch_website_content(url):
    """
    Download a page and hand back its HTML text.

    Failures are reported on stdout instead of being raised.

    Args:
        url (str): Address of the page to download.

    Returns:
        str: The body of the response, or None if the request failed.
    """
    # A browser-like user-agent keeps some servers from rejecting the request.
    browser_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        ),
    }
    try:
        # The timeout stops an unresponsive server from hanging the tool.
        reply = requests.get(url, headers=browser_headers, timeout=10)
        # Turn 4xx/5xx status codes into exceptions handled below.
        reply.raise_for_status()
        return reply.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred while fetching {url}: {e}")
    return None

def _meta_content(soup, name):
    """Return the stripped ``content`` of ``<meta name=NAME>``, or "" if unavailable."""
    tag = soup.find('meta', attrs={'name': name})
    if tag is None:
        return ""
    # .get() tolerates a <meta> tag that carries no content attribute, which
    # would otherwise raise KeyError and discard the whole page's data.
    return (tag.get('content') or "").strip()


def extract_seo_data(html_content, url):
    """
    Extracts SEO data (title, description, keywords, headings) from HTML content.

    Missing elements are reported as empty values rather than errors: a page
    without a <title>, without the description/keywords meta tags, or with a
    meta tag lacking a ``content`` attribute yields "" for that field.

    Args:
        html_content (str): The HTML content of the website.
        url (str): The URL of the website (echoed in the result and in error
            messages).

    Returns:
        dict: A dictionary with the keys 'title', 'description', 'keywords'
            (strings), 'headings' (list of strings, h1-h6 in document order)
            and 'url'; or None if html_content is empty or parsing fails.
    """
    if not html_content:
        return None

    soup = BeautifulSoup(html_content, 'html.parser')
    try:
        title_tag = soup.title
        # .string is None when <title> is empty or contains nested markup.
        if title_tag is not None and title_tag.string:
            title = title_tag.string.strip()
        else:
            title = ""
        headings = [
            h.text.strip()
            for h in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        ]
        return {
            'title': title,
            'description': _meta_content(soup, 'description'),
            'keywords': _meta_content(soup, 'keywords'),
            'headings': headings,
            'url': url,  # Include the URL for later use
        }
    except Exception as e:
        # Best-effort boundary: a malformed page must not abort the comparison.
        print(f"Error extracting SEO data from {url}: {e}")
        return None

def _compare_text_field(entries, field, advice):
    """Build the (comparison, suggestion) strings for one text field.

    ``entries`` must already be free of None values so that positions in the
    derived length list line up with positions in ``entries``.
    """
    plural = f"{field}s"
    texts = [entry[field] for entry in entries]
    if not texts:
        message = f"No {plural} found."
        return message, message

    # Show at most 50 characters of each value alongside its full length.
    preview = ', '.join(f'{t[:50]}... ({len(t)} chars)' for t in texts)
    comparison = f"{plural.capitalize()}: {preview}"

    suggestion = advice
    if len(texts) > 1:
        lengths = [len(t) for t in texts]
        shortest = entries[lengths.index(min(lengths))]
        longest = entries[lengths.index(max(lengths))]
        suggestion += (
            f"The shortest {field} is on '{shortest['url']}', "
            f"the longest is on '{longest['url']}'."
        )
    else:
        suggestion += (
            f"The {field} on '{entries[0]['url']}' is "
            f"{len(texts[0])} characters long."
        )
    return comparison, suggestion


def compare_seo_data(data):
    """
    Compares SEO data from multiple websites and provides suggestions.

    Entries that are None (sites that could not be fetched or parsed) are
    ignored. URLs quoted in the suggestions are looked up in the filtered
    list, so a failed site never shifts the attribution to the wrong URL.

    Args:
        data (list): A list of dictionaries, where each dictionary contains SEO data for a website.
            Entries may be None.

    Returns:
        dict: A dictionary with the keys "comparison" and "suggestions". Each
            value is a dict keyed by 'title', 'description', 'keywords' and
            'headings'; when ``data`` is empty both values are plain
            message strings instead.
    """
    from collections import Counter

    if not data:
        return {"comparison": "No data to compare.", "suggestions": "No suggestions available."}

    # Work on the successfully analysed sites only.
    valid = [d for d in data if d]

    comparison = {}
    suggestions = {}

    # --- Title Comparison ---
    comparison['title'], suggestions['title'] = _compare_text_field(
        valid,
        'title',
        "Consider making titles between 50-60 characters for better SEO.  ",
    )

    # --- Description Comparison ---
    comparison['description'], suggestions['description'] = _compare_text_field(
        valid,
        'description',
        "Descriptions should be between 150-160 characters.  ",
    )

    # --- Keywords Comparison ---
    # Keywords are comma-separated; blank fragments are dropped.
    keyword_counts = Counter(
        kw.strip()
        for d in valid
        for kw in d['keywords'].split(',')
        if kw.strip()
    )
    if keyword_counts:
        comparison['keywords'] = f"Keywords: {', '.join(f'{k} ({c})' for k, c in keyword_counts.items())}"
    else:
        comparison['keywords'] = "No keywords found."
    suggestions['keywords'] = "Use relevant keywords in your title, description, and headings.  Focus on a few key terms."

    # --- Headings Comparison ---
    # Counter keeps first-seen order, so each distinct heading is listed once.
    heading_counts = Counter(h for d in valid for h in d['headings'])
    if heading_counts:
        comparison['headings'] = f"Headings: {', '.join(f'{h[:50]}...' for h in heading_counts)}"
    else:
        comparison['headings'] = "No headings found."
    suggestions['headings'] = "Use headings (H1, H2, etc.) to structure your content.  Include keywords in some headings."

    return {"comparison": comparison, "suggestions": suggestions}

def generate_landing_page(data, comparison_results):
    """
    Generates a basic HTML landing page with the SEO comparison and suggestions.

    All text taken from the analysed websites or from the comparison results
    is HTML-escaped before being embedded, so markup found in a scraped title,
    description or heading is shown literally instead of being interpreted.

    Args:
        data (list):  A list of dictionaries, where each dictionary contains SEO data for a website.
            None entries are rendered as "No data available."
        comparison_results (dict): A dictionary containing the comparison results and suggestions
            under the keys 'comparison' and 'suggestions'. Each value may be
            a dict of label -> text or a single message string.

    Returns:
        str: The HTML content of the landing page.
    """
    from html import escape

    def render_list_items(section):
        """Render a results section (dict or plain message) as <li> elements."""
        if isinstance(section, dict):
            return ''.join(
                f"<li><strong>{escape(str(key).capitalize())}:</strong> {escape(str(value))}</li>"
                for key, value in section.items()
            )
        # compare_seo_data returns a bare message string when it had no input.
        return f"<li>{escape(str(section))}</li>"

    # Fragments are collected in a list and joined once at the end.
    parts = ["""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>SEO Comparison</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 0;
                background-color: #f4f4f4;
                color: #333;
            }
            header {
                background-color: #007bff;
                color: white;
                padding: 1rem;
                text-align: center;
            }
            main {
                padding: 2rem;
                max-width: 1000px;
                margin: 0 auto;
            }
            section {
                margin-bottom: 2rem;
                background-color: white;
                padding: 1rem;
                border-radius: 5px;
                box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            }
            h1 {
                margin-top: 0;
                color: #007bff;
            }
            h2 {
                color: #0056b3;
                margin-bottom: 0.5rem;
            }
            ul {
                list-style: none;
                padding: 0;
                margin-bottom: 1rem;
            }
            li {
                margin-bottom: 0.5rem;
            }
            strong {
                color: #007bff;
            }
            footer {
                background-color: #333;
                color: white;
                text-align: center;
                padding: 1rem;
                margin-top: 2rem;
            }
            @media (max-width: 600px) {
                main {
                    padding: 1rem;
                }
            }
             .error {
                color: red;
                font-weight: bold;
            }
        </style>
    </head>
    <body>
        <header>
            <h1>SEO Comparison</h1>
        </header>
        <main>
            <section>
                <h1>SEO Comparison Results</h1>
                <p>Here's a comparison of the SEO data from the websites you provided:</p>
                """]

    # One sub-section per requested website, numbered in input order.
    for i, data_item in enumerate(data):
        if data_item:  # None marks a site that could not be fetched or parsed
            headings = ', '.join(escape(str(h)) for h in data_item['headings'])
            parts.append(f"<h2>Website {i+1}: {escape(str(data_item['url']))}</h2>")
            parts.append("<ul>")
            parts.append(f"<li><strong>Title:</strong> {escape(str(data_item['title']))}</li>")
            parts.append(f"<li><strong>Description:</strong> {escape(str(data_item['description']))}</li>")
            parts.append(f"<li><strong>Keywords:</strong> {escape(str(data_item['keywords']))}</li>")
            parts.append(f"<li><strong>Headings:</strong> {headings}</li>")
            parts.append("</ul>")
        else:
            parts.append(f"<p class='error'>Website {i+1}: No data available.</p>")

    parts.append("""
            </section>
            <section>
                <h2>Comparison</h2>
                <ul>
    """)
    parts.append(render_list_items(comparison_results['comparison']))
    parts.append("""
                </ul>
            </section>
            <section>
                <h2>SEO Suggestions</h2>
                <p>Here are some suggestions to improve the SEO of the websites:</p>
                <ul>
    """)
    parts.append(render_list_items(comparison_results['suggestions']))
    parts.append("""
                </ul>
            </section>
        </main>
        <footer>
            <p>&copy; 2024 SEO Analysis</p>
        </footer>
    </body>
    </html>
    """)
    return ''.join(parts)

def main():
    """
    Main function to orchestrate the SEO comparison process.

    Runs an interactive menu loop: the user may enter up to three URLs, each
    page is fetched and analysed, the results are compared, and an HTML report
    is written to a randomly named file in the current working directory.
    The loop ends when the user chooses "Exit".
    """
    while True:
        print("\nSEO Comparison Tool")
        print("1. Enter URLs for comparison")
        print("2. Exit")

        choice = input("Enter your choice: ").strip()

        if choice == '1':
            urls = []
            for i in range(1, 4):
                url = input(f"Enter URL {i} (or type 'done' to finish): ").strip()
                if url.lower() == 'done':
                    break
                if not url:
                    # A blank line would otherwise become the bogus URL "https://".
                    print("Empty URL ignored.")
                    continue
                # Scheme comparison is case-insensitive ("HTTP://..." is valid).
                if not url.lower().startswith(('http://', 'https://')):
                    url = 'https://' + url  # Add protocol if missing
                urls.append(url)

            if not urls:
                # Without any URL there is nothing to fetch or report on.
                print("No URLs entered. Nothing to compare.")
                continue

            # Keep one entry per URL (None on failure) so the report's
            # "Website N" numbering matches the order the URLs were entered.
            data = []
            for url in urls:
                html_content = fetch_website_content(url)
                seo_data = extract_seo_data(html_content, url) if html_content else None
                data.append(seo_data if seo_data else None)

            comparison_results = compare_seo_data(data)
            html_page = generate_landing_page(data, comparison_results)

            # Save the HTML to a file; the random suffix avoids overwriting
            # reports from earlier runs.
            suffix = ''.join(random.choices(string.ascii_lowercase + string.digits, k=6))
            filename = f"seo_comparison_{suffix}.html"
            filepath = os.path.join(os.getcwd(), filename)  # save in current directory
            try:
                with open(filepath, "w", encoding="utf-8") as f:
                    f.write(html_page)
            except OSError as e:
                print(f"Error saving HTML file: {e}")
                # Fall back to stdout so the report is not lost.
                print(f"Here's the HTML content:\n{html_page}")
            else:
                print(f"Results saved to: {filepath}")
        elif choice == '2':
            print("Exiting...")
            break
        else:
            print("Invalid choice. Please try again.")

# Start the interactive menu only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


SEO Comparison Tool
1. Enter URLs for comparison
2. Exit
