# Python Web Scraping and GUI Programming
This notebook covers web scraping and GUI programming with real-life use cases, best practices, and code examples.

## 1. Web Scraping
**Definition:** Web scraping is the process of extracting data from websites using code.

**Syntax and Example:** Scraping the title of a web page.

In [None]:
import requests
from bs4 import BeautifulSoup
import time
import re
from pprint import pprint

# Basic web scraping example
# Fetches one page and prints its title, first <h1> and every paragraph.
print("Basic web scraping example:")

url = 'https://www.example.com'
try:
    # Send HTTP request to the website. requests applies no timeout by
    # default, so set one to keep the cell from hanging on a dead server.
    response = requests.get(url, timeout=10)
except requests.RequestException as e:
    # Connection problems, DNS failures, timeouts, invalid URLs, ...
    print(f"Error: {e}")
else:
    # Check if request was successful
    if response.status_code == 200:
        # Parse HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract title; soup.title is None when the page has no <title>,
        # and .string is None when the tag does not hold a single text node.
        title_tag = soup.title
        page_title = title_tag.string if title_tag and title_tag.string else 'Not found'
        print(f"Page title: {page_title}")

        # Extract main heading
        main_heading = soup.find('h1')
        print(f"Main heading: {main_heading.text if main_heading else 'Not found'}")

        # Extract paragraph text
        paragraphs = soup.find_all('p')
        print("\nParagraph content:")
        for p in paragraphs:
            print(f"- {p.text.strip()}")
    else:
        print(f"Failed to retrieve page: Status code {response.status_code}")

# More advanced web scraping techniques
# The snippets below are printed for reading only; none of them is executed.
print("\nAdvanced web scraping techniques:")

# CSS selectors example (demonstrative)
print("\n1. Using CSS selectors:")
print("""# Find elements by CSS selector
headlines = soup.select('h2.headline')  # All h2 elements with class 'headline'
user_links = soup.select('a.user-link')  # All anchor tags with class 'user-link'
table_rows = soup.select('table.data-table tr')  # All rows in table with class 'data-table'

# CSS selectors can target specific elements very precisely
first_column = soup.select('table.data-table tr > td:first-child')  # First cell in each row
""")

# Find by attributes example (demonstrative)
print("\n2. Finding elements by attributes:")
print("""# Find all links to external sites
external_links = soup.find_all('a', attrs={'rel': 'external'})

# Find elements with specific data attributes
data_elements = soup.find_all(attrs={'data-type': 'user-content'})

# Find by multiple attribute conditions
recent_posts = soup.find_all('div', attrs={
    'class': 'post',
    'data-date-created': lambda x: x and '2023' in x
})
""")

# Navigation example (demonstrative)
print("\n3. Navigating the DOM:")
print("""# Parent navigation
element = soup.find('span', class_='highlight')
parent_div = element.parent  # Direct parent
ancestor = element.find_parent('section')  # First parent that's a section

# Siblings navigation
next_element = element.next_sibling  # Next sibling in DOM
prev_element = element.previous_sibling  # Previous sibling

# Children navigation
children = list(element.children)  # Direct children
descendants = list(element.descendants)  # All descendants (children, grandchildren, etc.)
""")

# Regular expressions in web scraping (demonstrative)
print("\n4. Using regular expressions with BeautifulSoup:")
# Raw string: the snippet contains regex backslashes (\w, \.) that are not
# valid escape sequences in a normal string literal.
print(r"""# Find elements with text matching pattern
emails = soup.find_all(string=re.compile(r'[\w\.-]+@[\w\.-]+'))

# Find tags with attribute matching pattern (e.g., all image files)
images = soup.find_all('img', attrs={'src': re.compile(r'\.jpg$|\.jpeg$')})

# Find all headers (h1-h6)
all_headers = soup.find_all(re.compile(r'^h[1-6]$'))
""")

# Real-world example: Product scraping (simulated)
# Parses a hard-coded HTML fragment into a list of product dicts and prints
# the list together with the average price and average rating.
print("\nSimulated e-commerce product scraping example:")

# Simulated HTML content for demonstration
simulated_html = """
<div class="product-listing">
  <div class="product" id="prod-1234">
    <h2 class="product-title">Wireless Bluetooth Headphones</h2>
    <div class="product-price">$79.99</div>
    <div class="product-rating">4.5/5 (243 reviews)</div>
    <div class="product-stock">In Stock</div>
  </div>
  <div class="product" id="prod-5678">
    <h2 class="product-title">Smart Fitness Watch</h2>
    <div class="product-price">$149.99</div>
    <div class="product-rating">4.2/5 (187 reviews)</div>
    <div class="product-stock">Low Stock</div>
  </div>
  <div class="product" id="prod-9012">
    <h2 class="product-title">Bluetooth Portable Speaker</h2>
    <div class="product-price">$39.99</div>
    <div class="product-rating">4.7/5 (312 reviews)</div>
    <div class="product-stock">Out of Stock</div>
  </div>
</div>
"""

# Parse simulated HTML
soup = BeautifulSoup(simulated_html, 'html.parser')

# Compile the patterns once instead of on every loop iteration
rating_pattern = re.compile(r'([\d\.]+)/5')
reviews_pattern = re.compile(r'\((\d+) reviews\)')

# Extract product data
products = []
for product_elem in soup.select('.product'):
    # Extract product ID from attribute
    product_id = product_elem.get('id')

    # Extract product title
    title = product_elem.select_one('.product-title').text

    # Extract price and convert to float; drop the currency sign and any
    # thousands separators (e.g. "$1,299.99")
    price_text = product_elem.select_one('.product-price').text
    price = float(price_text.replace('$', '').replace(',', ''))

    # Extract rating (None when the text has no "x/5" part)
    rating_text = product_elem.select_one('.product-rating').text
    rating_match = rating_pattern.search(rating_text)
    rating = float(rating_match.group(1)) if rating_match else None

    # Extract number of reviews (0 when the text has no "(N reviews)" part)
    reviews_match = reviews_pattern.search(rating_text)
    reviews = int(reviews_match.group(1)) if reviews_match else 0

    # Extract stock status
    stock_status = product_elem.select_one('.product-stock').text

    # Add to products list
    products.append({
        'id': product_id,
        'title': title,
        'price': price,
        'rating': rating,
        'reviews': reviews,
        'stock_status': stock_status
    })

# Show extracted data
print("Extracted product data:")
pprint(products)

# Calculate average price and rating
avg_price = sum(p['price'] for p in products) / len(products) if products else 0
# Average only over products that actually have a rating: dividing by
# len(products) would drag the mean down for every unrated product, and a
# truthiness test would wrongly discard a legitimate 0.0 rating.
ratings = [p['rating'] for p in products if p['rating'] is not None]
avg_rating = sum(ratings) / len(ratings) if ratings else 0

print(f"\nAverage price: ${avg_price:.2f}")
print(f"Average rating: {avg_rating:.1f}/5")

# Web scraping ethics and best practices
# A numbered checklist, printed one item per line under a heading.
print("\nWeb scraping ethics and best practices:")
ethics_tips = [
    "1. Always check the website's robots.txt file and terms of service",
    "2. Add delays between requests (don't overwhelm the server)",
    "3. Identify your scraper with appropriate User-Agent headers",
    "4. Cache results to avoid repeated requests",
    "5. Only collect the data you need",
    "6. Consider using the site's API if available",
    "7. Respect copyright and data ownership",
]

print("\n".join(ethics_tips))

# Show how to respect robots.txt (demonstrative)
# The snippet is printed, not executed. It is wrapped in ''' ... ''' because
# it contains a """docstring""" of its own, which would otherwise terminate
# the enclosing string literal and make the whole cell a SyntaxError.
print("\nRespecting robots.txt example:")
print('''import requests
from urllib.robotparser import RobotFileParser

def is_scraping_allowed(url, user_agent="MyScraperBot"):
    """Check if scraping is allowed for the given URL and user agent"""
    # Parse the domain from the URL
    from urllib.parse import urlparse
    parsed_url = urlparse(url)
    domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
    
    # Get robots.txt URL
    robots_url = f"{domain}/robots.txt"
    
    # Initialize the parser
    rp = RobotFileParser()
    rp.set_url(robots_url)
    
    try:
        # Read robots.txt
        rp.read()
        # Check if user agent is allowed to fetch URL
        return rp.can_fetch(user_agent, url)
    except Exception as e:
        print(f"Error reading robots.txt: {e}")
        return False

# Example usage
url = "https://www.example.com/products"
if is_scraping_allowed(url):
    # Proceed with scraping
    print("Scraping is allowed")
else:
    print("Scraping is not allowed by robots.txt")
''')

# Adding proper headers example
# The snippet is assembled line by line and printed; it is never executed.
headers_snippet = (
    "# Create a headers dictionary that mimics a real browser\n"
    "headers = {\n"
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',\n"
    "    'Accept': 'text/html,application/xhtml+xml,application/xml',\n"
    "    'Accept-Language': 'en-US,en;q=0.9',\n"
    "    'Referer': 'https://www.google.com/',\n"
    "    'DNT': '1'  # Do Not Track\n"
    "}\n"
    "\n"
    "# Use headers in request\n"
    "response = requests.get('https://www.example.com', headers=headers)\n"
)
print("\nUsing proper headers example:", headers_snippet, sep="\n")

**Output (excerpt):**
Page title: Example Domain

**Real-life use case:** Collecting product prices from e-commerce websites for price comparison tools.

**Common mistakes:** Not respecting website terms of service or scraping too quickly (may get blocked).

**Best practices:** Use appropriate delays, respect robots.txt, and use headers to mimic browsers.

## 2. GUI Programming (tkinter)
**Definition:** GUI programming allows you to create graphical user interfaces for your applications.

**Syntax and Example:** Simple window with a button.

In [None]:
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np

# Basic tkinter window example
def basic_window_example(*, run_event_loop=False):
    """Build a small window holding one button and one label.

    Args:
        run_event_loop: When True, enter ``root.mainloop()`` and block until
            the window is closed. When False (the default, meant for
            notebooks), print a short note and destroy the window again so
            no half-initialised Tk root is left behind.
    """
    # Create the main window; on a machine without a display (e.g. a remote
    # notebook kernel) Tk cannot be initialised and raises TclError.
    try:
        root = tk.Tk()
    except tk.TclError as exc:
        print(f"\nNote: could not create a Tk window ({exc}).")
        return
    root.title('Simple GUI Example')  # Set window title
    root.geometry('300x200')  # Set window size (width x height)

    # Function to handle button clicks
    def say_hello():
        print('Hello from GUI!')
        # Show a message box
        messagebox.showinfo("Greeting", "Hello from the GUI!")

    # Create a button widget
    button = tk.Button(root, text='Click Me', command=say_hello)
    # Place the button in the window (using pack layout manager)
    button.pack(padx=20, pady=20)

    # Create a label widget
    label = tk.Label(root, text="This is a simple GUI example", font=("Arial", 12))
    label.pack(pady=10)

    if run_event_loop:
        # Blocks until the user closes the window.
        root.mainloop()
        return

    # Start the GUI event loop (would block execution in a regular script)
    print("\nNote: The following code would start the event loop and display the window.")
    print("In a Jupyter notebook, we'll skip this to avoid blocking the execution.\n")
    # The window is never shown in this mode, so release it instead of
    # leaking a Tk root that nothing will ever service.
    root.destroy()

# Show basic window example code
print("Basic tkinter window example:")
basic_window_example()

# More comprehensive GUI example
print("\nMore comprehensive GUI example:")

def comprehensive_gui_example():
    """Describe a larger tkinter application without running any GUI code.

    Prints a bullet list of the widgets the application would contain,
    followed by the first 500 characters of its source as a preview.
    """
    feature_lines = (
        "- Text entry fields",
        "- Dropdown menus",
        "- Checkboxes",
        "- Radio buttons",
        "- A data table",
        "- File selection dialog",
        "- A matplotlib chart embedded in the GUI",
    )
    print("This example would create a data entry form with:")
    for feature_line in feature_lines:
        print(feature_line)

    # Source of the application, kept as text only (never executed here)
    code_sample = """
    # Create main application window
    app = tk.Tk()
    app.title("Data Analysis Tool")
    app.geometry("800x600")
    
    # Create a notebook (tabbed interface)
    notebook = ttk.Notebook(app)
    notebook.pack(fill='both', expand=True, padx=10, pady=10)
    
    # First tab - Data Entry
    data_tab = ttk.Frame(notebook)
    notebook.add(data_tab, text="Data Entry")
    
    # Create a form in the first tab
    tk.Label(data_tab, text="Name:").grid(row=0, column=0, sticky='w', pady=5, padx=5)
    name_entry = tk.Entry(data_tab, width=30)
    name_entry.grid(row=0, column=1, pady=5, padx=5)
    
    tk.Label(data_tab, text="Age:").grid(row=1, column=0, sticky='w', pady=5, padx=5)
    age_entry = tk.Entry(data_tab)
    age_entry.grid(row=1, column=1, pady=5, padx=5)
    
    tk.Label(data_tab, text="Occupation:").grid(row=2, column=0, sticky='w', pady=5, padx=5)
    occupation = ttk.Combobox(data_tab, values=["Data Scientist", "Software Engineer", "Analyst", "Manager", "Other"])
    occupation.grid(row=2, column=1, pady=5, padx=5)
    
    # Checkboxes
    tk.Label(data_tab, text="Skills:").grid(row=3, column=0, sticky='w', pady=5, padx=5)
    skills_frame = tk.Frame(data_tab)
    skills_frame.grid(row=3, column=1, sticky='w')
    
    python_var = tk.BooleanVar()
    r_var = tk.BooleanVar()
    sql_var = tk.BooleanVar()
    
    tk.Checkbutton(skills_frame, text="Python", variable=python_var).pack(anchor='w')
    tk.Checkbutton(skills_frame, text="R", variable=r_var).pack(anchor='w')
    tk.Checkbutton(skills_frame, text="SQL", variable=sql_var).pack(anchor='w')
    
    # Radio buttons
    tk.Label(data_tab, text="Experience Level:").grid(row=4, column=0, sticky='w', pady=5, padx=5)
    exp_frame = tk.Frame(data_tab)
    exp_frame.grid(row=4, column=1, sticky='w')
    
    exp_var = tk.StringVar(value="intermediate")
    tk.Radiobutton(exp_frame, text="Beginner", variable=exp_var, value="beginner").pack(anchor='w')
    tk.Radiobutton(exp_frame, text="Intermediate", variable=exp_var, value="intermediate").pack(anchor='w')
    tk.Radiobutton(exp_frame, text="Expert", variable=exp_var, value="expert").pack(anchor='w')
    
    # Submit button
    def submit_data():
        data = {
            "name": name_entry.get(),
            "age": age_entry.get(),
            "occupation": occupation.get(),
            "skills": {
                "python": python_var.get(),
                "r": r_var.get(),
                "sql": sql_var.get()
            },
            "experience": exp_var.get()
        }
        messagebox.showinfo("Data Submitted", f"Submitted: {data}")
    
    submit_btn = tk.Button(data_tab, text="Submit", command=submit_data)
    submit_btn.grid(row=5, column=1, pady=10, padx=5, sticky='e')
    
    # Second tab - Data Visualization
    viz_tab = ttk.Frame(notebook)
    notebook.add(viz_tab, text="Visualization")
    
    # Add a matplotlib figure to the visualization tab
    fig = plt.Figure(figsize=(6, 4), dpi=100)
    ax = fig.add_subplot(111)
    
    # Sample data
    x = np.arange(0, 10, 0.1)
    y = np.sin(x)
    
    # Plot data
    ax.plot(x, y)
    ax.set_title('Sample Visualization')
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    
    # Embed the matplotlib figure in the tkinter window
    canvas = FigureCanvasTkAgg(fig, master=viz_tab)
    canvas.draw()
    canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
    
    # Third tab - Data Table
    table_tab = ttk.Frame(notebook)
    notebook.add(table_tab, text="Data Table")
    
    # Create a treeview (table widget)
    columns = ('name', 'age', 'occupation', 'experience')
    tree = ttk.Treeview(table_tab, columns=columns, show='headings')
    
    # Define headings
    tree.heading('name', text='Name')
    tree.heading('age', text='Age')
    tree.heading('occupation', text='Occupation')
    tree.heading('experience', text='Experience')
    
    # Sample data
    sample_data = [
        ('Alice', 28, 'Data Scientist', 'Expert'),
        ('Bob', 34, 'Software Engineer', 'Intermediate'),
        ('Charlie', 22, 'Analyst', 'Beginner'),
        ('Diana', 41, 'Manager', 'Expert')
    ]
    
    # Add data to the table
    for item in sample_data:
        tree.insert('', tk.END, values=item)
    
    # Add scrollbar
    scrollbar = ttk.Scrollbar(table_tab, orient=tk.VERTICAL, command=tree.yview)
    tree.configure(yscroll=scrollbar.set)
    
    # Pack widgets
    tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    
    # Menu bar
    menubar = tk.Menu(app)
    
    # File menu
    file_menu = tk.Menu(menubar, tearoff=0)
    file_menu.add_command(label="New", command=lambda: print("New file"))
    file_menu.add_command(label="Open", command=lambda: filedialog.askopenfilename())
    file_menu.add_command(label="Save", command=lambda: filedialog.asksaveasfilename())
    file_menu.add_separator()
    file_menu.add_command(label="Exit", command=app.quit)
    menubar.add_cascade(label="File", menu=file_menu)
    
    # Help menu
    help_menu = tk.Menu(menubar, tearoff=0)
    help_menu.add_command(label="About", command=lambda: messagebox.showinfo("About", "Data Analysis Tool v1.0"))
    menubar.add_cascade(label="Help", menu=help_menu)
    
    # Set the menu bar
    app.config(menu=menubar)
    
    # Start the application
    app.mainloop()
    """

    # Only a fixed-length prefix of the source is shown
    preview = code_sample[:500]
    print("\nSample code structure (not executed):")
    print(f"{preview}...\n[code continues]")

# Show comprehensive GUI example
comprehensive_gui_example()

# Alternative GUI libraries
# Maps each library name to three notes (pros, cons, best use); every
# library is printed as a heading followed by its notes as bullets.
print("\nAlternative Python GUI libraries:")

gui_libraries = {
    "PyQt/PySide": [
        "Pros: Professional look, comprehensive, cross-platform",
        "Cons: Licensing considerations, more complex",
        "Best for: Complex, feature-rich applications",
    ],
    "wxPython": [
        "Pros: Native look on each platform, free for commercial use",
        "Cons: Documentation can be lacking",
        "Best for: Business applications requiring native look",
    ],
    "Kivy": [
        "Pros: Great for multi-touch, mobile-friendly, cross-platform",
        "Cons: Non-native look and feel",
        "Best for: Mobile apps, touch applications, games",
    ],
    "PySimpleGUI": [
        "Pros: Simple API, low learning curve",
        "Cons: Limited for complex applications",
        "Best for: Quick utility tools, simple interfaces",
    ],
    "Dash": [
        "Pros: Web-based, great for data visualization",
        "Cons: Requires web browser to run",
        "Best for: Data dashboards, interactive data apps",
    ],
}

for library_name, notes in gui_libraries.items():
    print(f"\n{library_name}:")
    print("\n".join(f"- {note}" for note in notes))

# GUI Design Best Practices
# A numbered checklist, printed one item per line under a heading.
print("\nGUI Design Best Practices:")
best_practices = [
    "1. Keep the interface simple and intuitive",
    "2. Group related elements together",
    "3. Provide feedback for user actions",
    "4. Be consistent with layout and design",
    "5. Use appropriate widgets for each task",
    "6. Handle errors gracefully with informative messages",
    "7. Make important actions visible and accessible",
    "8. Test your UI with actual users",
]

print(*best_practices, sep="\n")

# Data science specific GUI applications
# Maps an application category to a one-line description; each pair is
# printed as a "- category: description" bullet.
print("\nGUI applications for data science:")
ds_applications = {
    "Data visualization tools": "Interactive plots, dashboards, and exploratory data analysis tools",
    "Parameter tuning interfaces": "GUIs for adjusting model parameters and seeing results in real-time",
    "Data labeling tools": "Interfaces for annotating training data",
    "Result presentation": "Professional presentations of analysis results for non-technical audiences",
    "Workflow management": "Visual pipeline creation for data processing workflows",
}

print(
    *(f"- {category}: {summary}" for category, summary in ds_applications.items()),
    sep="\n",
)

# Expected output:
# Basic tkinter window example:
# 
# Note: The following code would start the event loop and display the window.
# In a Jupyter notebook, we'll skip this to avoid blocking the execution.
#
# More comprehensive GUI example:
# This example would create a data entry form with:
# - Text entry fields
# - Dropdown menus
# - Checkboxes
# - Radio buttons
# - A data table
# - File selection dialog
# - A matplotlib chart embedded in the GUI
#
# Sample code structure (not executed):
# [code sample]
#
# Alternative Python GUI libraries:
# PyQt/PySide:
# - Pros: Professional look, comprehensive, cross-platform
# - Cons: Licensing considerations, more complex
# - Best for: Complex, feature-rich applications
# [other libraries...]
#
# GUI Design Best Practices:
# 1. Keep the interface simple and intuitive
# [other best practices...]
#
# GUI applications for data science:
# - Data visualization tools: Interactive plots, dashboards, and exploratory data analysis tools
# [other applications...]

## 3. Web Frameworks
**Definition:** Web frameworks enable Python developers to create web applications. Popular options include Flask and Django.

**Different frameworks:**
- **Flask**: Lightweight, flexible, minimal structure, good for small to medium apps and APIs
- **Django**: Full-featured, batteries-included, with admin interface, ORM, and security features
- **FastAPI**: Modern, high-performance, built for API development with automatic docs
- **Pyramid**: Flexible; ships with more built-in features than Flask while being less opinionated than Django
- **Bottle**: Ultra-lightweight, single-file framework for simple apps

In [None]:
# Flask example - a simple web application

# Note: This code would be saved to a file and run from the command line
# It's shown here for demonstration purposes

from flask import Flask, render_template, request, jsonify
# Application object; the @app.route decorators in this cell register their view functions on it.
app = Flask(__name__)

# Basic route returning HTML
@app.route('/')
def home():
    """Serve the landing page: a static HTML form that POSTs two numbers to /calculate."""
    landing_page = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Python Web App</title>
        <style>
            body { font-family: Arial; margin: 40px; }
            .container { max-width: 600px; margin: 0 auto; }
            .result { margin-top: 20px; padding: 10px; background: #f0f0f0; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>Simple Flask App</h1>
            <form action="/calculate" method="post">
                <div>
                    <label>Enter value 1:</label>
                    <input type="number" name="value1" required>
                </div>
                <div style="margin-top: 10px;">
                    <label>Enter value 2:</label>
                    <input type="number" name="value2" required>
                </div>
                <div style="margin-top: 15px;">
                    <button type="submit">Calculate Sum</button>
                </div>
            </form>
        </div>
    </body>
    </html>
    """
    return landing_page

# Route that handles form submission
@app.route('/calculate', methods=['POST'])
def calculate():
    """Add the two submitted form values and render the result as HTML.

    Reads ``value1`` and ``value2`` from the POSTed form and converts both
    to float.

    Returns:
        The result page on success, or an ``(error text, 400)`` pair when a
        field is missing or is not a number.
    """
    # Local import so this view does not depend on an extra top-of-file import.
    from html import escape

    try:
        value1 = float(request.form['value1'])
        value2 = float(request.form['value2'])
    except (KeyError, ValueError) as e:
        # The exception text can contain the raw submitted value, so escape
        # it before placing it in an HTML response, and report the failure
        # with a 400 status instead of a misleading 200.
        return f"Error: {escape(str(e))}", 400

    result = value1 + value2

    # value1, value2 and result are floats here, so interpolating them
    # cannot inject markup.
    return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Result</title>
            <style>
                body {{ font-family: Arial; margin: 40px; }}
                .container {{ max-width: 600px; margin: 0 auto; }}
                .result {{ margin-top: 20px; padding: 10px; background: #f0f0f0; }}
            </style>
        </head>
        <body>
            <div class="container">
                <h1>Calculation Result</h1>
                <div class="result">
                    <p>{value1} + {value2} = <strong>{result}</strong></p>
                </div>
                <p><a href="/">Calculate another sum</a></p>
            </div>
        </body>
        </html>
        """

# API endpoint returning JSON
@app.route('/api/sum', methods=['GET'])
def api_sum():
    """Return the sum of the query parameters ``a`` and ``b`` as JSON.

    A missing parameter defaults to 0.

    Returns:
        ``{"a": ..., "b": ..., "sum": ...}`` on success, or
        ``({"error": ...}, 400)`` when a parameter is not a number or when
        an operand or the sum is not finite.
    """
    # Local import so this view does not depend on an extra top-of-file import.
    import math

    try:
        a = float(request.args.get('a', 0))
        b = float(request.args.get('b', 0))
    except ValueError as e:
        return jsonify({'error': str(e)}), 400

    total = a + b
    # float() accepts 'nan' and 'inf', and two large finite values can sum
    # to inf; those would be serialized as NaN/Infinity, which are not
    # valid JSON tokens.
    if not all(math.isfinite(v) for v in (a, b, total)):
        return jsonify({'error': 'a, b and their sum must be finite numbers'}), 400

    return jsonify({
        'a': a,
        'b': b,
        'sum': total
    })

# If run directly
if __name__ == '__main__':
    # In a real application, this would start the web server
    # app.run(debug=True, port=5000)

    # Describe what the server would do instead of starting it
    startup_report = (
        "Starting Flask development server...",
        "This Flask application would:",
        "1. Start a web server on http://localhost:5000",
        "2. Serve an HTML form at the root URL (/)",
        "3. Process form submissions at /calculate",
        "4. Provide a JSON API endpoint at /api/sum?a=5&b=10",
    )
    print("\n".join(startup_report))
    
# Expected output if run as a script:
# Starting Flask development server...
# This Flask application would:
# 1. Start a web server on http://localhost:5000
# 2. Serve an HTML form at the root URL (/)
# 3. Process form submissions at /calculate
# 4. Provide a JSON API endpoint at /api/sum?a=5&b=10