<a href="https://colab.research.google.com/github/Sowmya74/Apple_Data_Analysis/blob/main/WebScraping_Apple_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import re
import time
import os
import zipfile

def clean_filename(filename):
    """Return *filename* with unsafe characters replaced by underscores.

    Word characters, hyphens, underscores, dots and spaces are kept as-is;
    every other character is replaced, one-for-one, with ``_``.
    """
    disallowed = re.compile(r'[^\w\-_\. ]')
    return disallowed.sub('_', filename)

def _unique_csv_name(stem):
    """Return ``<stem>.csv``, or ``<stem>_2.csv``, ``<stem>_3.csv``, ... if taken."""
    candidate = f'{stem}.csv'
    suffix = 2
    while os.path.exists(candidate):
        candidate = f'{stem}_{suffix}.csv'
        suffix += 1
    return candidate


def _table_rows(table):
    """Return the stripped cell texts of every row of *table* that has cells."""
    rows = []
    for row in table.find_all('tr'):
        cells = row.find_all(['th', 'td'])
        if cells:
            rows.append([cell.text.strip() for cell in cells])
    return rows


def scrape_tables(url):
    """Download *url* and write each HTML table on the page to its own CSV file.

    Tables carrying the ``has-fixed-layout`` or ``table-primary`` class are
    preferred; if none are found, every ``<table>`` on the page is used.
    Each CSV is named after the closest preceding ``h2``/``h3``/``h4``
    heading (sanitised with ``clean_filename``), falling back to
    ``table_<n>`` when there is no usable heading. The first row that has
    cells is written first (as the header row), followed by the remaining
    rows.

    A name that already exists on disk gets a numeric suffix (``_2``,
    ``_3``, ...) instead of being overwritten, so several tables under the
    same heading are all kept.

    Args:
        url: Address of the page to scrape.

    Returns:
        List of the CSV file paths written, in page order. Empty if the
        request failed or no table produced any rows.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            response = session.get(url, headers=request_headers, timeout=10)
            response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []

    time.sleep(2)  # Add a delay to avoid overwhelming the server

    soup = BeautifulSoup(response.content, 'html.parser')

    all_tables = soup.find_all('table')
    print(f"Total tables found: {len(all_tables)}")

    tables = soup.find_all('table', {'class': ['has-fixed-layout', 'table-primary']})
    print(f"Tables with specified classes: {len(tables)}")

    if not tables:
        print("No tables with specified classes found. Trying to scrape all tables.")
        tables = all_tables

    csv_files = []

    for i, table in enumerate(tables):
        rows = _table_rows(table)
        if not rows:
            print(f"No rows found in table {i+1}. Skipping.")
            continue

        heading = table.find_previous(['h2', 'h3', 'h4'])
        stem = clean_filename(heading.text.strip()) if heading else ''
        if not stem:
            # No heading, or a heading with no text: use a positional name.
            stem = f'table_{i+1}'

        # Several tables often share one heading; never clobber an earlier file.
        filename = _unique_csv_name(stem)

        with open(filename, 'w', newline='', encoding='utf-8') as f:
            # rows[0] is the header row; the rest are data rows.
            csv.writer(f).writerows(rows)

        csv_files.append(filename)
        print(f"Data saved to {filename}")

    return csv_files

def create_zip(csv_files, zip_filename):
    """Bundle *csv_files* into a compressed zip archive, then delete the originals.

    Each existing file is stored under its base name using DEFLATE
    compression. Paths that do not exist are reported and skipped, and a
    path listed more than once is archived only once. The source files are
    removed only after the archive has been written and closed, so a
    failure while zipping does not lose any data.

    Args:
        csv_files: Iterable of file paths to add to the archive.
        zip_filename: Path of the zip archive to create (overwritten if it
            already exists).
    """
    archived = []
    with zipfile.ZipFile(zip_filename, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        # dict.fromkeys drops repeated paths while keeping the original order.
        for file in dict.fromkeys(csv_files):
            # Check if the file exists before attempting to add it to the zip
            if os.path.exists(file):
                zipf.write(file, os.path.basename(file))
                archived.append(file)
            else:
                print(f"Warning: File {file} not found and will be skipped.")

    # Remove the sources only once the archive is safely finalised.
    for file in archived:
        os.remove(file)

    print(f"All CSV files have been compressed into {zip_filename}")


# Pages whose HTML tables are exported to CSV
urls = [
    "https://www.demandsage.com/iphone-user-statistics/",
    "https://backlinko.com/apple-statistics"
]

# Paths of every CSV written, accumulated across all pages
all_csv_files = []

for url in urls:
    print(f"\nScraping tables from: {url}")
    csv_files = scrape_tables(url)
    all_csv_files += csv_files

if not all_csv_files:
    print("No tables were scraped.")
else:
    # Bundle everything into a single archive (this also deletes the CSVs)
    zip_filename = "scraped_tables.zip"
    create_zip(all_csv_files, zip_filename)

print("Scraping completed.")


Scraping tables from: https://www.demandsage.com/iphone-user-statistics/
Total tables found: 17
Tables with specified classes: 17
Data saved to How Many iPhone Users Are There In The World_.csv
Data saved to Number Of iPhone Users In The United States.csv
Data saved to Share Of iPhone Users Among Other Smartphone Users In The US.csv
Data saved to iPhone Sales And Shipment Statistics.csv
Data saved to iPhone Sales And Shipment Statistics.csv
Data saved to iPhone Market Share Statistics.csv
Data saved to iPhone Market Share Statistics.csv
Data saved to iPhone Users Demographic.csv
Data saved to iPhone Vs. Android.csv
Data saved to iPhone Vs. Android.csv
Data saved to Most Popular Apps on iPhone.csv
Data saved to Most Popular Apps on iPhone.csv
Data saved to Apple_s iPhone Revenue Statistics.csv
Data saved to Apple_s iPhone Revenue Statistics.csv
Data saved to iOS Market Share Statistics.csv
Data saved to iOS Market Share Statistics.csv
Data saved to It Is Predicted That The iPhone 15 Pr