In [None]:
import datetime
import os
import re
import time

import mysql.connector
import requests
from bs4 import BeautifulSoup
from mysql.connector import Error

In [None]:
# Target Samsung Galaxy S-series models, mapped to the GSMArena spec page that
# is scraped for each one. The key is stored as `model_name` (a UNIQUE column),
# the value is fetched as-is and stored as `gsmarena_url`.
# NOTE(review): entries tagged "future/placeholder" use sequential page ids
# (14042-14046) that look invented rather than looked up; if any of them
# resolves to a different phone, that phone's specs would be saved under the
# wrong model name. Confirm each URL before running the scraper.
TARGET_PHONES = {
    "Samsung Galaxy S20": "https://www.gsmarena.com/samsung_galaxy_s20-10081.php",
    "Samsung Galaxy S20+": "https://www.gsmarena.com/samsung_galaxy_s20+-10082.php",
    "Samsung Galaxy S20 Ultra": "https://www.gsmarena.com/samsung_galaxy_s20_ultra-10084.php",
    "Samsung Galaxy S20 FE": "https://www.gsmarena.com/samsung_galaxy_s20_fe_5g-10377.php",
    "Samsung Galaxy S21": "https://www.gsmarena.com/samsung_galaxy_s21_5g-10626.php",
    "Samsung Galaxy S21+": "https://www.gsmarena.com/samsung_galaxy_s21+_5g-10625.php",
    "Samsung Galaxy S21 Ultra": "https://www.gsmarena.com/samsung_galaxy_s21_ultra_5g-10596.php",
    "Samsung Galaxy S21 FE": "https://www.gsmarena.com/samsung_galaxy_s21_fe_5g-10954.php",
    "Samsung Galaxy S22": "https://www.gsmarena.com/samsung_galaxy_s22-11253.php",
    "Samsung Galaxy S22+": "https://www.gsmarena.com/samsung_galaxy_s22+-11254.php",
    "Samsung Galaxy S22 Ultra": "https://www.gsmarena.com/samsung_galaxy_s22_ultra_5g-11251.php",
    "Samsung Galaxy S23": "https://www.gsmarena.com/samsung_galaxy_s23-12076.php",
    "Samsung Galaxy S23+": "https://www.gsmarena.com/samsung_galaxy_s23+-12083.php",
    "Samsung Galaxy S23 Ultra": "https://www.gsmarena.com/samsung_galaxy_s23_ultra-12024.php",
    "Samsung Galaxy S23 FE": "https://www.gsmarena.com/samsung_galaxy_s23_fe-12559.php",
    "Samsung Galaxy S24": "https://www.gsmarena.com/samsung_galaxy_s24-13086.php",
    "Samsung Galaxy S24+": "https://www.gsmarena.com/samsung_galaxy_s24+-13085.php",
    "Samsung Galaxy S24 Ultra": "https://www.gsmarena.com/samsung_galaxy_s24_ultra-13105.php",
    "Samsung Galaxy S24 FE": "https://www.gsmarena.com/samsung_galaxy_s24_fe-14042.php",  # future/placeholder
    "Samsung Galaxy S25": "https://www.gsmarena.com/samsung_galaxy_s25-14043.php",        # future/placeholder
    "Samsung Galaxy S25+": "https://www.gsmarena.com/samsung_galaxy_s25+-14044.php",      # future/placeholder
    "Samsung Galaxy S25 Ultra": "https://www.gsmarena.com/samsung_galaxy_s25_ultra-14045.php", # future/placeholder
    "Samsung Galaxy S25 Edge": "https://www.gsmarena.com/samsung_galaxy_s25_edge-14046.php"    # future/placeholder
}

In [None]:
# Connect to MySQL
def connect_db():
    """Open a connection to the MySQL database that stores the scraped specs.

    Connection settings are read from environment variables so credentials do
    not have to be written into the notebook. Each setting falls back to the
    value that used to be hard-coded here when its variable is unset:

        MYSQL_HOST      default "localhost"
        MYSQL_USER      default "root"
        MYSQL_PASSWORD  default "" (empty)
        MYSQL_DATABASE  default "samsung_phones"

    Returns:
        The object returned by ``mysql.connector.connect`` on success, or
        ``None`` when the connector raises ``Error`` (the error is printed,
        not re-raised, so callers must check for ``None``).
    """
    try:
        return mysql.connector.connect(
            host=os.environ.get("MYSQL_HOST", "localhost"),
            user=os.environ.get("MYSQL_USER", "root"),
            password=os.environ.get("MYSQL_PASSWORD", ""),
            database=os.environ.get("MYSQL_DATABASE", "samsung_phones"),
        )
    except Error as e:
        print("Error connecting to DB:", e)
        return None


In [None]:
# Create table
def create_table():
    """Create the ``phones`` table if it does not already exist.

    Obtains a connection from ``connect_db()``; when that returns a falsy
    value (connection failed) the function returns without doing anything.
    ``model_name`` is declared UNIQUE, which is what the upsert in the
    scraper's ``ON DUPLICATE KEY UPDATE`` clause keys on.

    The cursor and the connection are always closed, including when the
    statement or the commit raises; the exception itself still propagates
    to the caller.
    """
    conn = connect_db()
    if not conn:
        return
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
            CREATE TABLE IF NOT EXISTS phones (
                id INT AUTO_INCREMENT PRIMARY KEY,
                model_name VARCHAR(255) NOT NULL UNIQUE,
                display_size VARCHAR(100),
                resolution VARCHAR(100),
                chipset VARCHAR(255),
                memory VARCHAR(255),
                main_camera TEXT,
                selfie_camera TEXT,
                battery TEXT,
                os VARCHAR(255),
                price VARCHAR(100),
                release_date DATE,
                gsmarena_url TEXT,
                scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
            """)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even if the DDL failed part-way.
        conn.close()

In [None]:
# Scrape specs
# English month names, spelled out here (instead of using calendar/strptime)
# so that parsing does not depend on the process locale.
_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
)


def _parse_announced_date(text):
    """Turn an "Announced" value into a ``datetime.date`` for the DATE column.

    Accepts text that starts with ``"YYYY, Month DD"`` or ``"YYYY, Month"``
    (day defaults to 1); month may be a full English name or its 3-letter
    abbreviation, and anything after the date is ignored.
    NOTE(review): assumes the site formats the field this way - confirm
    against a live page.

    Returns ``None`` for empty input or anything that does not parse (for
    example free text, a quarter such as "2005, Q1", or an impossible date).
    """
    if not text:
        return None
    match = re.match(r"\s*(\d{4})\s*,\s*([A-Za-z]+)(?:\s+(\d{1,2})(?!\d))?", text)
    if match is None:
        return None
    year_text, month_text, day_text = match.groups()
    month_key = month_text.lower()
    for month_number, month_name in enumerate(_MONTH_NAMES, start=1):
        if month_key == month_name or month_key == month_name[:3]:
            break
    else:
        return None
    try:
        return datetime.date(int(year_text), month_number, int(day_text) if day_text else 1)
    except ValueError:
        # e.g. "2021, February 30" or year 0000
        return None


def scrape_phone(url, model_name):
    """Fetch one phone's spec page and upsert its specs into ``phones``.

    Args:
        url: Address of the spec page to download; also stored as
            ``gsmarena_url``.
        model_name: Value for the UNIQUE ``model_name`` column. An existing
            row with the same name is updated instead of duplicated.

    Returns:
        None. Progress and failures are reported with ``print``; any
        exception raised while fetching, parsing or saving is caught and
        printed so that one bad page does not stop a batch run.

    The "Announced" text is converted to a ``datetime.date`` (or ``None``)
    before saving, because the ``release_date`` column is a DATE and the raw
    page text is not a valid date literal. The HTTP request uses a timeout so
    an unresponsive server cannot stall the run, and the cursor/connection
    are closed even when the insert fails.
    """
    try:
        r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "html.parser")

        def get_spec(label):
            # Case-insensitive substring match against the text of the first
            # matching <td class="ttl"> cell; the value is the next <td>.
            # NOTE(review): only td.ttl cells are searched, so a label that the
            # page renders as a section header instead would come back None -
            # verify "Main Camera", "Selfie camera" and "Battery" on a live page.
            row = soup.find("td", class_="ttl", string=lambda t: t and label.lower() in t.lower())
            if row:
                td = row.find_next_sibling("td")
                if td:
                    return td.get_text(" ", strip=True)
            return None

        # Extract specifications
        specs = {
            "display_size": get_spec("Size"),
            "resolution": get_spec("Resolution"),
            "chipset": get_spec("Chipset"),
            "memory": get_spec("Internal"),
            "main_camera": get_spec("Main Camera"),
            "selfie_camera": get_spec("Selfie camera"),
            "battery": get_spec("Battery"),
            "os": get_spec("OS"),
            "price": get_spec("Price"),
            "release_date": _parse_announced_date(get_spec("Announced")),
        }

        # Save to DB
        conn = connect_db()
        if not conn:
            return
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("""
                INSERT INTO phones (model_name, display_size, resolution, chipset, memory, main_camera, selfie_camera, battery, os, price, release_date, gsmarena_url)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                ON DUPLICATE KEY UPDATE
                    display_size=VALUES(display_size),
                    resolution=VALUES(resolution),
                    chipset=VALUES(chipset),
                    memory=VALUES(memory),
                    main_camera=VALUES(main_camera),
                    selfie_camera=VALUES(selfie_camera),
                    battery=VALUES(battery),
                    os=VALUES(os),
                    price=VALUES(price),
                    release_date=VALUES(release_date),
                    gsmarena_url=VALUES(gsmarena_url)
                """, (model_name, specs["display_size"], specs["resolution"], specs["chipset"],
                      specs["memory"], specs["main_camera"], specs["selfie_camera"],
                      specs["battery"], specs["os"], specs["price"], specs["release_date"], url))
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        print(f"✅ Inserted {model_name}")
    except Exception as e:
        print(f"❌ Error scraping {model_name} ({url}): {e}")

In [None]:
# Main
if __name__ == "__main__":
    # Make sure the destination table exists before any page is scraped.
    create_table()
    # Walk the targets in declaration order, one request at a time.
    for model in TARGET_PHONES:
        url = TARGET_PHONES[model]
        scrape_phone(url, model)
        # Pause between requests so the site is not hit in a tight loop.
        time.sleep(2)