## 🛰️ AIS Ship Tracking Scraper (MarineTraffic.com)

**Goal:** Scrape real-time vessel data from MarineTraffic's JSON endpoint using `Selenium`, and export structured ship metadata to CSV.


In [None]:
import json
import csv
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time


### 🧰 Setup Selenium WebDriver (Chrome)

In [None]:
def setup_driver(headless=False):
    """Create a Chrome WebDriver configured with basic anti-detection options.

    Args:
        headless: When true, start Chrome with ``--headless`` so no browser
            window is opened. Defaults to False (visible window).

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.

    Raises:
        Exception: Whatever Selenium raises if Chrome cannot be started or
            the start-up script cannot be registered. In the latter case the
            browser is shut down before the exception propagates.
    """
    chrome_options = Options()

    # Present a regular desktop Chrome user agent.
    chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

    # Basic anti-detection
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    if headless:
        chrome_options.add_argument("--headless")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # execute_script() would only patch the document that is loaded right
        # now, so the override would be gone after the first driver.get().
        # Registering the script through the DevTools protocol makes Chrome
        # evaluate it in every new document before the page's own scripts.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
        )
    except Exception:
        # Do not leave an orphaned browser process behind if setup fails.
        driver.quit()
        raise

    return driver


### 🌐 Scrape AIS JSON Data

In [None]:
def scrape_data(url):
    """Load *url* in Chrome and parse the page content as JSON.

    The JSON text is read from the first ``<pre>`` element if one exists
    (what Chrome renders for raw JSON responses), otherwise from the text of
    ``<body>``.

    Args:
        url: Address of the JSON endpoint to open.

    Returns:
        The decoded JSON value, or ``None`` if anything went wrong (the error
        is printed). The browser is always closed before returning.
    """
    # Local import: only this function needs the exception type.
    from selenium.common.exceptions import NoSuchElementException

    driver = None
    try:
        driver = setup_driver()

        print(f"Accessing: {url}")
        driver.get(url)

        # Wait for page to load
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        try:
            # Try pre tag first (common for JSON responses)
            json_text = driver.find_element(By.TAG_NAME, "pre").text
        except NoSuchElementException:
            # Only a missing <pre> triggers the fallback; any other failure
            # is reported by the outer handler instead of being masked.
            json_text = driver.find_element(By.TAG_NAME, "body").text

        return json.loads(json_text)

    except Exception as e:
        # Top-level boundary: callers expect None on failure, not an exception.
        print(f"Error: {e}")
        return None
    finally:
        if driver is not None:
            driver.quit()


### 💾 Convert JSON to CSV

In [None]:
def save_to_csv(data, output_path):
    """Write the ship rows found at ``data['data']['rows']`` to a CSV file.

    The header is the sorted union of the keys of all rows; a row that lacks
    a column gets an empty cell for it.

    Args:
        data: Decoded JSON value, expected to be a dict holding a list of
            row dicts under ``data['data']['rows']``.
        output_path: Destination CSV path. Missing parent directories are
            created; a bare file name is written to the current directory.

    Returns:
        True if the file was written, False if *data* does not have the
        expected structure or contains no rows (a message is printed).
    """
    # Validate the nesting step by step so a non-dict at any level (e.g.
    # {"data": None}) is reported as invalid instead of raising TypeError.
    payload = data.get('data') if isinstance(data, dict) else None
    if not isinstance(payload, dict) or 'rows' not in payload:
        print("No valid data found")
        return False

    rows = payload['rows']
    if not rows:
        print("No ship data found")
        return False

    # Create output directory if needed. dirname is '' for a bare file name,
    # and os.makedirs('') raises FileNotFoundError, so skip it in that case.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Union of all keys, sorted for a stable column order.
    fieldnames = sorted({key for row in rows for key in row.keys()})

    # Write CSV
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"✅ Saved {len(rows)} ships to {output_path}")
    return True


### 🚀 Run Main Function

In [None]:
def main(url=None, output_path=None):
    """Scrape the MarineTraffic endpoint and export the ships to CSV.

    Args:
        url: JSON endpoint to scrape. Defaults to the MarineTraffic tile
            endpoint used by this notebook.
        output_path: Destination CSV file. Defaults to ``ships.csv`` in the
            current user's ``Downloads`` folder.
    """
    if url is None:
        url = "https://www.marinetraffic.com/getData/get_data_json_4/z:5/X:10/Y:6/station:0"
    if output_path is None:
        # Home-relative so the notebook runs on any machine, not just the
        # one it was written on.
        output_path = os.path.join(os.path.expanduser("~"), "Downloads", "ships.csv")

    print("Scraping Marine Traffic data...")

    # Scrape the data
    data = scrape_data(url)

    if not data:
        print("Failed to get data")
        return

    # Only report success when the CSV was actually written; save_to_csv
    # prints its own reason when it returns False.
    if save_to_csv(data, output_path):
        print("Done!")
    else:
        print("Failed to save data")

if __name__ == "__main__":
    main()
