In [None]:
# Install beautifulsoup4 (imported below as bs4) via the %pip magic.
# NOTE(review): the version is unpinned, and requests / pandas are not installed here —
# presumably they are already available in the environment; confirm.
%pip install beautifulsoup4

In [52]:
import requests
import os
import pandas as pd
from bs4 import BeautifulSoup

In [None]:
# Path the flattened listings table is written to as CSV.
OUT_CSV = "Data/data.csv"

# Listings endpoint that gets queried; the query string asks for page=1, limit=2000
# and fields=minimal.
SOURCE_URL = "https://api.floridayachttrader.com/api/boats/merged/all-sources?page=1&limit=2000&fields=minimal"

In [54]:
# Create the folder that OUT_CSV points into before anything is written there.
# Deriving it from OUT_CSV (instead of repeating the folder name) keeps the two in
# sync if the output path is ever changed.
out_dir = os.path.dirname(OUT_CSV)
if out_dir:  # a bare filename has no directory part, and os.makedirs("") would raise
    os.makedirs(out_dir, exist_ok=True)

In [55]:
# Headers sent with the API request: a desktop-browser User-Agent string and an
# Accept header asking for a JSON body.
headers = dict(
    [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"),
        ("Accept", "application/json"),
    ]
)

In [56]:
def clean_html(raw_html):
    """Return the visible text of an HTML fragment, with markup removed.

    Args:
        raw_html: An HTML string, or a list/tuple of fragments. Sequence parts
            that are empty or ``None`` are skipped; the remaining parts are
            converted to ``str`` and joined with a single space before parsing.

    Returns:
        The extracted text, with text nodes separated by one space and
        surrounding whitespace stripped. Returns ``""`` for any falsy or
        whitespace-only input.
    """
    if not raw_html:
        return ""
    if isinstance(raw_html, (list, tuple)):
        # A plain " ".join() raises TypeError as soon as one element is None or
        # another non-string value, so filter and coerce each part first.
        raw_html = " ".join(str(part) for part in raw_html if part)
    if not raw_html.strip():
        # Nothing left to parse (e.g. a list of only empty parts).
        return ""
    return BeautifulSoup(raw_html, "html.parser").get_text(separator=" ", strip=True)

In [57]:
# Fetch the listings payload. requests applies no timeout by default, so an explicit
# one keeps this cell from hanging forever on an unresponsive server.
response = requests.get(SOURCE_URL, headers=headers, timeout=60)
print("Status code:", response.status_code)
# Fail here on a 4xx/5xx reply rather than in a later cell while parsing an error body.
response.raise_for_status()

Status code: 200


In [58]:
# Decode the JSON body and take the list of listings stored under its "data" key.
data_json = response.json()
# `or []` also covers a payload where "data" is present but null; the default passed
# to .get() only applies when the key is missing entirely.
boats = data_json.get("data") or []
print(f"Total boats fetched: {len(boats)}")
# NOTE(review): SOURCE_URL requests page=1 with limit=2000; if the count printed here
# equals that limit, the result may be truncated — confirm whether further pages exist.

Total boats fetched: 2000


In [68]:
# Listing keys copied unchanged into each output row. The order of these tuples is
# the column order of the resulting table.
BOAT_FIELDS = (
    "DocumentID",
    "ListingTitle",
    "BeamMeasure",
    "TotalEnginePowerQuantity",
    "Price",
    "Model",
    "FuelTankCapacityMeasure",
    "FuelTankCountNumeric",
    "ModelYear",
    "MakeString",
    "LengthOverall",
    "NominalLength",
)
# Listing keys whose values are run through clean_html before being stored.
HTML_FIELDS = ("GeneralBoatDescription", "AdditionalDetailDescription")
# Keys read from the nested "BoatLocation" object.
LOCATION_FIELDS = ("BoatCityName", "BoatCountryID", "BoatStateCode")
# Keys read from the first entry of "Engines"; stored with an "Engine_" prefix.
ENGINE_FIELDS = (
    "Make",
    "Model",
    "EnginePower",
    "Fuel",
    "Type",
    "DriveTransmissionDescription",
    "PropellerType",
    "Hours",
)

# Flatten every listing into one dict per boat; missing values become "".
rows = []
for boat in boats:
    # The default passed to .get() is only used when the key is absent. A key that is
    # present but null (or any other non-dict) must also fall back to {}, otherwise
    # the .get() calls below raise AttributeError.
    boat_location = boat.get("BoatLocation")
    if not isinstance(boat_location, dict):
        boat_location = {}

    # Only the first engine is exported; any further entries in "Engines" are ignored.
    engines = boat.get("Engines")
    first_engine = engines[0] if isinstance(engines, list) and engines else None
    if not isinstance(first_engine, dict):
        first_engine = {}

    row = {field: boat.get(field, "") for field in BOAT_FIELDS}
    row.update((field, clean_html(boat.get(field, ""))) for field in HTML_FIELDS)
    row.update((field, boat_location.get(field, "")) for field in LOCATION_FIELDS)
    row.update((f"Engine_{field}", first_engine.get(field, "")) for field in ENGINE_FIELDS)

    rows.append(row)

In [69]:
# Build the table in one step from the list of per-boat dicts.
df = pd.DataFrame(rows)

# De-duplicate on DocumentID, keeping the first occurrence of each ID.
# Rows with a missing or blank DocumentID are exempt: they would otherwise all compare
# equal to each other and collapse into a single row, silently dropping listings.
# The column check keeps the cell from raising KeyError when `rows` is empty.
if "DocumentID" in df.columns:
    doc_id = df["DocumentID"]
    has_id = doc_id.notna() & doc_id.astype(str).str.strip().ne("")
    repeated_id = has_id & df.duplicated(subset=["DocumentID"])
    # Reassign instead of mutating in place, so re-running the cell starts from `rows`.
    df = df[~repeated_id]

In [70]:
# Write the table to OUT_CSV without the index column, then report the row count.
df.to_csv(path_or_buf=OUT_CSV, index=False)
print(f"Saved {len(df)} unique boats to {OUT_CSV}")

Saved 2000 unique boats to Data/data.csv
