<font size="7">DATA FETCHING USING MAPBOX API</font>

In [None]:
import os
import time
import requests
from pathlib import Path
import pandas as pd

# SECURITY: access tokens should not be committed to source control. The
# MAPBOX_TOKEN environment variable takes precedence when it is set and
# non-empty; the literal below is kept only as a backward-compatible fallback
# and should be rotated and removed once the environment variable is in use.
MAPBOX_TOKEN = os.environ.get("MAPBOX_TOKEN") or (
    "pk.eyJ1IjoicGl5dXNoLTA2MDYwNiIsImEiOiJjbWp3d3drYjYzaDRtM2RzZXdwb2p2cnpmIn0.oQ3z0H9hZuZqTl1TtCSfxA"
)

def fetch_mapbox(lat, lon, zoom=18, size=512, save_path="img.png", retries=1, timeout=10):
    """Download one Mapbox satellite image and write it to ``save_path``.

    Builds a static-image URL for the ``mapbox/satellite-v9`` style from the
    given longitude, latitude, zoom and square pixel size, then issues a GET
    request authenticated with the module-level ``MAPBOX_TOKEN``.

    Args:
        lat: Latitude interpolated into the request URL.
        lon: Longitude interpolated into the request URL.
        zoom: Zoom level interpolated into the request URL.
        size: Width and height of the requested image, in pixels.
        save_path: Destination file for the response body.
        retries: Maximum number of attempts; ``0`` or less makes no request.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        True if a 200 response was received and saved, False otherwise.
    """
    base_url = (
        "https://api.mapbox.com/styles/v1/mapbox/satellite-v9/static/"
        f"{lon},{lat},{zoom},0/{size}x{size}"
    )
    params = {"access_token": MAPBOX_TOKEN}

    for attempt in range(retries):
        try:
            r = requests.get(base_url, params=params, timeout=timeout)

            if r.status_code == 200:
                # Write to a sibling temp file and rename it into place, so an
                # interrupted write never leaves a truncated image that a
                # later "file already exists" check would accept as complete.
                tmp_path = f"{save_path}.part"
                with open(tmp_path, "wb") as f:
                    f.write(r.content)
                os.replace(tmp_path, save_path)
                return True

            print(f"  -> Mapbox Error {r.status_code}: {r.text}")

        except (requests.RequestException, OSError) as e:
            # Network failures and file-system errors are reported and
            # retried; programming errors are left to propagate.
            print(f"  -> Connection error on attempt {attempt + 1}: {e}")

        # Back off only when another attempt follows; sleeping after the last
        # failure just delays the caller.
        if attempt + 1 < retries:
            time.sleep(1)
    return False

def _first_present(row, names):
    """Return the first value in ``row`` under ``names`` that is not missing.

    A value counts as missing when the key is absent (``None``) or when
    pandas reports it as NA/NaN. Returns ``None`` if every candidate is
    missing. Falsy but valid values such as ``0.0`` are returned as-is.
    """
    for name in names:
        value = row.get(name)
        if value is not None and not pd.isna(value):
            return value
    return None


def main(args):
    """Fetch one satellite image per CSV row into ``args.out_dir``.

    Reads ``args.csv``, normalises the column names (stripped, lower-cased),
    and for each row with an id, a latitude and a longitude downloads
    ``<id>.png`` via ``fetch_mapbox`` unless that file already exists.

    Args:
        args: Object exposing ``csv`` (input path), ``out_dir`` (output
            directory), ``zoom`` and ``size`` (forwarded to ``fetch_mapbox``)
            and ``max`` (row limit; ``0`` or less means no limit).

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    if not os.path.exists(args.csv):
        print(f"Error: File {args.csv} not found.")
        return

    df = pd.read_csv(args.csv)
    df.columns = [c.strip().lower() for c in df.columns]

    print(f"Loaded CSV with {len(df)} rows.")
    print(f"Columns detected: {list(df.columns)}")

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    count = 0
    success_count = 0

    for idx, row in df.iterrows():
        if args.max > 0 and count >= args.max:
            break

        # Empty CSV cells arrive as NaN rather than None, and a coordinate of
        # 0.0 is falsy, so presence is tested explicitly instead of with
        # `is None` / `or`.
        lat = _first_present(row, ("lat", "latitude"))
        lon = _first_present(row, ("long", "lon", "longitude"))
        img_id = _first_present(row, ("id",))

        if lat is None or lon is None or img_id is None:
            print(f"Skipping row {idx}: Missing coordinates or ID")
            continue

        # iterrows() upcasts rows of an all-numeric frame to float, which
        # would turn id 123 into the file name "123.0.png".
        if isinstance(img_id, float) and img_id.is_integer():
            img_id = int(img_id)

        img_name = out_dir / f"{img_id}.png"

        # Images already on disk count towards both the limit and the total.
        if img_name.exists():
            count += 1
            success_count += 1
            continue

        print(f"[{count+1}] Fetching ID {img_id} (Lat: {lat}, Lon: {lon})...")

        ok = fetch_mapbox(lat, lon,
                          zoom=args.zoom,
                          size=args.size,
                          save_path=str(img_name))

        if ok:
            success_count += 1

        count += 1

        # Pause for a second after every 50th processed row.
        if count % 50 == 0:
            time.sleep(1)

    print("-" * 30)
    print(f"Done! Successfully saved {success_count} images to '{out_dir}'.")

if __name__ == "__main__":
    # Stand-in for parsed command-line arguments: main() only reads these
    # five attributes, so a plain class with class-level values is enough.
    class Args:
        # Input CSV and the directory the PNG files are written to.
        csv = "train(1)(train(1)).csv"
        out_dir = "images_mapbox"
        # Row limit; main() treats 0 as "no limit".
        max = 0
        # Values forwarded to fetch_mapbox for every request.
        size = 512
        zoom = 18

    args = Args()
    main(args)

Loaded CSV with 16209 rows.
Columns detected: ['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15']
[1] Fetching ID 9117000170 (Lat: 47.4362, Lon: -122.187)...
[2] Fetching ID 6700390210 (Lat: 47.4034, Lon: -122.187)...
[3] Fetching ID 7212660540 (Lat: 47.2704, Lon: -122.313)...
[4] Fetching ID 8562780200 (Lat: 47.5321, Lon: -122.073)...
[5] Fetching ID 7760400350 (Lat: 47.3715, Lon: -122.074)...
[6] Fetching ID 464001025 (Lat: 47.6948, Lon: -122.395)...
[7] Fetching ID 3432500486 (Lat: 47.7463, Lon: -122.315)...
[8] Fetching ID 1126059095 (Lat: 47.7489, Lon: -122.123)...
[9] Fetching ID 3876500290 (Lat: 47.3377, Lon: -122.291)...
[10] Fetching ID 1865400075 (Lat: 47.6983, Lon: -122.367)...
[11] Fetching ID 2558690150 (Lat: 47.7212, Lon: -122.172)...
[12] Fetching ID 7154200070 (Lat: 47.7764, Lon: