# Scraping images from the Mapbox satellite view
The free tier allows 50,000 requests per month: keep track of how many images have already been scraped so that this limit is not exceeded.

Importing packages

In [10]:
import os
import random
import requests
import time
from pathlib import Path
from dotenv import load_dotenv

CONFIG

In [None]:
# ======================
# CONFIG
# ======================

# Load variables from a local .env file (if present) into the environment,
# so the token never has to be hard-coded in the notebook.
load_dotenv()

MAPBOX_ACCESS_TOKEN = os.getenv("MAPBOX_ACCESS_TOKEN")
NB_IMAGES = 2  # number of images requested by one run of the download loop
WIDTH = 800    # image size used in the "{WIDTH}x{HEIGHT}" part of the request URL
HEIGHT = 600
ZOOM = 18  # 18-19 to see buildings, solar panels, pools clearly
PATH = Path.cwd()
# Images are stored next to the current working directory, in ../data/scraped_images
OUTPUT_DIR = PATH.parent / "data" / "scraped_images"

# Bounding box for metropolitan France
LAT_MIN, LAT_MAX = 41.0, 51.5
LON_MIN, LON_MAX = -5.5, 9.5

# Fail fast when the token is missing: otherwise the literal string "None" is
# sent as the access token and every request is rejected by the API.
# Done last so that the constants above are defined even if this check fails.
if not MAPBOX_ACCESS_TOKEN:
    raise RuntimeError(
        "MAPBOX_ACCESS_TOKEN is not set: define it in the environment or in a .env file."
    )


Number of images scraped so far. Remember: the free tier allows 50,000 requests per month.

In [20]:
# File suffixes (compared in lower case) that count as a scraped image.
EXTENSIONS = {".jpg", ".jpeg", ".png"}

# Count the image files already present in OUTPUT_DIR, to keep an eye on the
# monthly request quota. The directory is only created in the SETUP cell
# further down, so on a fresh checkout it may not exist yet: report 0 in that
# case instead of raising FileNotFoundError.
if OUTPUT_DIR.is_dir():
    nb_images = sum(
        1
        for f in OUTPUT_DIR.iterdir()
        if f.is_file() and f.suffix.lower() in EXTENSIONS
    )
else:
    nb_images = 0


print(f"! scraped images : {nb_images}")

! scraped images : 3


SETUP

In [12]:
# ======================
# SETUP
# ======================
# Create the output folder together with any missing parent folders;
# nothing happens if it is already there.
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

def random_coord_france():
    """Draw a random point inside the configured bounding box.

    Latitude is sampled uniformly between LAT_MIN and LAT_MAX, then longitude
    uniformly between LON_MIN and LON_MAX. The box is a plain rectangle, so a
    sampled point is not guaranteed to lie on French territory.

    Returns:
        tuple[float, float]: ``(lat, lon)`` in that order.
    """
    return random.uniform(LAT_MIN, LAT_MAX), random.uniform(LON_MIN, LON_MAX)

In [13]:
# ======================
# DOWNLOAD LOOP
# ======================
# Resume numbering after the highest existing sat_NNNN.jpg so that re-running
# this cell adds new images instead of overwriting (and re-paying for) the
# ones downloaded by a previous run. Non-numeric names such as sat_lyon.jpg
# are ignored.
existing_indices = [
    int(p.stem[len("sat_"):])
    for p in Path(OUTPUT_DIR).glob("sat_*.jpg")
    if p.stem[len("sat_"):].isdecimal()
]
start_index = max(existing_indices, default=-1) + 1

for i in range(start_index, start_index + NB_IMAGES):
    lat, lon = random_coord_france()

    # Request path is "{lon},{lat},{zoom}/{width}x{height}": longitude first.
    url = (
        "https://api.mapbox.com/styles/v1/mapbox/satellite-v9/static/"
        f"{lon},{lat},{ZOOM}/"
        f"{WIDTH}x{HEIGHT}"
    )

    filename = os.path.join(OUTPUT_DIR, f"sat_{i:04d}.jpg")

    try:
        # The token goes through `params` so it is URL-encoded by requests and
        # is not part of the `url` string kept in the notebook namespace.
        response = requests.get(
            url,
            params={"access_token": MAPBOX_ACCESS_TOKEN},
            timeout=30,
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message may contain the
        # request URL, including the access token.
        print(f"[ERREUR] Image {i} - {type(exc).__name__}")
    else:
        if response.status_code == 200:
            with open(filename, "wb") as f:
                f.write(response.content)
            print(f"[OK] {filename}")
        else:
            print(f"[ERREUR] Image {i} - status {response.status_code}")

    # Short pause to avoid hitting the rate limit
    time.sleep(0.1)

[OK] c:\Users\Prout\Documents\GitHub\SISE_satelitar_identifier\notebooks\data\scraped_images\sat_0000.jpg
[OK] c:\Users\Prout\Documents\GitHub\SISE_satelitar_identifier\notebooks\data\scraped_images\sat_0001.jpg


Trying on a city: Lyon

In [15]:
# Lyon, stored as (latitude, longitude).
LYON_LAT, LYON_LON = 45.764043, 4.835659
LYON_COORDS = (LYON_LAT, LYON_LON)

In [16]:
# Single request centred on Lyon. LYON_COORDS is (lat, lon) whereas the
# request path expects "lon,lat", hence the swapped indices.
url = (
    "https://api.mapbox.com/styles/v1/mapbox/satellite-v9/static/"
    f"{LYON_COORDS[1]},{LYON_COORDS[0]},{ZOOM}/"
    f"{WIDTH}x{HEIGHT}"
)

filename = os.path.join(OUTPUT_DIR, "sat_lyon.jpg")

# The token goes through `params` so it is URL-encoded by requests and is not
# part of the `url` string kept in the notebook namespace.
response = requests.get(
    url,
    params={"access_token": MAPBOX_ACCESS_TOKEN},
    timeout=30,
)

if response.status_code == 200:
    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"[OK] {filename}")
else:
    # Do not reference the download loop's `i` here: it is undefined on a
    # fresh kernel and unrelated to this image.
    print(f"[ERREUR] Image Lyon - status {response.status_code}")

[OK] c:\Users\Prout\Documents\GitHub\SISE_satelitar_identifier\notebooks\data\scraped_images\sat_lyon.jpg
