In [6]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import time
import requests
import pandas as pd
from tqdm import tqdm



# Mapbox access token, read from the environment so it is never hardcoded.
# The original lookup used the misspelled name "MAXBOX_TOKEN"; the correctly
# spelled variable is preferred, with the old spelling kept as a fallback so
# existing environments keep working.
MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN") or os.getenv("MAXBOX_TOKEN")
STYLE = "mapbox/satellite-v9"  # "<owner>/<style id>" segment of the request URL

IMAGE_SIZE = "224x224"   # "<width>x<height>" segment of the request URL
ZOOM = 17                # zoom level placed after lon,lat in the request URL
# NOTE(review): SCALE is sent as the "@2x" suffix; per the Mapbox Static Images
# API this presumably returns a 448x448 px image — confirm downstream code
# expects that size.
SCALE = 2

SLEEP_TIME = 0.15        # seconds to pause after each request (simple rate limiting)

def fetch_image(lat, lon, save_path):
    """
    Fetch a satellite image for given latitude & longitude
    and save it to disk.

    Parameters
    ----------
    lat, lon :
        Latitude and longitude of the image centre. They are interpolated
        into the request URL as ``{lon},{lat}``.
    save_path :
        Destination path for the downloaded image bytes.

    Returns
    -------
    bool
        True if the API answered with HTTP 200 and the image was written,
        False on any other status code or on a network-level failure
        (timeout, connection error, ...).

    Raises
    ------
    RuntimeError
        If MAPBOX_TOKEN is empty or unset.
    OSError
        If the image cannot be written to ``save_path``.
    """
    # Fail fast on a missing token: every request would be rejected anyway,
    # and a clear error beats thousands of silent "failed" rows.
    if not MAPBOX_TOKEN:
        raise RuntimeError(
            "MAPBOX_TOKEN is empty; set the Mapbox access token environment "
            "variable before downloading images."
        )

    url = (
        f"https://api.mapbox.com/styles/v1/{STYLE}/static/"
        f"{lon},{lat},{ZOOM}/"
        f"{IMAGE_SIZE}@{SCALE}x"
    )

    try:
        # The token goes through `params` so requests URL-encodes it.
        response = requests.get(
            url,
            params={"access_token": MAPBOX_TOKEN},
            timeout=10,
        )
    except requests.RequestException:
        # A transient network problem on one image must not abort the batch.
        return False

    if response.status_code != 200:
        return False

    # Write to a temporary sibling file and rename it into place. Callers
    # treat "file exists" as "already downloaded", so a partially written
    # file must never appear under the final name.
    tmp_path = f"{save_path}.part"
    try:
        with open(tmp_path, "wb") as f:
            f.write(response.content)
        os.replace(tmp_path, save_path)
    except OSError:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    return True


def download_images(csv_or_excel_path, output_dir, id_col="id"):
    """
    Download satellite images for all rows in a dataset.

    Parameters
    ----------
    csv_or_excel_path :
        Path (str or path-like) to the dataset. Files whose name ends with
        ".csv" (any case) are read with ``pd.read_csv``; anything else is
        read with ``pd.read_excel``.
    output_dir :
        Directory that receives one ``<id>.png`` file per row. Created if
        it does not exist.
    id_col :
        Name of the column whose value is used as the image file name.

    Raises
    ------
    ValueError
        If the dataset lacks ``id_col``, "lat" or "long".

    Notes
    -----
    Rows whose image file already exists are skipped, so the function can
    be re-run to resume an interrupted download. A summary of successful,
    failed and skipped rows is printed at the end.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Load dataset
    dataset_path = os.fspath(csv_or_excel_path)
    if dataset_path.lower().endswith(".csv"):
        df = pd.read_csv(dataset_path, encoding="latin1")
    else:
        df = pd.read_excel(dataset_path)

    # Validate with a real exception: `assert` is stripped under `python -O`,
    # and a missing id column would otherwise surface as a bare KeyError.
    missing = [col for col in (id_col, "lat", "long") if col not in df.columns]
    if missing:
        raise ValueError(
            "Dataset must contain 'lat' and 'long' columns "
            f"and the id column {id_col!r}; missing: {missing}"
        )

    success, failed, skipped = 0, 0, 0

    # Iterate the three columns directly instead of df.iterrows(): iterrows
    # builds one Series per row and upcasts integer ids to float when all
    # columns are numeric, which would produce file names like "123.0.png".
    rows = zip(df[id_col], df["lat"], df["long"])

    for prop_id, lat, lon in tqdm(rows, total=len(df)):
        save_path = os.path.join(output_dir, f"{prop_id}.png")

        # Skip already downloaded images
        if os.path.exists(save_path):
            skipped += 1
            continue

        # A row without coordinates can never succeed; count it as failed
        # without spending an API request on it.
        if pd.isna(lat) or pd.isna(lon):
            failed += 1
            continue

        ok = fetch_image(lat, lon, save_path)

        if ok:
            success += 1
        else:
            failed += 1

        # Pause only after a real request, to stay under the API rate limit.
        time.sleep(SLEEP_TIME)

    print("\nDownload Summary")
    print("----------------")
    print(f"Successful: {success}")
    print(f"Failed:     {failed}")
    print(f"Skipped:    {skipped}")


if __name__ == "__main__":
    # Example usage: download imagery for the train split, then the test
    # split. Each entry pairs a dataset file with its image output folder.
    jobs = (
        ("data/train.csv", "data/images/train"),
        ("data/test.csv", "data/images/test"),
    )

    for dataset_path, image_dir in jobs:
        download_images(
            csv_or_excel_path=dataset_path,
            output_dir=image_dir,
            id_col="id",
        )

100%|██████████| 16209/16209 [00:07<00:00, 2029.16it/s]



Download Summary
----------------
Successful: 0
Failed:     0


100%|██████████| 5404/5404 [00:02<00:00, 1919.75it/s]


Download Summary
----------------
Successful: 0
Failed:     0



