Importing Necessary Dependencies

In [1]:
import os
import json
import ee
import requests
from tqdm import tqdm
from dotenv import load_dotenv

Load the environment variables

In [2]:
# Merge variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Each value is None when the corresponding environment variable is not set.
SERVICE_ACCOUNT = os.environ.get("SERVICE_ACCOUNT")
PROJECT_ID = os.environ.get("PROJECT_ID")

In [3]:
# Base directory: two levels above the current working directory, made absolute.
base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Location of the service-account JSON key, directly under base_dir.
key_path = os.path.join(base_dir, "service-account-key.json")

Input and Output Path

In [None]:
# Directory that is scanned for per-plot "*.geojson" files.
# Relative path: resolved against the working directory the notebook runs in.
GEOJSON_DIR = "../../data/geojson-data"
# Root directory for downloaded GeoTIFFs; files end up in <YEAR>/<plot_id>/
# subfolders created below it.
OUTPUT_DIR = "../../data/satellite-images"

📌 Important: Configurations

In [5]:
# Sensor to pull imagery from. get_collection() treats every value other than
# "Sentinel-2" as Landsat-8.
SATELLITE = "Landsat-8"  # Options: "Sentinel-2", "Landsat-8"

# Measurement year: used for the output sub-folder name and the date windows.
YEAR = 2014

# Scenes are kept only when their cloud-cover property is strictly below this value.
CLOUD_FILTER = 10

# Bands downloaded per plot, one GeoTIFF each.
# NOTE(review): these SR_B* names match the Landsat collection; confirm the
# band names if SATELLITE is switched to "Sentinel-2".
BANDS = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"]

Service Account Authentication for Google Earth Engine

In [6]:
def init_ee(key_file=None):
    """Authenticate with Google Earth Engine using a service-account key file.

    The project id and service-account email are read from the key JSON
    itself, so they always match the key that is used to sign in.

    Args:
        key_file: Path to the service-account JSON key. When None (the
            default) the module-level ``key_path`` is used.

    Returns:
        bool: True when Earth Engine was initialised, False when the key
        file was missing or any other step failed. Errors are printed, not
        raised, so callers should check the return value before issuing
        further ``ee`` calls.
    """
    # Resolve lazily so the default always reflects the current key_path.
    path = key_path if key_file is None else key_file

    try:
        # 1. Read the key file as a dictionary
        with open(path, "r") as f:
            key_data = json.load(f)

        # 2. Get Project ID and Service Account from the JSON itself
        # This is safer because it matches the key perfectly
        project_id = key_data["project_id"]
        service_account = key_data["client_email"]

        # 3. Initialize
        credentials = ee.ServiceAccountCredentials(
            email=service_account, key_data=json.dumps(key_data)
        )
        ee.Initialize(credentials, project=project_id)

        print(f"‚úÖ Success! Connected to project: {project_id}")
        print(f"üîë Using Service Account: {service_account}")
        return True

    except FileNotFoundError:
        print(f"‚ùå Error: Could not find file at {path}")
    except Exception as e:
        # Covers malformed JSON, missing key fields and Earth Engine errors.
        print(f"‚ùå Authentication failed: {e}")

    # Reached only via one of the handlers above.
    return False

Dynamic Satellite Image Collection Filtering by Sensor and Metadata

In [7]:
def get_collection(satellite, geom, start_date, end_date):
    """Build the cloud-filtered, cloud-sorted ImageCollection for one sensor.

    "Sentinel-2" selects the Sentinel-2 collection; every other value of
    ``satellite`` selects the Landsat 8 collection. The collection is
    restricted to images intersecting ``geom`` within the given date range
    whose cloud property is below the module-level CLOUD_FILTER, and is
    sorted ascending by that property (least cloudy first).
    """
    use_sentinel = satellite == "Sentinel-2"
    coll_id = "COPERNICUS/S2_SR_HARMONIZED" if use_sentinel else "LANDSAT/LC08/C02/T1_L2"
    cloud_prop = "CLOUDY_PIXEL_PERCENTAGE" if use_sentinel else "CLOUD_COVER"

    # Spatial and temporal restriction first, then the cloud threshold.
    candidates = ee.ImageCollection(coll_id).filterBounds(geom)
    candidates = candidates.filterDate(start_date, end_date)
    below_threshold = candidates.filter(ee.Filter.lt(cloud_prop, CLOUD_FILTER))

    return below_threshold.sort(cloud_prop)

Automated Multi-Spectral Band Extraction and GeoTIFF Export

In [8]:
def download_bands(image, geom, plot_id, output_path):
    """Download every band listed in BANDS as its own GeoTIFF.

    Args:
        image: Earth Engine image to export from.
        geom: Earth Engine geometry; its bounding box is the export region.
        plot_id: Identifier used as the file-name prefix.
        output_path: Existing directory the ``<plot_id>_<band>.tif`` files
            are written into.

    A failure on one band (URL generation, HTTP error, non-200 response,
    disk write) is printed and does not stop the remaining bands.
    """
    region = geom.bounds().getInfo()["coordinates"]

    # Scale: 10m for S2 visible/NIR, 30m for Landsat.
    # Same for every band, so compute it once instead of per iteration.
    scale = 10 if SATELLITE == "Sentinel-2" else 30

    for band in BANDS:
        try:
            url = image.select(band).getDownloadURL(
                {"scale": scale, "region": region, "format": "GEO_TIFF"}
            )

            response = requests.get(url, timeout=30)
            if response.status_code != 200:
                # Surface the failure through the handler below instead of
                # silently leaving this band out of the plot folder.
                raise RuntimeError(f"HTTP {response.status_code} from download URL")

            filename = f"{plot_id}_{band}.tif"
            with open(os.path.join(output_path, filename), "wb") as tif_file:
                tif_file.write(response.content)
        except Exception as e:
            print(f"  ‚ö†Ô∏è Error downloading {band} for {plot_id}: {e}")

In [9]:
# Authenticate against Earth Engine before any other ee call is made.
init_ee()

# Make sure the download root exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# File names (not full paths) of every GeoJSON plot in the input directory.
geojson_files = list(
    filter(lambda name: name.endswith(".geojson"), os.listdir(GEOJSON_DIR))
)
print(f"üìÇ Found {len(geojson_files)} plots to process.")

# First and last calendar day of the configured year.
start_date, end_date = f"{YEAR}-01-01", f"{YEAR}-12-31"

‚úÖ Success! Connected to project: geopulse-477105
üîë Using Service Account: geopulse-service@geopulse-477105.iam.gserviceaccount.com
üìÇ Found 524 plots to process.


In [10]:
def _strip_z(coords):
    """Return a copy of a GeoJSON coordinate array with every position cut to [x, y].

    Works at any nesting depth (Point, LineString, Polygon, Multi*), unlike a
    fixed two-level comprehension which only fits Polygon rings.
    """
    if coords and isinstance(coords[0], (int, float)):
        # Reached a single position: keep only the first two ordinates.
        return list(coords[:2])
    return [_strip_z(part) for part in coords]


year_folder = os.path.join(OUTPUT_DIR, str(YEAR))
os.makedirs(year_folder, exist_ok=True)

# Define a focused seasonal window (e.g., Post-Monsoon Dry Season)
# We look at Nov of the measurement year to March of the following year.
# The end date passed to filterDate is exclusive, so use 1 April to keep
# 31 March inside the window. Identical for every plot, hence set once here.
seasonal_start = f"{YEAR}-11-01"
seasonal_end = f"{YEAR + 1}-04-01"

for file_name in tqdm(geojson_files, desc=f"Processing Plots for {YEAR}"):
    plot_id = file_name.replace(".geojson", "").replace("plot_", "")

    # Nested directory structure: YEAR -> PLOT_ID
    plot_folder = os.path.join(year_folder, plot_id)

    # Skip if plot already processed (Checkpointing): the folder already
    # holds at least one file per requested band.
    if os.path.exists(plot_folder) and len(os.listdir(plot_folder)) >= len(BANDS):
        continue

    os.makedirs(plot_folder, exist_ok=True)

    # Load GeoJSON geometry (first feature of a FeatureCollection, or the
    # geometry of a single Feature).
    with open(os.path.join(GEOJSON_DIR, file_name), "r") as f:
        data = json.load(f)

    geom_dict = (
        data["features"][0]["geometry"] if "features" in data else data["geometry"]
    )

    # Remove Z-coords at whatever depth the geometry type nests its positions.
    if "coordinates" in geom_dict:
        geom_dict["coordinates"] = _strip_z(geom_dict["coordinates"])

    geom = ee.Geometry(geom_dict)

    # Get best image: the collection is sorted least-cloudy first.
    collection = get_collection(SATELLITE, geom, seasonal_start, seasonal_end)

    if collection.size().getInfo() > 0:
        best_img = ee.Image(collection.first()).clip(geom)
        download_bands(best_img, geom, plot_id, plot_folder)
    else:
        # Clean up empty folder if no imagery found
        if not os.listdir(plot_folder):
            os.rmdir(plot_folder)
        print(f"  ‚ùå No clear imagery found for {plot_id} in {YEAR}")

Processing Plots for 2014: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 524/524 [1:18:09<00:00,  8.95s/it]
