In [None]:
pip install rasterio numpy geopandas shapely geemap earthengine-api

In [None]:
import os
import zipfile
import numpy as np
import rasterio
import shutil
from rasterio.mask import mask
from rasterio.features import geometry_mask
from shapely.geometry import shape, mapping
import ee
import geemap
from datetime import datetime, timedelta

# Authenticate and initialize Earth Engine.
# Uses a service-account JSON key; both the key path and the account e-mail
# are hard-coded for this Kaggle environment. Initialization must happen
# before any other `ee` object below is created.
service_account_key = '/kaggle/input/earth-engine-key/ee-drought-forecasting-d17f1e4a39f6.json'
credentials = ee.ServiceAccountCredentials('kaggle@ee-drought-forecasting.iam.gserviceaccount.com', service_account_key)
ee.Initialize(credentials)

# Load LSIB boundaries from Earth Engine.
# This collection is filtered by its 'country_na' property in
# get_country_geometry().
countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")

# Define the input paths for each country.
# Keys are the country names passed to get_country_geometry() (and therefore
# matched against 'country_na'); values are the folders that are scanned for
# .tif files. Several countries share one Kaggle input dataset.
COUNTRY_PATHS = {
    'Benin': '/kaggle/input/soil-moisture-benin-burkina-faso-gambia-guinea/GLDAS_Soil_Moisture/Benin',
    'Burkina Faso': '/kaggle/input/soil-moisture-benin-burkina-faso-gambia-guinea/GLDAS_Soil_Moisture/Burkina_Faso',
    'Gambia, The': '/kaggle/input/soil-moisture-benin-burkina-faso-gambia-guinea/GLDAS_Soil_Moisture/Gambia_The',
    'Guinea': '/kaggle/input/soil-moisture-benin-burkina-faso-gambia-guinea/GLDAS_Soil_Moisture/Guinea',
    'Chad': '/kaggle/input/soil-moisture-chad-cote-divoire/GLDAS_Soil_Moisture/Chad',
    'Cote d\'Ivoire': '/kaggle/input/soil-moisture-chad-cote-divoire/GLDAS_Soil_Moisture/Cote_dIvoire',
    'Ghana': '/kaggle/input/soil-moisture-ghana/GLDAS_Soil_Moisture/Ghana',
    'Sudan': '/kaggle/input/soil-moisture-sudan/GLDAS_Soil_Moisture/Sudan',
    'Niger': '/kaggle/input/soil-moisture-niger-sierra-leone/GLDAS_Soil_Moisture/Niger',
    'Sierra Leone': '/kaggle/input/soil-moisture-niger-sierra-leone/GLDAS_Soil_Moisture/Sierra_Leone',
    'Guinea-Bissau': '/kaggle/input/soil-moisture-nigeria-guinea-bissau/GLDAS_Soil_Moisture/Guinea-Bissau',
    'Nigeria': '/kaggle/input/soil-moisture-nigeria-guinea-bissau/GLDAS_Soil_Moisture/Nigeria',
    'Senegal': '/kaggle/input/soil-moisture-senegal/GLDAS_Soil_Moisture/Senegal',
    'Togo': '/kaggle/input/soil-moisture-togo/GLDAS_Soil_Moisture/Togo',
    'Liberia': '/kaggle/input/soil-moisture-liberia-mauritania/GLDAS_Soil_Moisture/Liberia',
    'Mauritania': '/kaggle/input/soil-moisture-liberia-mauritania/GLDAS_Soil_Moisture/Mauritania',
    'Mali': '/kaggle/input/soil-moisture-mali/GLDAS_Soil_Moisture/Mali'
}

def get_country_geometry(country_name):
    """Look up a country's boundary in the LSIB collection.

    The module-level ``countries`` collection is filtered on its
    ``country_na`` property. Two blocking ``getInfo()`` calls are made: one
    for the match count and one for the merged geometry.

    Args:
        country_name: Value to match exactly against ``country_na``.

    Returns:
        The result of ``geometry().getInfo()`` for the matching features, or
        ``None`` when nothing matches or any exception is raised (the error
        is printed, not propagated).
    """
    try:
        matches = countries.filter(ee.Filter.eq('country_na', country_name))
        match_count = matches.size().getInfo()

        if match_count != 0:
            boundary = matches.geometry().getInfo()
            print(f"Geometry retrieved for {country_name}")
            return boundary

        print(f"No country found with name: {country_name}")
        return None
    except Exception as err:
        # Best-effort lookup: callers treat None as "skip this country".
        print(f"Error retrieving geometry for {country_name}: {err}")
        return None

def save_masked_raster(raster_data, transform, crs, output_path, mask_geom):
    """Write a two-band GeoTIFF: clipped data plus the clip mask.

    Band 1 holds ``raster_data`` with every pixel outside ``mask_geom``
    replaced by NaN. Band 2 holds the mask itself, 255 where the pixel is
    inside the geometry and 0 elsewhere.

    Args:
        raster_data: 2-D array of pixel values (its shape is used as the
            output height and width).
        transform: Affine transform of ``raster_data``.
        crs: Coordinate reference system written to the output file.
        output_path: Destination path of the GeoTIFF.
        mask_geom: Geometry rasterized onto the grid of ``raster_data``.
    """
    # invert=True makes the mask True for pixels covered by the geometry.
    inside = geometry_mask(
        [mask_geom],
        out_shape=raster_data.shape,
        transform=transform,
        invert=True,
    )
    clipped = np.where(inside, raster_data, np.nan)
    mask_band = inside.astype(np.uint8) * 255

    profile = {
        'driver': 'GTiff',
        'height': clipped.shape[0],
        'width': clipped.shape[1],
        'count': 2,
        'dtype': clipped.dtype,
        'crs': crs,
        'transform': transform,
        'nodata': np.nan,
    }
    with rasterio.open(output_path, 'w', **profile) as out:
        out.write(clipped, 1)
        out.write(mask_band, 2)

def zip_folder(folder_path, output_zip_path):
    """Pack every file below ``folder_path`` into a deflate-compressed zip.

    Each entry is stored under its path relative to ``folder_path``, so the
    top-level folder name itself is not part of the archive. Only files are
    added; directories that contain no files produce no entry.

    Args:
        folder_path: Root directory to walk.
        output_zip_path: Path of the zip file to create (overwritten if it
            already exists).
    """
    with zipfile.ZipFile(
        output_zip_path, mode='w', compression=zipfile.ZIP_DEFLATED
    ) as archive:
        for current_dir, _subdirs, filenames in os.walk(folder_path):
            for name in filenames:
                source = os.path.join(current_dir, name)
                archive.write(source, arcname=os.path.relpath(source, folder_path))

def _country_output_name(country_name):
    """Return the filesystem-friendly name used for a country's folder and zip."""
    # Dropping apostrophes too turns "Cote d'Ivoire" into "Cote_dIvoire",
    # so no per-country special case is needed.
    return country_name.replace(' ', '_').replace(',', '').replace("'", '')


def process_country_images(country_name, output_folder, country_geom):
    """Clip every .tif of one country to its boundary, zip the results.

    For each ``.tif`` in ``COUNTRY_PATHS[country_name]`` the raster is cropped
    and masked to ``country_geom``, nodata pixels are turned into NaN, and the
    result is written with ``save_masked_raster`` into a per-country folder
    under ``output_folder``. The folder is then zipped next to itself and
    deleted.

    Args:
        country_name: Key into ``COUNTRY_PATHS``; also used (sanitized) as the
            output folder and zip name.
        output_folder: Directory that receives ``<name>/`` and ``<name>.zip``.
        country_geom: GeoJSON-like geometry mapping accepted by
            ``shapely.geometry.shape``.

    Returns:
        None. Problems are reported with ``print``: an invalid geometry or a
        missing input directory aborts the country, a failing file is skipped
        and the remaining files are still processed.
    """
    # Imported locally so the file's top-level import block stays untouched.
    from rasterio.crs import CRS
    from rasterio.warp import transform_geom

    print(f"\nProcessing images for {country_name}...")

    try:
        shapely_geom = shape(country_geom)
    except Exception as e:
        print(f"Error processing geometry for {country_name}: {str(e)}")
        return

    input_folder = COUNTRY_PATHS.get(country_name)
    if not input_folder or not os.path.exists(input_folder):
        print(f"Error: Directory not found for country: {country_name}")
        return

    # One sanitized name drives both the working folder and the zip file.
    output_name = _country_output_name(country_name)
    country_output_folder = os.path.join(output_folder, output_name)
    os.makedirs(country_output_folder, exist_ok=True)

    # NOTE(review): the boundary is assumed to be lon/lat WGS84 (EPSG:4326),
    # the GeoJSON default — confirm against what the caller passes in.
    geom_crs = CRS.from_epsg(4326)
    # Loop-invariant: convert the shapely geometry to GeoJSON once.
    geom_geojson = mapping(shapely_geom)

    # Process all .tif files
    lst_files = sorted(f for f in os.listdir(input_folder) if f.endswith(".tif"))
    for filename in lst_files:
        file_path = os.path.join(input_folder, filename)
        try:
            with rasterio.open(file_path) as src:
                print(f"Processing {filename}")

                # Ensure CRS compatibility: masking compares raw coordinates,
                # so the boundary must be expressed in the raster's CRS.
                # Rasters without a CRS are used as-is, as before.
                if src.crs and src.crs != geom_crs:
                    raster_geojson = transform_geom(geom_crs, src.crs, geom_geojson)
                    raster_geom = shape(raster_geojson)
                else:
                    raster_geojson = geom_geojson
                    raster_geom = shapely_geom

                # Mask the raster (crop=True shrinks it to the geometry's extent)
                masked_data, masked_transform = mask(
                    src, [raster_geojson], crop=True, nodata=src.nodata
                )
                # Only the first band is kept.
                masked_data = masked_data[0]

                # Handle nodata values
                if src.nodata is not None:
                    masked_data = np.where(masked_data == src.nodata, np.nan, masked_data)

                # Save output
                output_raster_path = os.path.join(country_output_folder, filename)
                save_masked_raster(masked_data, masked_transform, src.crs, output_raster_path, raster_geom)
                print(f"Processed and saved {filename}")

        except Exception as e:
            # Best-effort batch: one bad file must not stop the others.
            print(f"Error processing file {filename}: {str(e)}")

    # Zip the processed files
    zip_output_path = os.path.join(output_folder, f"{output_name}.zip")
    zip_folder(country_output_folder, zip_output_path)
    print(f"Zipped {country_output_folder} to {zip_output_path}")

    # Clean up temporary files
    shutil.rmtree(country_output_folder)
    print(f"Deleted folder {country_output_folder} to free up space")

def process_all_countries(output_folder):
    """Run the clip-and-zip pipeline for every entry of ``COUNTRY_PATHS``.

    The output directory is created if needed. For each country the boundary
    is fetched with ``get_country_geometry``; countries without a geometry
    are reported and skipped, the rest are handed to
    ``process_country_images``.

    Args:
        output_folder: Directory that receives the per-country results.
    """
    os.makedirs(output_folder, exist_ok=True)

    for country_name in COUNTRY_PATHS:
        print(f"\nStarting processing for {country_name}...")

        # Fetch the boundary first; without it nothing can be clipped.
        country_geom = get_country_geometry(country_name)
        if country_geom:
            process_country_images(country_name, output_folder, country_geom)
        else:
            print(f"Skipping {country_name} due to missing geometry.")

# Set output path and start processing.
# Runs immediately when this cell/script executes: every country in
# COUNTRY_PATHS is processed and its results are written under this folder.
output_folder = "/kaggle/working/soil_moisture_masked"
process_all_countries(output_folder)