# Phase 2: Soil Moisture Prediction with CNN and Remote Sensing Data

### Step 1: Setup Environment

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pathlib import Path
import yaml
import xarray as xr
import geopandas as gpd
import earthpy.plot as ep
from sklearn.preprocessing import MinMaxScaler
import json

# Fix the NumPy and TensorFlow random seeds so repeated runs are reproducible
np.random.seed(42)
tf.random.set_seed(42)

### Step 2: Load Processed Data

In [None]:
# -----------------------------------------------------------------------------
# Read the project configuration (config.yml)
# -----------------------------------------------------------------------------

# The project root is taken to be the parent of the working directory;
# change `.parent` if the notebook lives at a different folder depth.
current_dir = Path.cwd()
project_root = current_dir.parent
with (project_root / "config.yml").open("r") as f:
    config = yaml.safe_load(f)

# -----------------------------------------------------------------------------
# Build every input path relative to the processed-data directory
# -----------------------------------------------------------------------------
processed_dir = project_root.joinpath(config['paths']['processed_data'])

# Soil rasters: clay, sand, silt, ocd, wv0010
soil_dir = processed_dir.joinpath("GIS", "Soil")

# Topography rasters
dem_path = processed_dir.joinpath("GIS", "Topography", "tadla_dem_10m.tif")
slope_path = processed_dir.joinpath("GIS", "Topography", "tadla_slope.tif")
aspect_path = processed_dir.joinpath("GIS", "Topography", "tadla_aspect.tif")

# Rainfall: CHIRPS from 2017 to 2023, 1 file per year with 12 bands
rainfall_dir = processed_dir.joinpath("Weather", "CHIRPS_Annual")

# Evapotranspiration: ERA5 from 2017 to 2023, 1 file per year with 12 bands
evapotranspiration_dir = processed_dir.joinpath("Weather", "ERA5_Annual")

# Study-area boundary
boundaries_dir = processed_dir.joinpath("GIS", "Study_Area_Boundary")

# NDVI: from 2017 to 2023, 1 file per year with 12 bands
ndvi_dir = processed_dir.joinpath("GIS", "Land_Use")

In [None]:
def load_raster(path, band=None):
    """Load a raster file into a NumPy array, validating the request first.

    Args:
        path: Local filesystem path of the raster (``str`` or
            ``pathlib.Path``).
        band: 1-based band index to read, or ``None`` to read every band.

    Returns:
        The pixel data with singleton dimensions removed by ``np.squeeze``.

    Raises:
        FileNotFoundError: If ``path`` does not exist on disk.
        RuntimeError: If the file cannot be opened or read, or if ``band`` is
            outside ``1..count``; the underlying exception is chained as
            ``__cause__``.
    """
    raster_path = Path(path)

    # rasterio.open signals a missing file with RasterioIOError rather than
    # FileNotFoundError, so the handler below would never fire for it. Check
    # explicitly so callers get the intended "missing file" error.
    if not raster_path.exists():
        raise FileNotFoundError(f"Critical file missing: {path}")

    try:
        with rasterio.open(path) as src:
            if band is None:
                data = src.read()
            else:
                # Validate before reading: reading an out-of-range band fails
                # inside rasterio first, which made the original check dead.
                if not 1 <= band <= src.count:
                    raise ValueError(
                        f"Band {band} not found in {raster_path.name}"
                    )
                data = src.read(band)
            return np.squeeze(data)  # Remove singleton dimensions
    except FileNotFoundError:
        # Covers the file disappearing between the check and the open.
        raise FileNotFoundError(f"Critical file missing: {path}") from None
    except Exception as e:
        raise RuntimeError(f"Error loading {path}: {str(e)}") from e


In [None]:
# Read the static terrain rasters (band 1 of each file)
print("Loading static features...")
# Elevation [m]
dem = load_raster(dem_path, band=1)
# Slope [degrees]
slope = load_raster(slope_path, band=1)
# Aspect [degrees]
aspect = load_raster(aspect_path, band=1)

In [None]:
# Read band 1 of every soil-property raster (all 10m resolution).
# Each file is named tadla_<property>_10m.tif inside soil_dir.
print("\nLoading soil properties...")
soil_layers = {
    prop: load_raster(soil_dir / f"tadla_{prop}_10m.tif", 1)
    for prop in (
        'clay',    # [%]
        'silt',    # [%]
        'sand',    # [%]
        'ocd',     # [g/kg]
        'wv0010',  # [m³/m³]
    )
}

### Step 3: Preprocess Data for CNN