# Phase 2: Soil Moisture Prediction 

### Step 1: Prepare Training Data

    We’ll include all features to maximize model accuracy:
        
##### 1. Static Features (do not change over time):
            
    Soil properties: clay, silt, sand, ocd (organic carbon density), wv0010 (volumetric water content at 10 kPa suction).
    Topography: DEM, slope, aspect.
##### 2. Dynamic Features (vary monthly):

    Weather: CHIRPS (rainfall), ERA5 (evaporation).
    
    Vegetation: NDVI.

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pathlib import Path
import yaml
import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import earthpy.plot as ep
from sklearn.preprocessing import MinMaxScaler
import json

# Use one fixed seed for both NumPy's and TensorFlow's global random
# generators, for reproducibility.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [4]:
# -----------------------------------------------------------------------------
# Load config.yml
# -----------------------------------------------------------------------------

# The project root is taken to be the parent of the current working directory
# (adjust `.parent` based on your folder depth).
current_dir = Path.cwd()
project_root = current_dir.parent

# Decode the file as UTF-8 explicitly: without `encoding=`, open() falls back
# to the platform's locale encoding, which can garble or reject non-ASCII
# values in the config on systems where that encoding is not UTF-8.
with open(project_root / "config.yml", "r", encoding="utf-8") as f:
    # safe_load parses the YAML into `config`; the path-construction code
    # indexes it as config['paths']['processed_data'].
    config = yaml.safe_load(f)

# -----------------------------------------------------------------------------
# Construct paths
# -----------------------------------------------------------------------------
# Root of the processed data, taken from the `paths.processed_data` entry of
# config.yml and resolved against the project root.
processed_dir = project_root.joinpath(config["paths"]["processed_data"])

# Soil data directory: clay, sand, silt, ocd, wv0010
soil_dir = processed_dir.joinpath("GIS/Soil")

# Topography rasters: elevation model, slope and aspect
dem_path = processed_dir.joinpath("GIS/Topography/tadla_dem_10m.tif")
slope_path = processed_dir.joinpath("GIS/Topography/tadla_slope.tif")
aspect_path = processed_dir.joinpath("GIS/Topography/tadla_aspect.tif")

# Rainfall data directory: chirps from 2017 to 2023, 1 file per year with 12 bands
rainfall_dir = processed_dir.joinpath("Weather/CHIRPS_Annual")

# Evapotranspiration data directory: era5 from 2017 to 2023, 1 file per year with 12 bands
evapotranspiration_dir = processed_dir.joinpath("Weather/ERA5_Annual")

# Study-area boundary directory
boundaries_dir = processed_dir.joinpath("GIS/Study_Area_Boundary")

# NDVI data directory: ndvi from 2017 to 2023, 1 file per year with 12 bands
ndvi_dir = processed_dir.joinpath("GIS/Land_Use")

In [5]:
# Load static features (soil + topography)
# Map each feature name to the raster file it is read from. Insertion order is
# kept, so `static_data` lists the features in this same order.
static_sources = {
    "clay": soil_dir / "tadla_clay_10m.tif",
    "silt": soil_dir / "tadla_silt_10m.tif",
    "sand": soil_dir / "tadla_sand_10m.tif",
    "ocd": soil_dir / "tadla_ocd_10m.tif",
    "wv0010": soil_dir / "tadla_wv0010_10m.tif",
    "dem": dem_path,
    "slope": slope_path,
    "aspect": aspect_path,
}

# Open every raster and drop its length-1 dimensions with .squeeze()
# (presumably the single band axis -- confirm each file holds one band).
static_data = {
    feature: rxr.open_rasterio(Path(source)).squeeze()
    for feature, source in static_sources.items()
}