# 🌧️ Green & Ampt Infiltration Processor

The FLO-2D model includes the Green-Ampt infiltration method as one of its core infiltration engines. This approach is used in hydrologic modeling because it effectively captures transmission losses as water infiltrates into the soil.

In the FLO-2D model, rainfall is distributed to the computational grid, where it infiltrates into the soil until reaching the saturation depth or fill volume. This continuous process accounts for the dynamic movement of water through the watershed as flood routing progresses.



In [1]:
# Automatically set base path to the project directory where the notebook is running
from pathlib import Path

# Path.cwd() returns the current working directory of the kernel process.
# NOTE(review): this equals the notebook's folder only when the kernel was
# started there - confirm, or set base_path explicitly if the data is not found.
base_path = Path.cwd()

# Echo the resolved path so a wrong working directory is noticed immediately
print(f"📂 Base path automatically set to: {base_path}")

📂 Base path automatically set to: c:\Users\Karen\VS Code Projects\ASFPM-LLM-Data-Management-Workshop


# 🌧️ Find the fields 

In [None]:
"""
Print the fields of vector files in the project directory. This script prints the 
fields of shapefiles and GeoPackage files in the specified directory.  The resulting 
list can be used to fill the fields list in the next processor which will write a 
raster file for each field.
"""

import os
from osgeo import ogr, gdal
import shapefile  # pyshp
from pathlib import Path
import warnings

# Make OGR and GDAL raise Python exceptions on errors instead of
# returning None / error codes
ogr.UseExceptions()
gdal.UseExceptions()


# File paths of the vector inputs, all relative to base_path
soil_shapefile = base_path / 'Data' / 'Infiltration' / 'Soil 2023.shp'
landuse_shapefile = base_path / 'Data' / 'Infiltration' / 'Land Use 2018.shp'
grid_layer_path = base_path / 'Data' / 'GeoPackage' / 'selfhelp.gpkg'

def print_fields(filepath):
    """
    Print the attribute field names of a shapefile or GeoPackage.

    Parameters:
    - filepath: pathlib.Path of the vector file. The extension (compared
      case-insensitively) selects the reader: '.shp' is read with pyshp,
      '.gpkg' with OGR.

    For a GeoPackage only the layer returned by GetLayer() is inspected.
    Unsupported extensions and unopenable GeoPackages print a warning and
    return without raising.
    """
    suffix = filepath.suffix.lower()

    if suffix == '.shp':
        # Use pyshp for shapefiles; the context manager closes the file
        # handles even if reading the field list fails
        with shapefile.Reader(str(filepath)) as sf:
            fields = [f[0] for f in sf.fields[1:]]  # Skip deletion flag
    elif suffix == '.gpkg':
        # Use ogr for GeoPackage
        ds = ogr.Open(str(filepath))
        if ds is None:
            # Only reachable when OGR exceptions are not enabled
            print(f"⚠️ Could not open: {filepath}")
            return
        layer = ds.GetLayer()
        fields = [field.name for field in layer.schema]
        # Drop the layer before the dataset so the file is released
        layer = None
        ds = None
    else:
        print(f"⚠️ Unsupported file type: {filepath}")
        return

    # Print the fields
    print(f"\nFields in {filepath.name}:")
    for field in fields:
        print(f" - {field}")

# Report the attribute fields of every vector input, in a fixed order:
# soils, land use, then the grid GeoPackage
for vector_path in (soil_shapefile, landuse_shapefile, grid_layer_path):
    print_fields(vector_path)


# 🌧️ Rasterize the data

In [None]:
import os
from osgeo import gdal, ogr
import shapefile  # pyshp
import tempfile

# Define input paths (all relative to base_path set in the first cell)
soil_shapefile = base_path / 'Data' / 'Infiltration' / 'Soil 2023.shp'
landuse_shapefile = base_path / 'Data' / 'Infiltration' / 'Land Use 2018.shp'
grid_layer_path = base_path / 'Data' / 'GeoPackage' / 'selfhelp.gpkg'
# Folder that receives one GeoTIFF per rasterized field
output_folder = base_path / 'Data' / 'Infiltration' / 'Rasters'

# Fields to rasterize.  Make sure these fields are in the results printed by the previous cell.
# Each name becomes the output file name (<field>.tif).
soil_fields = ['hydc', 'soil_depth', 'psif', 'dthetad', 'dthetan', 'dthetaw']
# 'InitSat' holds text categories and is converted to numeric codes before rasterizing
landuse_fields = ['IA', 'RTIMP', 'VC', 'InitSat']

# EPSG code written as the projection of every output raster.
# NOTE(review): 2223 is presumably NAD83 / Arizona Central (feet) - confirm it
# matches the CRS of the grid layer, since the grid extent is used unprojected.
target_epsg = 2223

import shapefile
import tempfile
import os

def preprocess_sat_field(shapefile_path, field_name="InitSat"):
    """
    Write a temporary copy of a shapefile in which a categorical saturation
    field is replaced by a numeric field.

    Converts (exact, case-sensitive match after stripping whitespace):
    - "wet" to 0
    - "normal" to 1
    - "dry" to 2
    - Anything else to -1 (unknown)

    Parameters:
    - shapefile_path: path of the source shapefile (str or path-like)
    - field_name: name of the categorical field to convert

    Returns:
    - Path (str) of 'processed.shp' inside a new temporary directory. The
      directory is not removed here; the caller owns it.

    Raises:
    - ValueError if field_name is not a field of the shapefile.
    """
    # Category -> numeric code; anything not listed is coded -1
    category_codes = {"wet": 0, "normal": 1, "dry": 2}

    # Context managers guarantee both shapefiles are closed, even on error
    with shapefile.Reader(str(shapefile_path)) as reader:
        fields = reader.fields[1:]  # skip DeletionFlag
        field_names = [f[0] for f in fields]

        if field_name not in field_names:
            raise ValueError(f"Field '{field_name}' not found in {shapefile_path}")

        # Position of the categorical field inside each record
        sat_index = field_names.index(field_name)

        # Create a temporary directory for the processed shapefile
        temp_dir = tempfile.mkdtemp()
        print(f"Temp directory for Sat processing: {temp_dir}")
        temp_shp = os.path.join(temp_dir, "processed.shp")

        record_count = 0
        unknown_count = 0
        with shapefile.Writer(temp_shp) as writer:
            # Same schema as the source, except the converted field becomes
            # an integer-valued numeric field
            for field in fields:
                if field[0] == field_name:
                    writer.field(field_name, 'N', decimal=0)
                else:
                    writer.field(*field)

            # Stream the records instead of loading the whole file at once
            for sr in reader.iterShapeRecords():
                rec = list(sr.record)
                code = category_codes.get(str(rec[sat_index]).strip(), -1)
                if code == -1:
                    unknown_count += 1
                rec[sat_index] = code

                writer.shape(sr.shape)
                writer.record(*rec)
                record_count += 1

    print(f"Processed {record_count} records to {temp_shp}")
    if unknown_count:
        # Make silent -1 codes visible so misspelled categories are noticed
        print(f"Warning: {unknown_count} records had an unrecognized "
              f"'{field_name}' value and were coded -1")
    return temp_shp


# Rasterization properties calculated from the grid layer.
def rasterize_field(input_vector, field, reference_layer, output_path):
    """
    Rasterize one attribute field of a vector file to a single-band
    Float32 GeoTIFF aligned to the extent of a reference layer.

    The raster covers the extent of the first layer of reference_layer with a
    fixed 30 x 30 cell size (in the units of that extent) and is tagged with
    the module-level target_epsg. Every cell touched by a feature receives the
    feature's value (ALL_TOUCHED=TRUE).

    Parameters:
    - input_vector: path of the vector file to burn (str or path-like)
    - field: name of the numeric attribute to burn
    - reference_layer: path of the vector dataset that defines the extent
    - output_path: path of the GeoTIFF to create

    Raises:
    - RuntimeError if a dataset cannot be opened/created or rasterization fails
    - ValueError if the reference extent is smaller than one cell
    """
    # Plain strings work with every GDAL/OGR version; path-like objects do not
    input_vector = str(input_vector)
    reference_layer = str(reference_layer)
    output_path = str(output_path)

    grid_ds = gdal.OpenEx(reference_layer, gdal.OF_VECTOR)
    if grid_ds is None:
        raise RuntimeError(f"Could not open reference layer: {reference_layer}")
    grid_layer = grid_ds.GetLayer()
    xmin, xmax, ymin, ymax = grid_layer.GetExtent()
    # Only the extent is needed; release the reference dataset right away
    grid_layer = None
    grid_ds = None

    xres = 30
    yres = 30
    cols = int((xmax - xmin) / xres)
    rows = int((ymax - ymin) / yres)
    if cols < 1 or rows < 1:
        raise ValueError(
            f"Reference extent of {reference_layer} is smaller than one {xres} x {yres} cell"
        )

    driver = gdal.GetDriverByName('GTiff')
    out_raster = driver.Create(output_path, cols, rows, 1, gdal.GDT_Float32)
    if out_raster is None:
        raise RuntimeError(f"Could not create raster: {output_path}")
    # North-up geotransform anchored at the upper-left corner of the extent
    out_raster.SetGeoTransform((xmin, xres, 0, ymax, 0, -yres))

    srs = ogr.osr.SpatialReference()
    srs.ImportFromEPSG(target_epsg)
    out_raster.SetProjection(srs.ExportToWkt())

    vector_ds = ogr.Open(input_vector)
    if vector_ds is None:
        out_raster = None
        raise RuntimeError(f"Could not open vector file: {input_vector}")
    vector_layer = vector_ds.GetLayer()

    try:
        err = gdal.RasterizeLayer(out_raster, [1], vector_layer, options=[
            f"ATTRIBUTE={field}",
            "ALL_TOUCHED=TRUE"
        ])
        # A non-zero code is only returned when GDAL exceptions are disabled
        if err != 0:
            raise RuntimeError(f"Rasterizing '{field}' from {input_vector} failed (code {err})")
        out_raster.FlushCache()
    finally:
        # Dropping the references closes the files; doing it explicitly keeps
        # them from staying locked if an exception keeps this frame alive
        vector_layer = None
        vector_ds = None
        out_raster = None

    print(f"Raster saved: {output_path}")

import shutil

# Make sure output directory exists
os.makedirs(output_folder, exist_ok=True)

# Rasterize Soil fields
# Paths are passed as plain strings so they work with every GDAL/OGR version
for field in soil_fields:
    out_path = os.path.join(output_folder, f"{field}.tif")
    rasterize_field(str(soil_shapefile), field, str(grid_layer_path), out_path)

# Rasterize Land Use fields
for field in landuse_fields:
    input_path = str(landuse_shapefile)
    temp_dir = None

    # Preprocess InitSat specifically: its text categories must become numbers
    if field == "InitSat":
        print(f"Preprocessing {field} for numeric conversion...")
        input_path = preprocess_sat_field(str(landuse_shapefile), field)
        # The processed copy lives in its own temporary directory
        temp_dir = os.path.dirname(input_path)

    out_path = os.path.join(output_folder, f"{field}.tif")
    print(f"Rasterizing {field} from {input_path} to {out_path}")
    try:
        rasterize_field(input_path, field, str(grid_layer_path), out_path)
    finally:
        # Remove the temporary shapefile once it has been rasterized (or failed)
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)



# 🌧️ Process GeoPackage

In [9]:
"""
Grid Centroid Raster Sampling Script
-----------------------------------

This script extracts centroids from a grid polygon layer in a GeoPackage and samples 
values from multiple rasters at those centroid locations. The results are stored in 
a new table within the GeoPackage.

Functionality:
1. Extracts centroids from polygons in the 'grid' layer
2. Handles coordinate system transformations between grid and rasters
3. Samples each raster at each centroid location
4. Creates a new table with FID and sampled values from each raster

Requirements:
- GDAL/OGR
- Pandas
- NumPy
- SQLite3
- Pathlib

Usage:
1. Set the base_path variable to point to your project directory
2. Ensure your GeoPackage has a 'grid' polygon layer
3. Place your raster files in the specified rasters directory
4. Run the script to create a 'green_ampt_sampled' table in your GeoPackage
"""

import os
import sqlite3
import pandas as pd
from osgeo import gdal, ogr, osr
import numpy as np
from pathlib import Path

# Define base path - update this to your project directory if necessary
# base_path = Path(r"C:\Users\Karen\VS Code Projects\ASFPM-LLM-Data-Management-Workshop")

# Define paths using pathlib
gpkg_path = base_path / 'Data' / 'GeoPackage' / 'selfhelp.gpkg'
rasters_dir = base_path / 'Data' / 'Infiltration' / 'Rasters'

# Connect to the geopackage (used further down to write the result table)
conn = sqlite3.connect(gpkg_path)

# Use GDAL to access the grid layer and extract centroids
driver = ogr.GetDriverByName('GPKG')
data_source = driver.Open(str(gpkg_path), 0)  # 0 = read-only

# Compare against None explicitly: OGR objects can define __len__ (layer or
# feature count), so a valid but empty dataset/layer may evaluate as False.
if data_source is None:
    conn.close()  # do not leave the GeoPackage locked on failure
    raise Exception(f"Could not open {gpkg_path}")

layer = data_source.GetLayerByName('grid')
if layer is None:
    conn.close()  # do not leave the GeoPackage locked on failure
    raise Exception("Could not access 'grid' layer")

# Get the spatial reference of the grid layer
grid_srs = layer.GetSpatialRef()

# Get the layer definition to check available fields
layer_defn = layer.GetLayerDefn()
field_count = layer_defn.GetFieldCount()
field_names = [layer_defn.GetFieldDefn(i).GetName() for i in range(field_count)]

# Try to find the FID field (case-insensitive search); None when absent
fid_field = next((name for name in field_names if name.lower() == 'fid'), None)

if fid_field is None:
    print("WARNING: Could not find a field named 'fid'. Using feature ID instead.")

# Extract centroids from grid polygons
fid_list = []
x_list = []
y_list = []

for feature in layer:
    # Use the correct FID field if found, otherwise use feature ID
    if fid_field is not None:
        fid = feature.GetField(fid_field)
    else:
        fid = feature.GetFID()  # Use feature ID as fallback

    # Features without geometry are skipped and get no row in the result
    geom = feature.GetGeometryRef()
    if geom:
        centroid = geom.Centroid()
        fid_list.append(fid)
        x_list.append(centroid.GetX())
        y_list.append(centroid.GetY())

# Create dataframe with centroids (coordinates are in the grid layer's CRS)
centroids_df = pd.DataFrame({
    'fid': fid_list,
    'centroid_x': x_list,
    'centroid_y': y_list
})
print(f"Extracted {len(centroids_df)} centroids from grid layer")

# Release the data source
data_source = None

# Define coordinate transformation function
def transform_point(x, y, source_srs, target_srs):
    """
    Reproject one (x, y) coordinate pair from source_srs to target_srs.

    The pair is returned unchanged when both spatial references are the same;
    otherwise a point geometry is built, transformed, and its coordinates
    returned as an (x, y) tuple.
    """
    if not source_srs.IsSame(target_srs):
        reprojection = osr.CoordinateTransformation(source_srs, target_srs)
        pt = ogr.CreateGeometryFromWkt(f"POINT ({x} {y})")
        pt.Transform(reprojection)
        x, y = pt.GetX(), pt.GetY()
    return x, y

# List all raster files
raster_files = list(rasters_dir.glob('*.tif')) + list(rasters_dir.glob('*.tiff')) + \
               list(rasters_dir.glob('*.img')) + list(rasters_dir.glob('*.asc'))

print(f"Found {len(raster_files)} raster files to process")

# Create a dictionary to store sampled values (one list per raster, one entry per centroid)
sampled_values = {'fid': centroids_df['fid'].tolist()}

# Sample each raster at the centroid locations
for raster_file in raster_files:
    raster_name = raster_file.stem
    print(f"Processing {raster_name}...")

    # Open the raster
    raster = gdal.Open(str(raster_file))
    if raster is None:
        print(f"  Could not open {raster_file}, skipping")
        continue

    # Get raster spatial reference
    raster_srs = osr.SpatialReference()
    raster_srs.ImportFromWkt(raster.GetProjection())

    # Get raster geotransform and band
    # NOTE: only the origin and cell size are used, i.e. a north-up raster
    # without rotation terms is assumed
    gt = raster.GetGeoTransform()
    rb = raster.GetRasterBand(1)
    no_data_value = rb.GetNoDataValue()
    x_size, y_size = raster.RasterXSize, raster.RasterYSize

    # The CRS comparison is identical for every centroid, so do it once per raster
    needs_transform = bool(grid_srs and raster_srs and not grid_srs.IsSame(raster_srs))

    # Initialize list for this raster's values
    values = []
    failed = 0  # points that raised an error and were stored as None

    # For each centroid, sample the raster
    for x, y in zip(centroids_df['centroid_x'], centroids_df['centroid_y']):
        try:
            # Skip if coordinates are None or NaN
            if x is None or y is None or pd.isna(x) or pd.isna(y):
                values.append(None)
                continue

            # Transform coordinates from grid CRS to raster CRS if needed
            if needs_transform:
                try:
                    x, y = transform_point(x, y, grid_srs, raster_srs)
                except Exception:
                    failed += 1
                    values.append(None)
                    continue

            # Convert from map coordinates to pixel coordinates.
            # floor() rather than int(): int() truncates toward zero, which
            # would map points just left of / above the raster onto
            # column / row 0 instead of rejecting them as out of bounds.
            px = int(np.floor((x - gt[0]) / gt[1]))
            py = int(np.floor((y - gt[3]) / gt[5]))

            # Check if pixel is within raster bounds
            if 0 <= px < x_size and 0 <= py < y_size:
                # Read the pixel value
                data = rb.ReadAsArray(px, py, 1, 1)
                if data is not None:
                    value = data[0][0]
                    # Check if the value is NoData
                    if no_data_value is not None and value == no_data_value:
                        values.append(None)
                    else:
                        values.append(float(value))
                else:
                    values.append(None)
            else:
                values.append(None)
        except Exception:
            # Best effort: one bad point must not abort the whole raster
            failed += 1
            values.append(None)

    # Add values to the dictionary
    sampled_values[raster_name] = values

    # Calculate some basic statistics
    non_null_values = [v for v in values if v is not None]
    print(f"  Sampled {len(non_null_values)} valid values out of {len(values)} points")
    if failed:
        print(f"  Warning: {failed} points could not be sampled because of errors")

    # Close the raster (release the band first, it references the dataset)
    rb = None
    raster = None

# Assemble the per-raster samples into one table (one row per centroid);
# NaN is mapped to None so SQLite stores NULL
result_df = pd.DataFrame(sampled_values).replace({np.nan: None})

# Write the table into the geopackage, replacing any earlier version
result_df.to_sql(name='green_ampt_sampled', con=conn, if_exists='replace', index=False)

# Summarize what was written
row_total = len(result_df)
column_total = len(result_df.columns)
print("\nProcessing complete!")
print(f"Created 'green_ampt_sampled' table with {row_total} rows and {column_total} columns")
print("Columns: " + ", ".join(result_df.columns))

# Close the connection with proper cleanup
def close_connection_with_cleanup(connection):
    """
    Commit, leave WAL journal mode, and close a sqlite3 connection.

    Every step is best-effort: a failure is reported as a printed warning and
    the remaining steps still run, so the function itself does not raise for
    errors in these steps.

    Parameters:
    - connection: an open sqlite3 connection
    """
    # Commit first: SQLite cannot change the journal mode while a transaction
    # is open, so the PRAGMA below would fail on uncommitted work
    try:
        connection.commit()
    except Exception as e:
        print(f"Warning: Could not commit: {e}")

    # Disable WAL mode (Write-Ahead Logging) to help release the -wal and -shm files
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("PRAGMA journal_mode=DELETE;")
        finally:
            cursor.close()
    except Exception as e:
        print(f"Warning: Could not disable WAL mode: {e}")

    # Close the connection
    try:
        connection.close()
    except Exception as e:
        print(f"Warning: Error closing connection: {e}")

    print("Database connection closed and cleanup attempted")

# Close the SQLite connection to the GeoPackage; the helper also commits and
# attempts to switch the journal mode away from WAL
close_connection_with_cleanup(conn)

# If you're using GDAL/OGR data sources, make sure those are closed too
# For example, if you have a data_source variable:
# data_source = None

Extracted 54306 centroids from grid layer
Found 10 raster files to process
Processing dthetad...
  Sampled 54306 valid values out of 54306 points
Processing dthetan...
  Sampled 54306 valid values out of 54306 points
Processing dthetaw...
  Sampled 54306 valid values out of 54306 points
Processing hydc...
  Sampled 54306 valid values out of 54306 points
Processing IA...
  Sampled 54306 valid values out of 54306 points
Processing InitSat...
  Sampled 54306 valid values out of 54306 points
Processing psif...
  Sampled 54306 valid values out of 54306 points
Processing RTIMP...
  Sampled 54306 valid values out of 54306 points
Processing soil_depth...
  Sampled 54306 valid values out of 54306 points
Processing VC...
  Sampled 54306 valid values out of 54306 points

Processing complete!
Created 'green_ampt_sampled' table with 54306 rows and 11 columns
Columns: fid, dthetad, dthetan, dthetaw, hydc, IA, InitSat, psif, RTIMP, soil_depth, VC
Database connection closed and cleanup attempted



# 🤖 Instructions for Students: Next Steps with VS Code Copilot

## Overview
You've successfully created a script that extracts centroids from a grid layer and samples multiple rasters at those locations. The next step is to join this sampled data with another functional table in your GeoPackage. VS Code Copilot can help you with this task.

## Task Instructions

### 1. Prepare Your Environment
- Make sure VS Code with GitHub Copilot extension is installed and enabled
- Have your notebook and the completed Grid Centroid Raster Sampling script open

### 2. Identify the Target Table
- First, determine which functional table in the GeoPackage you want to join with the 'green_ampt_sampled' table
- You'll need to know:
  - The table name
  - The key field to join on (likely 'fid')
  - Which fields you want to include in the final result

### 3. Ask Copilot to Create the Join Script
Here are some prompts you can use with VS Code Copilot:

```
"Create a Python script that joins the green_ampt_sampled table with the [YOUR_TABLE_NAME] table in my GeoPackage. The tables should be joined on the fid field."
```

```
"I need to create a new table in my GeoPackage by joining green_ampt_sampled with [YOUR_TABLE_NAME]. Show me how to do this using SQLite and GDAL/OGR."
```

```
"Write a SQL query that joins the green_ampt_sampled table with the [YOUR_TABLE_NAME] table and explain how to execute it using Python and SQLite."
```

### 4. Refine Your Request
If the initial response isn't quite what you need, try specifying more details:

```
"The query should include the following fields from [YOUR_TABLE_NAME]: [FIELD1, FIELD2, etc.] along with all fields from green_ampt_sampled."
```

```
"I want to save the joined data as a new table called [NEW_TABLE_NAME] in the same GeoPackage."
```

```
"Can you also add code to verify that the join worked correctly by counting the number of rows and showing the first few records?"
```

### 5. Example Implementation Template
You can ask Copilot to fill in the details for a template like this:

```python
# Join green_ampt_sampled with another table
# (template: the sections marked "Ask Copilot" are intentionally left empty)
import sqlite3
from pathlib import Path

# Define paths
# NOTE: replace this example path with your own project directory
base_path = Path(r"C:\Users\Karen\VS Code Projects\ASFPM-LLM-Data-Management-Workshop")
gpkg_path = base_path / 'Data' / 'GeoPackage' / 'selfhelp.gpkg'

# Connect to the geopackage
conn = sqlite3.connect(gpkg_path)
cursor = conn.cursor()

# Create join query (placeholder SQL comment until Copilot fills it in)
join_query = """
-- Ask Copilot to create this SQL join query
"""

# Execute query and create new table
# Ask Copilot to complete this section

# Verify the results
# Ask Copilot to add verification code

# Close connection
conn.close()

print("Join completed successfully!")
```

### 6. Additional Tasks You Can Ask Copilot For
- Creating a visualization of the joined data
- Adding calculated fields based on the raster values
- Exporting the results to other formats (CSV, shapefile, etc.)
- Creating a function that automates both the sampling and joining process
- Adding data validation to ensure the join produced correct results

## Tips for Working with Copilot
- Be specific about table and field names
- Break complex tasks into smaller, focused requests
- Ask Copilot to explain any code it generates that you don't understand
- Use iterative prompts to refine the solution
- If you're not getting what you need, try rephrasing or providing a small example

Good luck with your data integration task!