In [1]:
import os
import rasterio
import numpy as np
import geopandas as gpd
from rasterio import features
import pandas as pd
import subprocess
from joblib import Parallel, delayed

def get_decade_number(month, decade):
    """
    Convert year, month, and decade into a sequential decade number relative to a base year.
    """
    year_offset = 36
    month_offset = (month - 1) * 3
    return year_offset + month_offset + (decade - 1) + 1
def read_tif_values(tif_file_path, geometry):
    """
    Read values from a GeoTIFF file for a specific geometry.
    """
    with rasterio.open(tif_file_path) as src:
        mask = features.geometry_mask([geometry], out_shape=src.shape, transform=src.transform, invert=True)
        values = src.read(1, masked=True)[mask]
    return values

def find_growing_season(et_values):
    """
    Find the start and end of the growing season based on ET values.
    """
    peak_index = et_values.idxmax()
    before_peak = et_values[:peak_index].dropna()
    after_peak = et_values[peak_index:].dropna()

    start_index = None
    end_index = None

    if not before_peak.empty:
        before_diff = before_peak.diff()
        if not before_diff.empty and not before_diff.isna().all():
            start_index = before_diff.idxmax()

    if not after_peak.empty:
        after_diff = after_peak.diff()
        if not after_diff.empty and not after_diff.isna().all():
            end_index = after_diff.idxmin()

    return start_index, end_index

def write_tif(output_path, data, model_ds, dtype=rasterio.float32):
    """
    Write data to a GeoTIFF file.
    """
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=model_ds.height,
        width=model_ds.width,
        count=1,
        dtype=dtype,
        crs=model_ds.crs,
        transform=model_ds.transform,
    ) as dst:
        dst.write(data, 1)

def process_year(selected_year):
    """
    Process data for the selected year, starting from September of the previous year until May of the selected year.
    """
    print(f"Processing for year {selected_year}")

    # Load shapefile with segments
    gdf = gpd.read_file('/home/karim/WaPOR/data/algeria/segmentation/24tiles/V3/SNIC_30000_V2.shp')

    # Initialize arrays to store start and end decades for each segment
    start_decades = np.zeros(len(gdf), dtype='int')
    end_decades = np.zeros(len(gdf), dtype='int')

    # Load the model GeoTIFF to get its projection and resolution
    model_path = "/home/karim/WaPOR/data/algeria/segmentation/24tiles/V3/SNIC_30000_V2.tif"
    with rasterio.open(model_path) as model_ds:
        model_crs = model_ds.crs
        model_transform = model_ds.transform
        model_width = model_ds.width
        model_height = model_ds.height

        # Create empty arrays to store the results
        start_season_array = np.zeros((model_height, model_width), dtype='float32')
        end_season_array = np.zeros((model_height, model_width), dtype='float32')

    # List of TIFF files to be processed
    tif_directory = '/home/karim/WaPOR/data/algeria/outputs/AET_dekad/'
    tif_files = []
    tif_dates = []

    # Start from September of the previous year to May of the selected year
    for year, start_month, end_month in [(selected_year - 1, 9, 12), (selected_year, 1, 5)]:
        for month in range(start_month, end_month + 1):
            for decade in range(1, 4):
                decade_number = get_decade_number(month, decade)
                file_name = f"WAPOR-3.L3-AETI-D.MIT.{year}-{str(month).zfill(2)}-D{decade}.tif"
                file_path = os.path.join(tif_directory, file_name)
                if os.path.exists(file_path):
                    tif_files.append(file_path)
                    tif_dates.append(f"{year}-{str(month).zfill(2)}-D{decade}")

    print(tif_dates)
    for idx, row in gdf.iterrows():
        print(f"Processing segment {idx + 1} of {len(gdf)}")  # Progress print statement
        
        segment_et_values = []

        # Read ET values for each decade
        for tif_file in tif_files:
            values = read_tif_values(tif_file, row['geometry'])
            if len(values) > 0:
                segment_et_values.append(np.mean(values))  # Use the mean of the segment's values
            else:
                segment_et_values.append(np.nan)  # Handle cases where no values are found

        # Convert the list to a pandas Series for easier processing
        et_series = pd.Series(segment_et_values, index=tif_dates)

        # Calculate the start and end of the growing season
        start_idx, end_idx = find_growing_season(et_series)
        

        # Assign values to start and end decades, and also to the corresponding segment in the output arrays
        if start_idx is not None and not pd.isna(start_idx):
            year, month, decade = map(int, start_idx.split('-')[0:2] + [start_idx.split('-')[2][1]])
            start_decades[idx] = get_decade_number(month, decade)
        else:
            start_decades[idx] = 0  # Or handle it differently

        if end_idx is not None and not pd.isna(end_idx):
            year, month, decade = map(int, end_idx.split('-')[0:2] + [end_idx.split('-')[2][1]])
            end_decades[idx] = get_decade_number(month, decade)
        else:
            end_decades[idx] = 0  # Or handle it differently

        
        # Write the results to the final arrays
        geom = row['geometry']
        mask = features.geometry_mask([geom], out_shape=(model_height, model_width), transform=model_transform, invert=True)

        start_season_array[mask] = start_decades[idx]
        end_season_array[mask] = end_decades[idx]

        print ('here i have ',start_decades[idx],' ',end_decades[idx])

    # Write the start and end season arrays to GeoTIFF files
    output_start_tif = f'/home/karim/WaPOR/data/algeria/outputs/start_of_season_{selected_year}.tif'
    output_end_tif = f'/home/karim/WaPOR/data/algeria/outputs/end_of_season_{selected_year}.tif'

    write_tif(output_start_tif, start_season_array, model_ds)
    write_tif(output_end_tif, end_season_array, model_ds)

    print(f"Processing complete for year {selected_year}! Start and end season TIFF files have been created.")

# Example usage
process_year(2019)  # Process for the year 2019 (September 2018 to May 2019)
#process_year(2020)  # Process for the year 2020 (September 2019 to May 2020)

#num_cores = 5
#deks=[2019, 2020, 2021, 2022, 2023]
#Parallel(n_jobs=num_cores)(delayed(process_year)(yy) for yy in deks)

Processing for year 2019
['2018-09-D1', '2018-09-D2', '2018-09-D3', '2018-10-D1', '2018-10-D2', '2018-10-D3', '2018-11-D1', '2018-11-D2', '2018-11-D3', '2018-12-D1', '2018-12-D2', '2018-12-D3', '2019-01-D1', '2019-01-D2', '2019-01-D3', '2019-02-D1', '2019-02-D2', '2019-02-D3', '2019-03-D1', '2019-03-D2', '2019-03-D3', '2019-04-D1', '2019-04-D2', '2019-04-D3', '2019-05-D1', '2019-05-D2', '2019-05-D3']
Processing segment 1 of 32903
here i have  0   61
Processing segment 2 of 32903
here i have  0   61
Processing segment 3 of 32903
here i have  0   61
Processing segment 4 of 32903
here i have  0   61
Processing segment 5 of 32903
here i have  0   61
Processing segment 6 of 32903
here i have  0   61
Processing segment 7 of 32903
here i have  0   61
Processing segment 8 of 32903
here i have  0   61
Processing segment 9 of 32903
here i have  0   61
Processing segment 10 of 32903
here i have  0   61
Processing segment 11 of 32903
here i have  0   61
Processing segment 12 of 32903
here i have  

KeyboardInterrupt: 