### Functions for stepFunction File

In [None]:
# Import libraries
import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd

from osgeo import gdal

import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask

from shapely.geometry import mapping


import matplotlib.pyplot as plt

In [None]:

def mask_raster_by_geometries(shapefile_path, tiff_path, output_dir, year, visualize=False):
    """
    Clip a raster to every geometry of a shapefile and write one GeoTIFF per geometry.

    Each feature of the shapefile is used as a mask (with cropping) on the raster.
    The result is written to
    ``{output_dir}/{year}/{raster basename}/{unit_code}.tiff`` where ``unit_code``
    is the integer value of the feature's 'unit_code' attribute.

    Parameters:
    ----------
    shapefile_path : str
        Path to the input shapefile (.shp). It must provide a 'unit_code' column
        whose values can be converted with ``int()``.
    tiff_path : str
        Path to the input raster file (.tif) to be masked.
    output_dir : str
        Root directory for the output rasters.
    year : int
        Used as the first sub-directory below ``output_dir``.
    visualize : bool, optional
        If True, the first band of each masked raster is shown with matplotlib.
        Defaults to False.

    Returns:
    -------
    None
        The masked rasters are written to disk.
    """
    gdf = gpd.read_file(shapefile_path)
    raster_basename = os.path.splitext(os.path.basename(tiff_path))[0]

    # Every output of one call lands in the same directory
    output_directory = "{}/{}/{}".format(output_dir, year, raster_basename)

    with rasterio.open(tiff_path) as src:
        print("Masking {} with {}".format(os.path.basename(tiff_path), os.path.basename(shapefile_path)))

        # Masking compares raw coordinates, so bring the geometries into the
        # raster CRS first (same approach as gdfmask). Skipped when either side
        # carries no CRS information.
        if gdf.crs is not None and src.crs is not None:
            gdf = gdf.to_crs(src.crs)

        base_meta = src.meta.copy()

        for _, row in gdf.iterrows():
            unitcode = int(row['unit_code'])

            # crop=True shrinks the output window to the geometry's extent
            out_image, out_transform = mask(src, [row.geometry], crop=True)

            # Start from the source metadata and patch in the cropped window
            out_meta = base_meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform
            })

            output_tiff_path = "{}/{}.tiff".format(output_directory, unitcode)

            # exist_ok avoids the check-then-create race of os.path.exists()
            os.makedirs(output_directory, exist_ok=True)

            with rasterio.open(output_tiff_path, "w", **out_meta) as dest:
                dest.write(out_image)

            if visualize:
                plt.imshow(out_image[0], cmap="gray")
                plt.title(f"Masked Raster for {unitcode}")
                plt.colorbar()
                plt.show()


In [None]:
# Create growing months Functions
    
def createList(r1, r2):
    """
    Return the consecutive month numbers from r1 through r2, both ends included.

    Parameters:
    r1 (int): First month of the range (e.g., 1 for January).
    r2 (int): Last month of the range.

    Returns:
    np.ndarray: The months r1, r1 + 1, ..., r2. Empty when r2 < r1.
    """
    # np.arange excludes its stop value, so step one past the last month
    stop_exclusive = r2 + 1
    return np.arange(r1, stop_exclusive)

def getListcom(p, m):
    """
    Build the sequence of growing months from planting month to maturity month.

    When planting comes strictly before maturity the season lies inside one
    calendar year. Otherwise it is treated as running through December and
    continuing from January of the following year.

    Parameters:
    p (int): The planting month.
    m (int): The maturity month.

    Returns:
    np.ndarray: The growing months in chronological order.
    """
    # NOTE(review): p == m also takes the wrap-around path, which yields 13
    # entries with month p listed twice -- confirm this is intended.
    season_wraps = not np.any(p < m)
    if season_wraps:
        until_december = createList(p, 12)
        from_january = createList(1, m)
        return np.concatenate((until_december, from_january))
    return createList(p, m)



def growingMonths(ranked_df_ir_rank):
    """
    Return the growing months of the crop described by the first row of a DataFrame.

    Parameters:
    ranked_df_ir_rank (pd.DataFrame): A DataFrame containing at least the columns
                                      'Planting_Month' and 'Maturity_Month'.
                                      Only the first row is used.

    Returns:
    np.ndarray: An array of months representing the growing period, as produced
                by getListcom.
    """
    # The body is indented consistently with the docstring; the previous
    # 2-space body after a 4-space docstring was an IndentationError.
    p = ranked_df_ir_rank.Planting_Month.iloc[0]
    m = ranked_df_ir_rank.Maturity_Month.iloc[0]

    return getListcom(p, m)




In [None]:
def getpath2croptiff(crop_name, root_folder, allcrops, year, unit_code):
    """
    Build the path of the per-unit TIFF that belongs to a crop.

    The keys of `allcrops` are tried in dictionary order; the first key that is
    contained in `crop_name` selects the folder name.

    Parameters:
    crop_name (str): The name of the crop.
    root_folder (str): Folder (relative to the parent directory) holding the crop data.
    allcrops (dict): Maps crop identifiers to the folder-name prefix of that crop.
    year (str): Year identifier appended to the folder name.
    unit_code (str): Identifier of the geographic unit; becomes the file name.

    Returns:
    str: "../<root_folder>/<prefix>_<year>/<unit_code>.tif" joined with os.path.join,
         or None when no key of `allcrops` occurs in `crop_name`.
    """
    matching_key = next((key for key in allcrops if key in crop_name), None)
    if matching_key is None:
        return None

    base_dir = "../{}".format(root_folder)
    crop_dir = "{}_{}".format(allcrops[matching_key], year)
    file_name = "{}.tif".format(unit_code)
    return os.path.join(base_dir, crop_dir, file_name)

def gdfmask(gdf, raster_path, all_touched=True):
    """
    Generate one boolean raster mask per geometry of a GeoDataFrame.

    Parameters:
    gdf (geopandas.GeoDataFrame): The GeoDataFrame containing the geometries to mask.
                                  Its 'geometry' column is used.
    raster_path (str): The file path to the raster that defines grid shape,
                       CRS and affine transform of the masks.
    all_touched (bool): Forwarded to rasterio.features.geometry_mask. If True, all
                        pixels touched by a geometry are included; if False, only
                        pixels whose center is within the geometry or that are
                        selected by Bresenham's line algorithm. Default is True.

    Returns:
    dict: Keys are the GeoDataFrame index labels, values are NumPy arrays of the
          raster's (rows, cols) shape. Because invert=True is passed, pixels
          covered by the geometry are True.
    """
    with rasterio.open(raster_path) as src:
        # Grid shape comes from the dataset header; no pixel data is read
        raster_shape = src.shape
        raster_crs = src.crs
        raster_transform = src.transform

    # Use the value captured while the dataset was open instead of touching
    # the closed dataset object again
    gdf0 = gdf.to_crs(raster_crs)

    return {
        index: geometry_mask([mapping(geom)],
                             out_shape=raster_shape,
                             transform=raster_transform,
                             all_touched=all_touched,
                             invert=True)
        for index, geom in gdf0['geometry'].items()
    }

In [None]:
# Create crop rank functions
def rank_crops(df_area):
    """
    Order crops from largest to smallest growing area and index them by rank.

    Parameters:
    df_area (pd.DataFrame): Crop data with a 'Growing_area' column.

    Returns:
    pd.DataFrame: A new DataFrame sorted by 'Growing_area' (descending) whose index,
                  named 'rank', runs from 1 to the number of rows.
    """
    largest_first = df_area.sort_values('Growing_area', ascending=False)
    ranks = range(1, len(largest_first) + 1)
    return largest_first.assign(rank=ranks).set_index('rank')

def custom_rank(sub_df):
    """
    Ranks crops within a subset of data based on their type and growing area.

    Crops are ordered by type (Perennial, Other Perennial, Fodder, Annual,
    Other Annual) and, within a type, by descending 'Growing_area'. Types that
    are not in this list get a missing 'Type_Rank' and are sorted last.

    Parameters:
    sub_df (pd.DataFrame): A DataFrame containing a subset of crop data with 'Type'
                           and 'Growing_area' columns. It is not modified.

    Returns:
    pd.DataFrame: A sorted copy of `sub_df` with two extra columns: 'Type_Rank'
                  (position of the type in the order above) and 'rank'
                  (1..n in the sorted order).
    """
    # Define the type order for ranking
    type_order = {'Perennial': 1, 'Other Perennial': 2, 'Fodder': 3, 'Annual': 4, 'Other Annual': 5}

    # Work on a copy: the input stays untouched and no SettingWithCopyWarning
    # can be triggered, so sys.stderr no longer needs to be redirected. The
    # old redirect leaked a file handle, created a file called 'nul' on
    # non-Windows systems and replaced any redirected stderr (Jupyter, pytest)
    # with sys.__stderr__.
    ranked = sub_df.copy()

    ranked['Type_Rank'] = ranked['Type'].map(type_order)
    ranked = ranked.sort_values(by=['Type_Rank', 'Growing_area'], ascending=[True, False])

    # Assign ranks based on the sorted order
    ranked['rank'] = range(1, len(ranked) + 1)

    return ranked

def rankbyarea(CropCal_dir = "../data/CropCalender", irrigated_rainfed = "ir", year="00",
               by_state = False, options=('ir', 'rf')):
    """
    Ranks the crops of every unit_code found in a crop calendar CSV.

    The calendar is read from "<CropCal_dir>/20<year>_<irrigated_rainfed>.csv".
    Ranking is always done separately per 'unit_code'; `by_state` selects the
    ranking rule.

    Parameters:
    CropCal_dir (str): The directory path containing crop calendar files.
    irrigated_rainfed (str): Which calendar to read; must be one of `options`. Default is "ir".
    year (str): Two-digit year identifier for the file name. Default is "00".
    by_state (bool): If False, crops are ranked by growing area only (rank_crops).
                     If True, crops are ranked by crop type first and growing
                     area second (custom_rank).
    options (tuple): Valid values for 'irrigated_rainfed'. Default is ('ir', 'rf').

    Returns:
    pd.DataFrame or None: The ranked crops sorted by 'unit_code' and 'rank' with a
                          fresh 0..n-1 index, or None if the file does not exist.

    Raises:
    ValueError: If `irrigated_rainfed` is not one of `options`.
    """
    if irrigated_rainfed not in options:
        raise ValueError(f"Invalid value: {irrigated_rainfed}. Allowed values: {options}")

    # Construct the file path
    calendar_path = f"{CropCal_dir}/20{year}_{irrigated_rainfed}.csv"

    # A missing calendar is not an error: the caller gets None
    if not os.path.isfile(calendar_path):
        return None

    calendar = pd.read_csv(calendar_path)

    if not by_state:
        ordered = calendar.sort_values(['unit_code', 'Crop'])

        # Iterate over the groups instead of groupby.apply: the groups handed out
        # by iteration always keep the 'unit_code' column, whereas apply's handling
        # of grouping columns is deprecated in recent pandas versions.
        ranked_parts = [rank_crops(unit_rows) for _, unit_rows in ordered.groupby('unit_code')]
        if not ranked_parts:
            # Empty calendar: still return a frame with the expected columns
            ranked_parts = [rank_crops(ordered)]

        # rank_crops leaves the rank in the index; reset_index turns it into a column
        return pd.concat(ranked_parts) \
                 .reset_index() \
                 .sort_values(['unit_code', 'rank'], ignore_index=True)

    # Collect the per-unit results and concatenate once (concatenating inside
    # the loop copies the accumulated frame on every iteration)
    ranked_parts = [custom_rank(calendar[calendar['unit_code'] == unit_code])
                    for unit_code in calendar['unit_code'].unique()]
    if not ranked_parts:
        # Empty calendar: still return a frame with the expected columns
        ranked_parts = [custom_rank(calendar)]

    ranked_df = pd.concat(ranked_parts, ignore_index=True)

    # Sort the final DataFrame by 'unit_code' and 'rank'
    return ranked_df.sort_values(by=['unit_code', 'rank']).reset_index(drop=True)


In [None]:


def calculate_AD_area(step, dist_array, ranked_df_rank, Annual=None, AH_array=None):
    """
    Calculate the area to be distributed (AD_area) for steps 1 and 5 of the MIRCA model.

    Step 1: AD_area = AH_array * AEI / ARA wherever ARA > 0 (0 elsewhere), divided
    by the 'Subcrop' count of the first row when that count is larger than 1.

    Step 5: the potential area is CLE for crops listed in `Annual`, otherwise CLE
    only where CLE > AEI (0 elsewhere). The potential area is scaled down so its
    sum equals the first row's 'Growing_area' when the potential exceeds it.

    Parameters:
    step (int): The current step in the MIRCA model (1 or 5).
    dist_array (dict): Arrays keyed by "dist_array_aei", "dist_array_ara" and
                       "dist_array_cle". (Presumably area equipped for irrigation,
                       arable area and cropland extent -- confirm with the data owner.)
    ranked_df_rank (pd.DataFrame): Crop information; the first row's 'Subcrop' (step 1)
                                   or 'Crop' and 'Growing_area' (step 5) are used.
    Annual (list, optional): Names of annual crops. Required for step 5.
    AH_array (np.ndarray, optional): Harvested-area array used in step 1. When omitted,
                                     a module-level variable named AH_array is used,
                                     which is what this function relied on before the
                                     parameter existed.

    Returns:
    np.ndarray: The calculated AD_area array, representing the area to be distributed.

    Raises:
    ValueError: If `step` is not 1 or 5, or if step 1 has no harvested-area array.
    """
    if step == 1:
        if AH_array is None:
            # Backward compatibility with callers that only define a global
            AH_array = globals().get("AH_array")
            if AH_array is None:
                raise ValueError("calculate_AD_area: step 1 needs AH_array "
                                 "(pass AH_array=... or define a module-level AH_array)")

        ara = dist_array["dist_array_ara"]
        # The quotient is evaluated for every cell, including ARA == 0 cells that
        # np.where then discards; silence the warnings those cells produce.
        with np.errstate(divide='ignore', invalid='ignore'):
            AD_area = np.where(ara > 0, (AH_array * dist_array["dist_array_aei"]) / ara, 0)

        # Split the area evenly when the crop has several sub-crops
        subcrop_count = ranked_df_rank.Subcrop.tolist()[0]
        if subcrop_count > 1:
            AD_area = AD_area / subcrop_count
        return AD_area

    if step == 5:
        cle = dist_array["dist_array_cle"]
        if ranked_df_rank.Crop.tolist()[0] in Annual:
            Pot_ATS5 = cle
        else:
            Pot_ATS5 = np.where(cle > dist_array["dist_array_aei"], cle, 0)

        ATS_5 = ranked_df_rank.Growing_area.tolist()[0]
        potential_total = np.sum(Pot_ATS5)
        # A zero potential would divide by zero; that quotient is discarded by
        # np.where, so only the warning is suppressed.
        with np.errstate(divide='ignore', invalid='ignore'):
            AD_ra5 = np.where(ATS_5 < potential_total, ATS_5 / potential_total, 1)
        return Pot_ATS5 * AD_ra5

    raise ValueError("calculate_AD_area only handles steps 1 and 5, got {!r}".format(step))


def adjust_AD_area(step, dist_array, df_tot, AH_array, AD_area, ATS_2, 
                   listcom, r_s, c_s, ranked_df_rf=None, Annual=None):
    """
    Add the area distributed in step 2, 3, 4, 6 or 7 of the MIRCA model to AD_area.

    For every cell the largest 'Total_st1' value over the growing months is taken
    as the area already occupied. From it a step-specific potential area
    (Pot_ATS) is derived:

    - step 2: min(free AEI, AH_array - AD_area clipped at 0)
    - step 3: free AEI where CLE > 0
    - step 4: free AEI where CLE == 0
    - step 6: free AEI where CLE > 0 for crops in `Annual`; otherwise the free
              part of 95 % of ARA where CLE > AEI
    - step 7: the free part of 95 % of ARA where CLE > 0 or AEI > 0

    The potential is scaled down so that its sum equals ATS_2 when it exceeds
    ATS_2, and nothing is added when ATS_2 is not positive.

    Parameters:
    step (int): The current step in the MIRCA model (2, 3, 4, 6 or 7).
    dist_array (dict): Arrays keyed by "dist_array_aei", "dist_array_ara" and
                       "dist_array_cle". (Presumably area equipped for irrigation,
                       arable area and cropland extent -- confirm with the data owner.)
    df_tot (pd.DataFrame): Columns 'Month' and 'Total_st1'. The rows are assumed to be
                           grouped cell by cell (all months of cell 0, then cell 1, ...),
                           because consecutive filtered rows are treated as one cell.
    AH_array (np.ndarray): An array representing the harvested area (used in step 2).
    AD_area (np.ndarray): The area distributed so far.
    ATS_2 (float): The area still to be distributed in the current step.
    listcom (list): The months during which the crop grows.
    r_s (int): Number of rows in the spatial grid.
    c_s (int): Number of columns in the spatial grid.
    ranked_df_rf (pd.DataFrame, optional): Crop information; the first row's 'Crop'
                                           is used in step 6.
    Annual (list, optional): Names of annual crops. Required for step 6.

    Returns:
    np.ndarray: AD_area plus the area distributed in this step (a new array).

    Raises:
    ValueError: If `step` is not one of 2, 3, 4, 6, 7.
    """
    if step not in (2, 3, 4, 6, 7):
        raise ValueError("adjust_AD_area only handles steps 2, 3, 4, 6 and 7, got {!r}".format(step))

    # Keep only the rows of the months in which the crop grows
    df_max_filt = df_tot[df_tot["Month"].isin(listcom)]

    # Number of filtered rows that belong to one cell
    n = len(df_max_filt) // (r_s * c_s)

    # Per cell: the largest total already assigned in any growing month
    df_max = df_max_filt.groupby(np.arange(len(df_max_filt)) // n).max()
    max_step2 = np.array(df_max["Total_st1"]).reshape(r_s, c_s)

    # AEI that is still unoccupied in every growing month
    aei = dist_array["dist_array_aei"]
    AEI_fr_ce_2 = np.where(aei > max_step2, aei - max_step2, 0)

    if step in (6, 7):
        # Unoccupied part of 95 % of ARA
        area_95_ = 0.95 * dist_array["dist_array_ara"]
        free_area_95_6 = np.where(area_95_ > max_step2, area_95_ - max_step2, 0)

    if step == 2:
        # Harvested area not yet covered by the distributed area
        AH_fr_ce_2 = np.maximum(AH_array - AD_area, 0)
        Pot_ATS2 = np.minimum(AEI_fr_ce_2, AH_fr_ce_2)
    elif step == 3:
        Pot_ATS2 = np.where(dist_array["dist_array_cle"] > 0, AEI_fr_ce_2, 0)
    elif step == 4:
        Pot_ATS2 = np.where(dist_array["dist_array_cle"] == 0, AEI_fr_ce_2, 0)
    elif step == 6:
        if ranked_df_rf.Crop.tolist()[0] in Annual:
            Pot_ATS2 = np.where(dist_array["dist_array_cle"] > 0, AEI_fr_ce_2, 0)
        else:
            Pot_ATS2 = np.where(dist_array["dist_array_cle"] > aei, free_area_95_6, 0)
    else:  # step 7
        Pot_ATS2 = np.where((dist_array["dist_array_cle"] > 0) | (aei > 0), free_area_95_6, 0)

    # Distribution ratio ATS / Pot_ATS, capped at 1. A zero potential would
    # divide by zero; only the resulting warning is suppressed.
    potential_total = np.sum(Pot_ATS2)
    with np.errstate(divide='ignore', invalid='ignore'):
        AD_ra2 = np.where(ATS_2 < potential_total, ATS_2 / potential_total, 1)

    # Exact area to be distributed at this step
    if ATS_2 > 0:
        AD_area2 = Pot_ATS2 * AD_ra2
    elif step in (6, 7):
        # Kept as in the previous implementation: zero, but via the scaled potential
        AD_area2 = (Pot_ATS2 * AD_ra2) * 0
    else:
        AD_area2 = Pot_ATS2 * 0

    return AD_area + AD_area2


def distributeArea(step, ranked_df_ir_rank, c_s, r_s, listcom, arr1_re, df_mon):
    """
    Spread a per-cell area over the twelve months, keeping it only in growing months.

    Parameters:
    step (int): The current step in the MIRCA model (not used by the computation).
    ranked_df_ir_rank (pd.DataFrame): Crop information; the first row's 'Crop' name
                                      determines the name of the returned Series.
    c_s (int): Number of columns in the spatial grid.
    r_s (int): Number of rows in the spatial grid.
    listcom (list): The months during which the crop grows.
    arr1_re (np.ndarray): The area of every cell repeated 12 times in a row
                          (length 12 * r_s * c_s), one value per cell and month.
    df_mon (pd.DataFrame): Ignored; a fresh month DataFrame is always built and returned.

    Returns:
    tuple: (df_list, df_mon). df_list is a one-element list holding a Series named
           "Array_<first 3 letters of the crop><last letter>1" with the area in
           growing months and 0 in all other months. df_mon is a DataFrame with a
           single 'Month' column cycling 1..12 once per cell.
    """
    crop = ranked_df_ir_rank.Crop.tolist()[0]
    array_name = "Array_" + crop[:3] + crop[-1] + "1"

    # One block of months 1..12 per grid cell
    df_mon = pd.DataFrame({'Month': np.tile(np.arange(1, 13), c_s * r_s)})

    area = pd.Series(np.asarray(arr1_re).reshape(-1), index=df_mon.index, name=array_name)

    # Zero the months outside the growing season. Series.where returns a new
    # object, so this neither writes through `.values` (which fails when pandas
    # Copy-on-Write is active) nor touches the caller's array.
    in_season = df_mon['Month'].isin(listcom)
    df_list = [area.where(in_season, 0)]

    return df_list, df_mon


In [None]:
from osgeo import gdal
import numpy as np
import os
import pandas as pd

def step47_tiff_save(step, AD_area, ranked_df_rank, template_tiff, 
                     listcom, df_mon, Step_1, r_s, c_s, year, unit_code, crop_name, 
                     Irrigated="Irrigated", output_folder="../scratch"):
    """
    Report the distributed area and write one GeoTIFF per growing month.

    Files are written to
    "<output_folder>/<year>/District/<Irrigated>/<crop_name>_<unit_code>_<ir|rf>_<month>.tiff"
    and only when the crop's 'Growing_area' is larger than 0.

    Parameters:
    ----------
    step : int
        The current step in the model; only used in the printed messages.
    AD_area : np.ndarray
        The area distributed so far; its rounded sum is compared with the growing area.
    ranked_df_rank : pd.DataFrame
        Crop information; the first row's 'Growing_area' is used.
    template_tiff : str
        Path to the template TIFF file, used to extract geotransform and projection.
    listcom : list
        List of months during which the crop grows.
    df_mon : pd.DataFrame
        Single-column DataFrame with the month of every (cell, month) row.
    Step_1 : pd.Series
        The per (cell, month) values that are written to the rasters.
    r_s, c_s : int
        Number of rows and columns in the spatial grid.
    year : str
        Used as a directory level of the output path.
    unit_code : str
        Used in the output file name.
    crop_name : str
        Used in the output file name.
    Irrigated : str, optional
        Directory level of the output path; "Irrigated" selects the "ir" file-name
        tag, any other value selects "rf". Defaults to "Irrigated".
    output_folder : str, optional
        Root directory for the output TIFF files. Defaults to "../scratch".

    Returns:
    -------
    bool
        Always True.

    Raises:
    ------
    RuntimeError
        If the template TIFF cannot be opened by GDAL.
    """
    # Combine month and step data into a single DataFrame
    monthly_area = pd.concat([df_mon, Step_1], axis=1)
    monthly_area.columns = ['Month', 'Total_st1']

    growing_area = ranked_df_rank.Growing_area.tolist()[0]
    distributed = round(np.sum(AD_area))

    # Report whether the distributed area matches the expected growing area.
    # An excess of 1 or more is not reported (unchanged behaviour).
    if distributed < growing_area:
        print("Error: Full area not assigned. Check your code!")
        print("Area distributed after step {}: {}".format(step, distributed))
    elif distributed == growing_area or (distributed - growing_area) < 1:
        print("Iteration completed.")
        print("Area distributed after step {}: {}".format(step, distributed))

    # Proceed with saving the TIFF files only if there is a growing area
    if growing_area > 0:
        # Georeferencing is copied from the template raster
        template_tiff_dataset = gdal.Open(template_tiff)
        if template_tiff_dataset is None:
            raise RuntimeError("Could not open template TIFF: {}".format(template_tiff))
        geotransform = template_tiff_dataset.GetGeoTransform()
        projection = template_tiff_dataset.GetProjection()
        template_tiff_dataset = None  # release the GDAL dataset

        driver = gdal.GetDriverByName("GTiff")
        ir_rf = "ir" if Irrigated == "Irrigated" else "rf"

        for k in listcom:
            month_rows = monthly_area[monthly_area["Month"] == int(k)]
            arr_filenam = np.array(month_rows["Total_st1"]).reshape(r_s, c_s)

            # Replace NaN and +/-inf by 0. np.isfinite covers all three cases and,
            # unlike np.NINF, exists in every NumPy version.
            arr_filenam = np.where(np.isfinite(arr_filenam), arr_filenam, 0)

            # Construct the output TIFF file path
            output_tiff_path = "{0}/{1}/District/{2}/{3}_{4}_{5}_{6}.tiff".format(output_folder, year, 
                                                                                  Irrigated, crop_name, 
                                                                                  unit_code, ir_rf, k)
            print(output_tiff_path)

            # exist_ok avoids the check-then-create race of os.path.exists()
            os.makedirs(os.path.dirname(output_tiff_path), exist_ok=True)

            # NOTE(review): the band is created as GDT_Byte, so GDAL stores the
            # values as 8-bit integers (0-255). If Total_st1 holds fractional or
            # larger areas this loses information -- confirm whether a float
            # band type is wanted before changing the output format.
            out_tiff = driver.Create(output_tiff_path, arr_filenam.shape[1], 
                                     arr_filenam.shape[0], 1, gdal.GDT_Byte)
            out_tiff.SetGeoTransform(geotransform)
            out_tiff.SetProjection(projection)

            # Write the array to the TIFF file
            out_band = out_tiff.GetRasterBand(1)
            out_band.WriteArray(arr_filenam)

            # Flush data to disk and close the file (GDAL closes on dereference)
            out_band.FlushCache()
            out_band = None
            out_tiff = None
            print("TIFF file saved successfully @ {}".format(output_tiff_path))

    return True


In [None]:
import numpy as np
import pandas as pd

def step00(step, year, ranked_df_ir_rank, ranked_df_rf_rank, 
           dist_array, AH_array, AD_area,
           c_s=None, r_s=None,
           df_mon=None, df_tot=None, TAD_12=None, df_cumulative=None, 
           ATS_2=None, verbose=False,
           template_tiff=None,
           Annual=None, unit_code=None, crop_name=None,
           output_folder="../scratch"):
    """
    Main function to execute a specific step in the MIRCA model.

    Steps 1-4 work on the irrigated crop (`ranked_df_ir_rank`), steps 5-7 on the
    rainfed crop (`ranked_df_rf_rank`). The step computes or adjusts the
    distributed area, spreads it over the growing months, adds it to the
    cumulative totals and, for steps 4 and 7, writes the monthly TIFF files.

    Parameters:
    ----------
    step : int
        The step in the MIRCA model to execute (1-7).
    year : int
        The year for which the model is being run (used in the output path).
    ranked_df_ir_rank : pd.DataFrame
        The DataFrame describing the irrigated crop (first row is used).
    ranked_df_rf_rank : pd.DataFrame
        The DataFrame describing the rainfed crop (first row is used).
    dist_array : dict
        Arrays keyed by "dist_array_aei", "dist_array_ara" and "dist_array_cle",
        as read by calculate_AD_area and adjust_AD_area.
    AH_array : np.ndarray
        The array representing harvested area; forwarded to adjust_AD_area.
    AD_area : np.ndarray
        The area distributed so far (replaced in steps 1 and 5).
    c_s, r_s : int, optional
        Number of columns and rows in the spatial grid, needed by steps 2, 3, 4, 6
        and 7. The returned values are recomputed from AD_area.
    df_mon : pd.DataFrame, optional
        Month DataFrame; rebuilt by distributeArea on every call.
    df_tot : pd.DataFrame, optional
        Cumulative totals ('Month', 'Total_st1') from the previous step.
    TAD_12 : np.ndarray, optional
        Not used by this function.
    df_cumulative : pd.Series, optional
        Running sum of distributed areas; updated in place and returned.
        A zero Series is created when None.
    ATS_2 : float, optional
        Area still to be distributed, as returned by the previous step.
    verbose : bool, optional
        Whether to print detailed information during execution. Defaults to False.
    template_tiff : str, optional
        Path to the template TIFF file for saving results (steps 4 and 7).
    Annual : list, optional
        Names of annual crops, needed for steps 5-7. When omitted, a module-level
        variable named ``Annual`` is used.
    unit_code, crop_name : optional
        Used in the output file names of steps 4 and 7. When omitted, module-level
        variables with the same names are used.
    output_folder : str, optional
        Root directory for the TIFF files. Defaults to "../scratch".

    Returns:
    -------
    dict
        Keys "df_cumulative", "AD_area", "df_mon", "ATS_2", "df_tot", "c_s", "r_s".

    Raises:
    ------
    ValueError
        If `step` is not in 1-7.
    NameError
        If Annual / unit_code / crop_name is needed but neither passed nor
        defined at module level.
    """
    irrigated_steps = (1, 2, 3, 4)
    rainfed_steps = (5, 6, 7)

    # Pick the crop table that belongs to this step once, so every later use
    # (growing months, area validation, saving) refers to the same crop.
    if step in irrigated_steps:
        ranked_df = ranked_df_ir_rank
        water_label = "Irrigated"
    elif step in rainfed_steps:
        ranked_df = ranked_df_rf_rank
        water_label = "Rainfed"
    else:
        raise ValueError("step must be an integer from 1 to 7, got {!r}".format(step))

    listcom = growingMonths(ranked_df)

    if step in rainfed_steps:
        Annual = _step00_setting(Annual, "Annual")

    # Steps 1 and 5 start a new distribution, the others extend the current one
    if step == 1:
        # NOTE(review): AH_array is not forwarded here; the call matches the
        # helper's (step, dist_array, ranked_df, Annual) positional signature.
        # Verify where the helper obtains the harvested area for step 1.
        AD_area = calculate_AD_area(step, dist_array, ranked_df)
    elif step == 5:
        AD_area = calculate_AD_area(step, dist_array, ranked_df, Annual)
    elif step in (2, 3, 4):
        AD_area = adjust_AD_area(step, dist_array, df_tot, AH_array, AD_area, 
                                 ATS_2, listcom, r_s, c_s, ranked_df)
    else:  # steps 6 and 7
        AD_area = adjust_AD_area(step, dist_array, df_tot, AH_array, AD_area, 
                                 ATS_2, listcom, r_s, c_s, ranked_df, Annual)

    # Validate the distributed area against the growing area of the crop handled
    # in this step. (Steps 5 and 6 used to be compared with the irrigated crop's
    # growing area although they distribute the rainfed crop.)
    if step in (1, 2, 3, 5, 6):
        growing_area = ranked_df.Growing_area.tolist()[0]
        distributed = round(np.sum(AD_area), 2)

        if distributed <= growing_area:
            ATS_2 = growing_area - distributed

            if verbose:
                print("Area distributed after step {}:".format(step), distributed)
                print("Iteration continues")

            if distributed == growing_area:
                if step in (5, 6):
                    ATS_2 = 0
                if verbose:
                    print("Area to be distributed in next step (step {}):".format(step + 1), ATS_2)
                    print("Iteration completed")
        else:
            ATS_2 = 0
            if growing_area > 0:
                if verbose:
                    print("Error: More area available than the growing area. Check your code!")
                # Scale the distribution down so that it sums to the growing area
                AD_area = AD_area * (growing_area / np.sum(AD_area))

                if verbose:
                    print("Iteration completed")
                    print("Area distributed in step {}:".format(step), round(np.sum(AD_area), 2))
            elif growing_area == 0:
                AD_area = AD_area * 0
                if verbose:
                    print("No area will be distributed in step {} (Zero harvested area)".format(step))

    # One value per (cell, month): every cell's area repeated 12 times
    r_s = len(AD_area)
    c_s = len(AD_area[0])
    arr1_re = np.repeat(AD_area, 12)

    df_list, df_mon = distributeArea(step, ranked_df, c_s, r_s, listcom, arr1_re, df_mon)

    Step_1 = pd.concat(df_list, axis=1).sum(axis=1)

    if df_cumulative is None:
        df_cumulative = pd.Series(np.zeros(len(Step_1)), index=Step_1.index)

    df_cumulative += Step_1
    df_tot = pd.concat([df_mon, df_cumulative], axis=1)
    df_tot.columns = ['Month', 'Total_st1']

    # Save the results as TIFF files for steps 4 and 7
    if step in (4, 7):
        step47_tiff_save(step, AD_area, ranked_df, template_tiff, 
                         listcom, df_mon, Step_1, r_s, c_s, year,
                         _step00_setting(unit_code, "unit_code"),
                         _step00_setting(crop_name, "crop_name"),
                         Irrigated=water_label, output_folder=output_folder)

    return {
        "df_cumulative": df_cumulative, 
        "AD_area": AD_area, 
        'df_mon': df_mon, 
        "ATS_2": ATS_2, 
        "df_tot": df_tot,
        "c_s": c_s, 
        "r_s": r_s
    }


def _step00_setting(value, name):
    """Return `value`, or the module-level variable called `name` when value is None."""
    if value is not None:
        return value
    try:
        # Earlier versions of step00 read these names as globals; keep that working
        return globals()[name]
    except KeyError:
        raise NameError(
            "step00 needs '{0}': pass it as a keyword argument or define a "
            "module-level variable named '{0}'".format(name)) from None


In [None]:
# Load the datasets


# AEI = '../data/Ras_shape_out/Rasters/AEI/AEI_10.tif'
# Area = '../data/Ras_shape_out/Rasters/Area/ara_10.tif'
# AH = '../data/Ras_shape_out/Rasters/AH'

# # ROOT FOLDER SHOULD CONTAIN CROP SUBFOLDERS WITH THE CLIPPED AREA TIFFS
# root_folder = {y:"data/Clipped_20{}".format(y) for y in ["00", "05", "10", "15"]}

# # Load the crop and area data clipped by unit code 
# raster_folder = "../data/Ras_shape_out/Rasters"
# tiff_files = glob.glob("{}/**/*.tif".format(raster_folder), recursive=True)
# tiff_files

# Load crop calendars
# cCropCal = pd.read_csv("step1/hypothetical_ccc3.csv")
# cCropCal_ir = {}
# cCropCal_rf = {}
# for year in ["00", "05", "10", "15"]:
#   path_ir = "../data/CropCalender/20{}_ir.csv".format(year)
#   if os.path.isfile(path_ir):
#     cCropCal_ir[year] = pd.read_csv(path_ir)
#     # cCropCal_ir[year]
#   path_rf = "../data/CropCalender/20{}_rf.csv".format(year)
#   if os.path.isfile(path_rf):
#     cCropCal_rf[year] = pd.read_csv(path_rf)
#     # cCropCal_rf[year]


# # CLE crop land extent

# # Load the unit code files
# tiff_files = glob.glob("{}/**/*.nc4".format(raster_folder), recursive=True)
# tiff_files

# shp_folder = "../data/Ras_shape_out/Shapefiles"
# shpfiles = glob.glob("{}/**/*.shp".format(shp_folder), recursive=True)
# shpfiles

# MASK data
# # os.listdir(shp_folder)
# # gdf = gpd.read_file(shpfiles[0])
# gdf = gpd.read_file(shpfiles[0]).iloc[:5]
# gdf.plot()

# gdfmask(gdf=gdf, raster_path = AEI)
# gdfmask(gdf=gdf, raster_path = Area)

In [None]:
# def rasteriosave7(ranked_df, tiff_ir, unit_code, listcom, output_folder):
#   with rasterio.open(tiff_ir) as src:
#     xmin, ymin, xmax, ymax = src.bounds
#     xres, yres = src.res
#     pixel_width, pixel_height = src.res
#     crs = src.crs
      
#     for k in listcom:
#       file_name = ranked_df.Crop[i][:3] + ranked_df.Crop[i][-1] + str(unit_code)+"_tot"
#       df_max_filt4 = df_tot_st4[(df_tot_st4["Month"] == k)]
#       arr_filenam = np.array(df_max_filt4["Total_st4"]).reshape(r_s, c_s)
#       arr_filenam = np.where(np.logical_or(np.isnan(arr_filenam), 
#                                            np.isinf(arr_filenam)), 0, arr_filenam)
#       arr_filenam = arr_filenam.astype(rasterio.float32)  # ensure data type is compatible with output GeoTIFF
#       Ras_filena = ranked_df.Crop[i][:3] + ranked_df.Crop[i][-1] + "_" + str(unit_code) + "_ir" + "_" + str(k)
#       transform = rasterio.transform.from_origin(xmin, ymax, pixel_width, pixel_height)
#       profile = {
#           'driver': 'GTiff',
#           'dtype': arr_filenam.dtype,
#           'count': 1,
#           'height': arr_filenam.shape[0],
#           'width': arr_filenam.shape[1],
#           'transform': transform,
#           'crs': crs,
#           'nodata': 0
#         }
        
#       with rasterio.open(os.path.join(output_folder, Ras_filena + '.tif'), 
#                          'w', **profile) as dst:
#           dst.write(arr_filenam, 1)