## leaf area index
1. use grid files to extract the mean leaf area index for the region around Berlin (8-day resolution)
2. load the data of the different years into a data frame, normalize each year and build the mean
3. extrapolate the leaf area index to every time step


NOTE: the grid-based source files are not in the repository due to size constraints. Please add the [.tif grid files for 5km and 8d resolution for 2016-2020 (8.4 GB) + 2021-2022 (3.3 GB)](https://zenodo.org/records/8296768) from Yan et al. (2024) to the lai folder inside the data folder (data/lai/HiQ_LAI_WGS84_5km_8day_{year + index_day[n]}.tif)

[Yan, K. et al. (2024). HiQ-LAI: a high-quality reprocessed MODIS leaf area index dataset with better spatiotemporal consistency from 2000 to 2022. Earth System Science Data, 16(3), 1601–1622.](https://doi.org/10.5194/essd-16-1601-2024)



In [8]:
import rioxarray

# define function to process a leaf area index .tif grid

def mean_in_grid(file_path, bbox, fill_value=255, scale=0.1):
    """
    Crop a raster grid to a bounding box and return the scaled mean of band 1.

    file_path: path to the .tif grid file
    bbox: box used to clip the data, in the format (min_lon, min_lat, max_lon, max_lat)
    fill_value: raw cell value that marks missing data; these cells are ignored
    scale: factor the raw mean is multiplied with before rounding
    return: mean of the remaining cells times `scale`, rounded to 3 decimals
            (NaN when no cell is left after dropping the fill value)
    """
    min_lon, min_lat, max_lon, max_lat = bbox

    data = rioxarray.open_rasterio(file_path)  # Load the data with rioxarray
    try:
        clipped = data.rio.clip_box(minx=min_lon, miny=min_lat, maxx=max_lon, maxy=max_lat)
        # to_series() pulls the cell values into memory, so it has to run
        # before the file handle is released below.
        cell_values = clipped.sel(band=1).to_series()
    finally:
        data.close()  # do not keep one open handle per processed .tif

    # Filter on the series itself instead of a renamed DataFrame column, so the
    # result does not depend on whether the DataArray carries a name.
    valid_values = cell_values[cell_values != fill_value]

    return round(valid_values.mean() * scale, 3)  # return only mean of LAI value

# generate the zero-padded day-of-year labels ('001', '009', ..., '361') that
# index the .tif files (8 day resolution)
index_day = [f"{day:03d}" for day in range(1, 362, 8)]


# load data for each time step
# lai_mean_timesteps maps a day label (e.g. '001') to the list of mean LAI
# values, one entry per year in the order of the year list below.
lai_mean_timesteps = {}
previous_values = None  # per-year values of the preceding time step

for day in index_day:
    current_year = []
    for position, year in enumerate(['2017', '2018', '2019', '2020', '2021', '2022']):

        path = f"../data/lai/HiQ_LAI_WGS84_5km_8day_{year + day}.tif" # define current path to .tif file
        try:
            value = mean_in_grid(path, bbox= (12.4, 51.5, 14.4, 53.5)) # extract mean lai for this file
        except Exception:
            # Best effort: a grid that cannot be read is replaced by the value
            # of the SAME year from the previous time step. Without any
            # previous time step there is nothing to carry forward, so NaN is
            # stored instead of failing inside the handler.
            if previous_values is not None:
                value = previous_values[position]
            else:
                value = float('nan')
            print(f'timestep: {day} not found!')
        current_year.append(value)

    lai_mean_timesteps[day] = current_year
    previous_values = current_year


timestep: 289 not found!


In [7]:
# transform the leaf area index means into a normalized factor per time step
import pandas as pd

years = ['2017', '2018', '2019', '2020', '2021', '2022']

# rows: day labels, columns: one per year (positions 0..5 renamed to the year)
lai_df = pd.DataFrame(lai_mean_timesteps).transpose().rename(columns=dict(enumerate(years)))

# scale every year by its own maximum; the maximum is computed once per column
for year in years:
    lai_df[year] = lai_df[year] / lai_df[year].max()

# average of the normalized years, added column by column in year order
lai_df['lai_factor'] = sum(lai_df[year] for year in years) / len(years)
lai_df


Unnamed: 0,2017,2018,2019,2020,2021,2022,lai_factor
1,0.212213,0.346486,0.319563,0.535135,0.154615,0.525335,0.348891
9,0.264983,0.441563,0.451195,0.627928,0.26,0.671606,0.452879
17,0.298153,0.360767,0.520049,0.736486,0.395,0.743786,0.50904
25,0.336977,0.376174,0.434184,0.440541,0.358077,0.480402,0.404392
33,0.262721,0.345735,0.36695,0.33964,0.302692,0.436424,0.34236
41,0.240106,0.321684,0.350344,0.334234,0.271154,0.40153,0.319842
49,0.238598,0.311537,0.326853,0.310811,0.314615,0.423518,0.320989
57,0.224651,0.291244,0.317942,0.383333,0.306923,0.456501,0.330099
65,0.263852,0.29162,0.36047,0.44009,0.328846,0.442161,0.354506
73,0.309461,0.272454,0.366545,0.476577,0.382692,0.451721,0.376575


In [10]:
# combine factor with dates
import pandas as pd
import datetime


def assin_lai(date):
    """
    Look up the leaf area index factor for a time step.

    date: time step whose first eight characters are YYYYMMDD, e.g. 2023010100
    return: value of the "lai_factor" column of the global lai_df for the row
            whose index equals the zero-padded day of year of `date`
            (e.g. "009"), or None if lai_df has no such row
    raises: ValueError if the first eight characters are not a valid date
    """
    date = str(date)

    # 1. extract the day of the year, zero-padded to three digits like the lai_df index
    day = datetime.date(int(date[:4]), int(date[4:6]), int(date[6:8]))
    day_nr = f"{day.timetuple().tm_yday:03d}"

    # 2. return the leaf area index factor of that day; only a missing row
    #    yields None, any other problem is raised instead of being hidden
    matches = lai_df.loc[lai_df.index == day_nr, "lai_factor"]
    if matches.empty:
        return None
    return matches.iloc[0]


# Read the time steps of the meteorological data set, attach the LAI factor of
# each time step, fill the gaps with the last known value and store the result.
timesteps = pd.read_csv('../datasets/df_meteorological_impute.csv').filter(items=['time_step'])
timesteps = timesteps.assign(lai_factor=timesteps['time_step'].apply(assin_lai))
timesteps = timesteps.ffill()
timesteps.to_csv("../datasets/df_lai_factor.csv")
timesteps


Unnamed: 0,time_step,lai_factor
0,2023010100,0.348891
1,2023010101,0.348891
2,2023010102,0.348891
3,2023010103,0.348891
4,2023010104,0.348891
...,...,...
8755,2023123119,0.291876
8756,2023123120,0.291876
8757,2023123121,0.291876
8758,2023123122,0.291876
