# Notebook 3: Aggregating FPAR Data

After successfully downloading and clipping FPAR data to your areas of interest, the next step is aggregating this data. This Jupyter notebook is designed to guide you through the aggregation of FPAR data, a process critical for summarizing and analyzing the data across various spatial and temporal dimensions.

In this notebook, you will aggregate FPAR data spatially and temporally to suit your research needs and save the result as a CSV file.

In [None]:
import os
import re
import shutil

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import xarray as xr
from rasterio.features import geometry_mask
from rasterio.mask import mask
from tqdm import tqdm

In [None]:
# Define the shapefile and crop mask
# Fill in the path to the shapefile; its rows (one geometry each) are the
# regions the rasters are aggregated over.
# NOTE: the aggregation cell further down reads the shapefile again from its
# own path and overwrites this variable.
EU_shape = gpd.read_file("")

# Define the path of Crop Mask
# Fill in the path to the crop-mask raster; the aggregation cell opens it with
# rasterio and uses its pixel values as weights for the FPAR mean.
crop_mask = ''

In [None]:
# function to extract the date from each file's name
def extract_date_from_filename(filename):
    """Return the first run of eight consecutive digits found in ``filename``.

    Parameters
    ----------
    filename : str
        File name (or path) expected to contain a date written as eight
        digits, e.g. ``YYYYMMDD``. The digits are not validated as a real
        calendar date.

    Returns
    -------
    str
        The eight-digit substring, unchanged.

    Raises
    ------
    ValueError
        If ``filename`` contains no run of eight digits.
    """
    pattern = r'\d{8}'
    match = re.search(pattern, filename)
    if match is None:
        # Without this guard the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'group'" error.
        raise ValueError(f"No 8-digit date found in filename: {filename!r}")
    return match.group()

In [None]:
# define the path of folders that contain the clipped data
folder_path = ""

# os.listdir returns entries in arbitrary order. Sort the paths so the raster
# order is reproducible between runs and machines; this matters because the
# list is sliced by position later in the notebook.
file_paths = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith('.tif')
)

In [None]:
# Column layout of the results table: the raster date first, then the
# attributes taken from the shapefile, and the aggregated FPAR value last.
columns = [
    'date',
    'LEVL_CODE',
    'NUTS_ID',
    'CNTR_CODE',
    'MOUNT_TYPE',
    'URBN_TYPE',
    'COAST_TYPE',
    'fpar',
]

# Start from an empty table that already carries those column names.
df = pd.DataFrame(columns=columns)

***Note:*** If you need additional information from the shapefile, you can modify the `columns` variable.

In [None]:
#######################################################################
##### Load crop mask and loop through each admin level in shapefile####
#######################################################################

# Index of the first raster in `file_paths` to process. 470 is the offset this
# notebook originally hard-coded in the loop (it looks like a resume point from
# an earlier run - TODO confirm). Set it to 0 to aggregate every raster.
START_INDEX = 470

# 255 254 251 values not processed in FPAR data.
# The same codes are also blanked out in the crop mask, as before.
INVALID_VALUES = [255, 254, 251]

##################################################
### Edit this part according to your shapefile####
##################################################
# Every entry of `columns` except 'date' and 'fpar' is copied from the
# shapefile row, so adding a shapefile field to `columns` is enough to export it.
attribute_columns = [name for name in columns if name not in ('date', 'fpar')]


def _weighted_nanmean(values, weights):
    """Weighted mean of ``values`` over pixels where both inputs are not NaN.

    The weights are summed over exactly the pixels that contribute to the
    numerator. Summing all non-NaN weights instead (including pixels whose
    value is NaN) would bias the mean towards zero.

    Returns ``np.nan`` when no pixel is valid or the valid weights sum to
    zero, instead of dividing by zero and emitting a RuntimeWarning.
    """
    # Keep NumPy broadcasting semantics of a plain `values * weights`.
    values, weights = np.broadcast_arrays(values, weights)
    valid = ~(np.isnan(values) | np.isnan(weights))
    total_weight = weights[valid].sum()
    if total_weight == 0:
        return np.nan
    return (values[valid] * weights[valid]).sum() / total_weight


# Read the shapefile into a GeoDataFrame using GeoPandas.
# It does not depend on the raster, so it is read once instead of once per file.
EU_shape = gpd.read_file("drive/MyDrive/AgML/shapefiles/maize_wheat_EU/maize_wheat_EU.shp")

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating inside the loop copies the whole table on every row.
records = []

with rasterio.open(crop_mask) as mask_src:
    for raster in tqdm(file_paths[START_INDEX:], desc="Processing rasters", unit="raster"):

        # Extract the date from the filename: the 8 characters right before
        # the 4-character extension (assumes names end in YYYYMMDD.tif).
        file_name = os.path.basename(raster)
        date_str = int(file_name[-12:-4])

        # Open the raster file
        with rasterio.open(raster) as src:

            # Loop through each row in the shapefile
            # NOTE(review): geometries are assumed to be in the rasters' CRS,
            # and the crop mask is assumed to share the FPAR pixel grid - verify.
            for _, row in EU_shape.iterrows():

                # Clip the raster with the current geometry
                geom = row.geometry
                out_image, _ = mask(src, [geom], crop=True)

                # Replace the codes that mark unprocessed pixels with NaN
                out_image = np.where(np.isin(out_image, INVALID_VALUES), np.nan, out_image)

                # Clip the mask with the current geometry and blank the same codes
                mask_image, _ = mask(mask_src, [geom], crop=True)
                mask_image = np.where(np.isin(mask_image, INVALID_VALUES), np.nan, mask_image)

                # Crop-mask-weighted mean FPAR for this region and date
                weighted_mean = _weighted_nanmean(out_image, mask_image)

                # One output row: date, shapefile attributes, aggregated value
                record = {'date': date_str}
                for name in attribute_columns:
                    record[name] = row[name]
                record['fpar'] = weighted_mean
                records.append(record)

# Build the results table in one go. Re-running this cell replaces `df`
# instead of appending duplicate rows to it.
df = pd.DataFrame(records, columns=columns)

# Save the DataFrame to CSV
df.to_csv('drive/MyDrive/AgML/FPAR/FPAR_CSV/FPAR_Wheat_Winter_2.csv', index=False)

  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_mean = np.nansum(out_image * mask_image) / np.nansum(mask_image)
  weighted_m

In [None]:
# Write the aggregated table to a CSV file: fill in the output path.
# index=False keeps the DataFrame's row index out of the file.
df.to_csv('', index=False)