# Notebook 2: Clipping FPAR Data to Specified AOIs
Following the download of FPAR data, the next critical step in data preparation is clipping or cropping this data to specific areas of interest, often aligned with administrative boundaries such as countries or states.

This Jupyter notebook focuses on the clipping process, enabling you to reduce the extensive datasets to your precise study areas, thereby ***reducing computational load***. It is designed to facilitate clipping FPAR data to your specific areas of interest (AOIs), defined here by administrative regions.

Within this notebook, you will:

- Read and handle administrative boundaries using shapefiles with geopandas, then execute the clipping process on FPAR raster datasets.

***Note:*** You can use this notebook to clip any raster data with a shapefile.

In [None]:
# install libraries
!pip install geopandas rioxarray tqdm

In [None]:
import geopandas as gpd
import rioxarray
from tqdm import tqdm
import os
import gc # Manage Memory
import re

In [None]:
# Path to the administrative-boundary shapefile (fill in before running)
shapefile_path = ""

# Load the boundaries into a GeoDataFrame.
# NOTE(review): `admin_shapefile` is not referenced again in the visible cells;
# the batch cell further down reads its own `shapefile` -- confirm this is needed.
admin_shapefile = gpd.read_file(shapefile_path)

In [21]:
def extract_date_from_filename(filename):
    """Return the first run of eight digits (YYYYMMDD) found in *filename*.

    Args:
        filename: File name (or any string) to search.

    Returns:
        The matched eight-digit string, or ``None`` when the name contains no
        run of eight consecutive digits. The digits are not validated as a
        real calendar date; for a longer digit run the first eight are used.
    """
    # Regex for a date in the format YYYYMMDD
    pattern = r'\d{8}'

    match = re.search(pattern, filename)

    # re.search yields None when nothing matches; calling .group() on it would
    # raise AttributeError, so report "no date" explicitly instead.
    if match is None:
        return None

    return match.group()

In [None]:
def clip_raster_with_shapefile(raster_path, shapefile, output_dir):
    """Clip one raster to the shapefile geometries and save it as a GeoTIFF.

    The output file is named ``fpar_eu_<date>.tif``, where ``<date>`` is taken
    from the raster's file name via ``extract_date_from_filename``.

    Args:
        raster_path: Path to the input raster file.
        shapefile: GeoDataFrame whose ``geometry`` column defines the clip area.
        output_dir: Directory in which the clipped GeoTIFF is written. It is
            created if it does not exist yet.

    Returns:
        None. A message is printed for every processed or skipped file.
    """
    # Resolve the output name first so that a file without a date is skipped
    # before the (potentially large) raster is opened at all.
    raster_filename = os.path.basename(raster_path)
    date_str = extract_date_from_filename(raster_filename)

    if date_str is None:
        print(f"Skipping {raster_filename}: no YYYYMMDD date found in the file name")
        return

    # An empty output_dir means "current directory"; makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"fpar_eu_{date_str}.tif")

    # The context manager closes the underlying file handle even if clipping
    # or writing fails.
    with rioxarray.open_rasterio(raster_path) as raster:
        # write_crs only labels the data, it does not reproject. Assign the
        # geographic default only when the raster carries no CRS, so that an
        # existing CRS is never overwritten with a wrong label.
        if raster.rio.crs is None:
            georeferenced = raster.rio.write_crs("EPSG:4326")
        else:
            georeferenced = raster

        # Passing the shapefile CRS lets rioxarray reproject the geometries to
        # the raster CRS when they differ; with crs=None the geometries are
        # assumed to already be in the raster CRS.
        clipped = georeferenced.rio.clip(shapefile.geometry, crs=shapefile.crs)

        # Save the clipped raster to a new GeoTIFF file
        clipped.rio.to_raster(output_path)

        # Drop the large arrays before the next file is processed
        del clipped
        del georeferenced

    gc.collect()

    print(f"Raster clipped successfully and saved to {output_path}")

In [None]:
# Folder that holds the raw FPAR raster data (fill in before running)
folder_path = ""

# Collect the full path of every entry in the folder whose name ends in '.tif'
file_paths = []
for entry in os.listdir(folder_path):
    if entry.endswith('.tif'):
        file_paths.append(os.path.join(folder_path, entry))

# Directory that receives the clipped raster files
output_dir = ""

# Shapefile whose geometries are used for clipping
shapefile_path = ""
shapefile = gpd.read_file(shapefile_path)

# NOTE: You can slice `file_paths` to match your processing power,
# i.e. process the data in smaller batches.
progress = tqdm(file_paths, desc="Processing files", unit="file")
for file_path in progress:
    clip_raster_with_shapefile(file_path, shapefile, output_dir)