# Filter Data for US-MMS Site
This notebook loads a multi-feature dataset stored in a Parquet file and keeps only the records for the US-MMS AmeriFlux site (Morgan Monroe State Forest), selecting rows whose latitude and longitude fall within a small tolerance of the site coordinates.

In [None]:
import pandas as pd
from pathlib import Path

# Location of the Parquet file to read; replace the placeholder with a real path.
parquet_path = Path('path_to_dataset.parquet')

# Read the entire file into a DataFrame.
# The site filter further down indexes the 'latitude' and 'longitude' columns,
# so they must be present; the original note also lists a time column
# (not used by the visible cells) alongside any other features.
df = pd.read_parquet(parquet_path)

# Report the unfiltered size so it can be compared with the filtered count.
print('Rows before filtering:', df.shape[0])

In [None]:
# Site coordinates for US-MMS (Morgan Monroe State Forest).
US_MMS_LAT = 39.3232
US_MMS_LON = -86.4137

# Largest absolute coordinate offset still treated as "at the site";
# adjust depending on grid resolution.
tolerance = 0.01

# One boolean mask per coordinate: True where the row lies within
# `tolerance` of the site value (bounds inclusive).
lat_match = (df['latitude'] - US_MMS_LAT).abs() <= tolerance
lon_match = (df['longitude'] - US_MMS_LON).abs() <= tolerance

# Keep rows that satisfy both masks; .copy() makes site_df an independent
# frame rather than a selection tied to df.
site_df = df.loc[lat_match & lon_match].copy()

print('Rows after filtering:', site_df.shape[0])

In [None]:
# Optional: persist the filtered rows.
# The output sits in the same directory as the input file and reuses its
# stem with a site-specific suffix appended.
site_suffix = '_US_MMS.parquet'
out_path = parquet_path.with_name(parquet_path.stem + site_suffix)

# index=False leaves the DataFrame index out of the written file.
site_df.to_parquet(out_path, index=False)
print('Saved filtered site data to', out_path)