# FLUXNET Large Sample Experiment Tutorial

This notebook demonstrates how to run CONFLUENCE over multiple FLUXNET tower sites for point-scale large-sample analysis.

In [None]:
import sys
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import subprocess
import yaml
from datetime import datetime
import seaborn as sns

# Add CONFLUENCE to path
confluence_path = Path('../').resolve()
sys.path.append(str(confluence_path))

# Set up plotting style
plt.style.use('default')
sns.set_palette("husl")
%matplotlib inline

print("Setup complete!")

In [None]:
# Experiment settings: every tunable option for this FLUXNET run is collected
# in one dictionary so later cells read from a single place.
experiment_config = dict(
    experiment_name='fluxnet_tutorial',
    max_sites=5,
    dry_run=False,
    template_config='../CONFLUENCE/0_config_files/config_point_template.yaml',
    config_dir='../CONFLUENCE/0_config_files/fluxnet',
    fluxnet_script='../CONFLUENCE/9_scripts/run_towers_fluxnet.py',
    fluxnet_csv='fluxnet_transformed.csv',
)

# Each experiment gets its own folder under ./experiments
experiment_dir = Path("./experiments") / experiment_config['experiment_name']
experiment_dir.mkdir(parents=True, exist_ok=True)

# Keep a YAML snapshot of the settings inside the experiment folder
config_snapshot = experiment_dir / 'experiment_config.yaml'
with config_snapshot.open('w') as f:
    yaml.dump(experiment_config, f)

print(f"Experiment configured: {experiment_config['experiment_name']}")

In [None]:
# Read the site table named in the experiment configuration
fluxnet_df = pd.read_csv(experiment_config['fluxnet_csv'])

site_count = len(fluxnet_df)
print(f"Loaded {site_count} FLUXNET sites")

# List every column so the available metadata is visible at a glance
print("\nColumns in dataset:")
for column_name in fluxnet_df.columns:
    print(f"  - {column_name}")

# Preview a few descriptive columns for the leading rows
preview_columns = ['ID', 'Watershed_Name', 'KG', 'Dominant_LC', 'Area_km2']
print("\nFirst 5 sites:")
display(fluxnet_df.loc[:, preview_columns].head())

In [None]:
# POUR_POINT_COORDS is "/"-separated text; the first field is stored as
# latitude and the second as longitude, both converted to floats.
coords = fluxnet_df['POUR_POINT_COORDS'].str.split('/', expand=True)
for field_index, column_name in enumerate(('lat', 'lon')):
    fluxnet_df[column_name] = coords[field_index].astype(float)

# Scatter the sites on a plain longitude/latitude canvas
fig, ax = plt.subplots(figsize=(15, 8))
ax.scatter(fluxnet_df['lon'], fluxnet_df['lat'], c='red', alpha=0.6)
ax.set_title('Global Distribution of FLUXNET Sites')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
ax.set_xlim(-180, 180)
ax.set_ylim(-60, 80)
plt.show()

In [None]:
# Select sites based on criteria - diverse climate types
# Rows without a climate class are ignored: NaN never compares equal to
# itself, so looking up "the first site with KG == NaN" would find no rows
# and fail.
climate_types = fluxnet_df['KG'].dropna().unique()

# Select one site from each climate type (up to max_sites).
# drop_duplicates(keep='first') retains the first row of every climate class
# in order of first appearance, which matches the order of `climate_types`.
# Working on the frame directly (instead of rebuilding it from row Series)
# keeps the original column dtypes and still yields a frame with all columns
# when nothing is selected.
selected_df = (
    fluxnet_df
    .dropna(subset=['KG'])
    .drop_duplicates(subset='KG', keep='first')
    .iloc[:experiment_config['max_sites']]
)

print(f"Selected {len(selected_df)} sites for processing:")
display(selected_df[['ID', 'Watershed_Name', 'KG', 'Dominant_LC']])

In [None]:
# Generate configs for selected sites
config_dir = Path(experiment_config['config_dir'])
config_dir.mkdir(parents=True, exist_ok=True)

# Directories searched for run_towers_fluxnet.py inside the helper process.
# NOTE(review): the notebook points at two different locations for the
# scripts folder (<confluence_path>/9_scripts and the folder of
# experiment_config['fluxnet_script']); both are offered so the import works
# whichever one is right - confirm and drop the other.
script_search_dirs = [
    str(confluence_path / '9_scripts'),
    str(Path(experiment_config['fluxnet_script']).resolve().parent),
]

# The helper receives every value through sys.argv rather than having it
# pasted into the source text, so quotes or backslashes in a site name
# cannot break (or inject into) the generated code.
generator_code = """
import sys
sys.path.extend(sys.argv[1:3])
from run_towers_fluxnet import generate_config_file
generate_config_file(*sys.argv[3:])
"""

generated_configs = []

for _, site in selected_df.iterrows():
    site_name = site['Watershed_Name']
    pour_point = site['POUR_POINT_COORDS']
    bounding_box = site['BOUNDING_BOX_COORDS']

    # Create config file name
    config_path = config_dir / f"config_{site_name}.yaml"

    # sys.executable runs the helper with the same interpreter (and therefore
    # the same installed packages) as this kernel.
    cmd = [
        sys.executable, '-c', generator_code,
        *script_search_dirs,
        str(experiment_config['template_config']),
        str(config_path),
        str(site_name),
        str(pour_point),
        str(bounding_box),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        generated_configs.append(config_path)
        print(f"Generated config for {site_name}")
    else:
        # Surface the failure instead of silently skipping the site
        print(f"Failed to generate config for {site_name} "
              f"(exit code {result.returncode}):\n{result.stderr.strip()}")

print(f"\nGenerated {len(generated_configs)} configuration files")

In [None]:
# Launch CONFLUENCE runs
# sys.executable keeps the launcher in the same Python environment as this
# kernel; a bare "python" may resolve to a different interpreter or to none.
cmd = [sys.executable, experiment_config['fluxnet_script']]

# The launcher reads an answer from stdin (presumably a y/n confirmation -
# verify against run_towers_fluxnet.py). A dry run still starts the script
# but answers "n".
if experiment_config['dry_run']:
    print("DRY RUN MODE - No jobs will be submitted")

print("Launching CONFLUENCE for FLUXNET sites...")

# Execute the script (requires user input)
confirmation = 'n\n' if experiment_config['dry_run'] else 'y\n'
result = subprocess.run(cmd, input=confirmation, capture_output=True, text=True)

# Show at most the first 500 characters of the launcher's output
print("\nOutput:")
print(result.stdout[:500] + "..." if len(result.stdout) > 500 else result.stdout)

# A failed launch would otherwise look like a quiet success
if result.returncode != 0:
    print(f"\nLauncher exited with code {result.returncode}:")
    print(result.stderr.strip())

In [None]:
# Check job status and find completed simulations
def check_job_status(user=None):
    """Return the text printed by ``squeue -u <user>``.

    Parameters
    ----------
    user : str, optional
        Account whose jobs are listed. When omitted, the ``USER`` and then
        the ``LOGNAME`` environment variables are consulted.

    Returns
    -------
    str
        The standard output of ``squeue``; its standard error if nothing was
        written to standard output; or a short explanatory message when no
        user name could be determined or ``squeue`` is not installed.
    """
    user = user or os.environ.get('USER') or os.environ.get('LOGNAME')
    if not user:
        # Passing None to subprocess would raise a TypeError
        return "Could not determine the user name; pass user=... explicitly."

    try:
        result = subprocess.run(['squeue', '-u', user], capture_output=True, text=True)
    except FileNotFoundError:
        # Keeps the notebook runnable on machines without the scheduler tools
        return "squeue not found - job status is unavailable on this machine."

    # Fall back to stderr so scheduler errors are not silently dropped
    return result.stdout or result.stderr

print("Current jobs:")
print(check_job_status())

# Locate FLUXNET domains that already hold simulation output
confluence_data_dir = Path("/work/comphyd_lab/data/CONFLUENCE_data")
fluxnet_dir = confluence_data_dir / "fluxnet"

# One candidate "simulations" folder per domain_* directory (none at all when
# the FLUXNET data folder is missing)
candidate_sim_dirs = (
    domain_dir / "simulations"
    for domain_dir in (fluxnet_dir.glob("domain_*") if fluxnet_dir.exists() else ())
)

# A domain counts as completed once its simulations folder contains at least
# one NetCDF file at any depth; the site name is the folder name without the
# "domain_" text.
completed = [
    {
        'site_name': sim_dir.parent.name.replace("domain_", ""),
        'sim_dir': sim_dir,
    }
    for sim_dir in candidate_sim_dirs
    if sim_dir.exists() and any(sim_dir.rglob("*.nc"))
]

print(f"Completed simulations: {len(completed)}")

In [None]:
# Load and analyze model results
def load_summa_output(sim_dir, variable='scalarLatHeatTotal'):
    """Load one variable from a simulation's timestep NetCDF output.

    Parameters
    ----------
    sim_dir : pathlib.Path
        Directory searched recursively for files matching ``*timestep*.nc``.
    variable : str, optional
        Name of the dataset variable to extract.

    Returns
    -------
    pandas.DataFrame or None
        A frame with columns ``time`` and ``value`` (the variable flattened
        to one dimension), or ``None`` when no matching file exists or the
        file does not contain ``variable``.

    Notes
    -----
    Only the first matching file (in sorted path order) is read.
    Assumes the flattened variable has as many entries as the time axis,
    i.e. a single spatial unit per file - TODO confirm for multi-unit output.
    """
    # Sorted so the chosen file does not depend on filesystem listing order
    output_files = sorted(sim_dir.rglob("*timestep*.nc"))
    if not output_files:
        return None

    # Imported only once there is something to read, so the "no output yet"
    # path works even where xarray is not installed.
    import xarray as xr

    # The context manager closes the file handle; .values copies the data
    # into memory before that happens.
    with xr.open_dataset(output_files[0]) as ds:
        if variable not in ds.variables:
            return None
        return pd.DataFrame({
            'time': pd.to_datetime(ds.time.values),
            'value': ds[variable].values.flatten()
        })

# Summary Report
# Produced only when at least one completed simulation was found.
# NOTE(review): `completed` is built from every domain_* folder on disk, so
# its length may exceed the number of selected sites - confirm intent.
if completed:
    report_path = experiment_dir / 'experiment_report.txt'

    # Console version of the summary
    console_lines = (
        "### FLUXNET Experiment Summary Report ###",
        f"Experiment Name: {experiment_config['experiment_name']}",
        f"Date: {datetime.now().strftime('%Y-%m-%d')}",
        f"Total Sites Selected: {len(selected_df)}",
        f"Completed Simulations: {len(completed)}",
    )
    for console_line in console_lines:
        print(console_line)

    # Shorter version written into the experiment folder
    with report_path.open('w') as f:
        f.writelines([
            "FLUXNET Experiment Summary\n",
            f"Date: {datetime.now().strftime('%Y-%m-%d')}\n",
            f"Sites Processed: {len(completed)} of {len(selected_df)}\n",
        ])

    print(f"Summary report saved to {report_path}")