# Ablation Study 1: LDA-Based Phenology Classification

In this ablation study, we aimed to assess the impact of different configurations on the performance of an LDA-based phenology classification pipeline. We focused on the following configurations:
1. Resampled data without weights
2. No resampling and no weights
3. No resampling with cloud weights
4. No resampling with cloud disturbance weights

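We used the "no_resample_cloud_disturbance_weights" configuration to train our LDA model. This model was then applied consistently across all configurations to ensure a fair comparison. For each configuration, we computed the ROC curve and ROC-AUC score, identified the optimal threshold, and applied it to the validation set to predict phenology classes. Metrics were computed on the validation set and broken down by GRECO regions. The results highlighted the classification performance of each configuration through ROC curves and a bar chart showing the best F1 scores for each feature.

Note: the code cells in this notebook instantiate an `MLPClassifier` from the hyperparameters stored in the `MLP_no_resample_cloud_disturbance_weights_3Y_Group` checkpoint and evaluate it with 5-fold group cross-validation (grouped by `tile_id`) for every configuration and for time series of 1, 2 and 3 years. They report weighted precision, recall and F1-score, overall and per GRECO region, and plot the F1-scores as bar charts; no ROC curve or threshold search is computed in these cells.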

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np 
from sklearn.base import BaseEstimator
from sklearn.model_selection import GroupKFold
import pandas as pd
from joblib import Parallel, delayed

def _weighted_scores(y_true, y_pred):
    """Returns the weighted precision, recall and F1-score as a dict (undefined scores are set to 0)."""
    return {
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'f1_score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }


def train_and_evaluate_fold(model, X_train, y_train, X_test, y_test, regions_test, region_metrics):
    """
    Fits the model on one training fold and scores it on the matching test fold.

    Parameters:
    model: Estimator exposing fit and predict; it is fitted in place.
    X_train, y_train: Features and labels of the training fold.
    X_test, y_test: Features and labels of the test fold.
    regions_test: Region label of every test sample, in the same order as y_test.
    region_metrics (dict): Only its keys are read (the regions to score); it is not modified.

    Returns:
    dict: Weighted precision, recall and F1-score over the whole test fold.
    dict: Region -> list with this fold's metrics dict, or an empty list when the
          region has no sample in the test fold.
    """
    model.fit(X_train, y_train)
    y_pred = np.asarray(model.predict(X_test))

    metrics = _weighted_scores(y_test, y_pred)

    # Build a fresh dict instead of appending to the caller's one: when joblib runs the
    # folds in the same process the argument is shared, and mutating it would mix the
    # folds together (and make the caller extend each list with itself).
    fold_region_metrics = {}
    for region in region_metrics:
        # Positional mask, so the selection does not depend on index alignment
        region_mask = np.asarray(regions_test == region)
        if region_mask.any():
            fold_region_metrics[region] = [_weighted_scores(y_test[region_mask], y_pred[region_mask])]
        else:
            fold_region_metrics[region] = []

    return metrics, fold_region_metrics

def evaluate_model(model: BaseEstimator, X: pd.DataFrame, y: pd.Series, gkf: GroupKFold, groups, regions: pd.Series) -> "tuple[pd.DataFrame, dict]":
    """
    Evaluates a model using group k-fold cross-validation and returns the averaged metrics.
    Also calculates the metrics per region.

    Parameters:
    model (BaseEstimator): The machine learning model to be evaluated.
    X (pd.DataFrame): The feature matrix.
    y (pd.Series): The target vector.
    gkf (GroupKFold): The group k-fold cross-validator.
    groups: The group label of each sample, forwarded to gkf.split.
    regions (pd.Series): The series indicating the region for each sample.

    Returns:
    pd.DataFrame: A one-row DataFrame containing the averaged metrics across all folds.
    dict: A dictionary with regions as keys and one-row DataFrames of averaged metrics as
          values. Regions for which no fold produced metrics are left out.
    """
    region_keys = list(regions.unique())

    # Every fold receives its own empty accumulator: train_and_evaluate_fold may write
    # into the dict it is given, and with a sequential or threaded joblib backend a
    # single shared dict would be filled by all folds at once.
    results = Parallel(n_jobs=-1, verbose=1)(
        delayed(train_and_evaluate_fold)(
            model, X.iloc[train_index], y.iloc[train_index], X.iloc[test_index], y.iloc[test_index], regions.iloc[test_index],
            {region: [] for region in region_keys},
        )
        for train_index, test_index in gkf.split(X, y, groups)
    )

    metrics_list = []
    region_metrics = {region: [] for region in region_keys}
    for metrics, fold_region_metrics in results:
        metrics_list.append(metrics)
        for region in region_keys:
            # .get: a NaN key does not survive the round trip through a worker process
            region_metrics[region].extend(fold_region_metrics.get(region, []))

    # Average the metrics over all the folds
    avg_metrics = {metric: np.mean([fold_metrics[metric] for fold_metrics in metrics_list]) for metric in metrics_list[0]}

    print('Average metrics:', avg_metrics)

    # Average per region over the folds in which the region was present
    avg_region_metrics = {}
    for region, region_metrics_list in region_metrics.items():
        if not region_metrics_list:
            # Nothing to average (e.g. a missing region label); skip instead of failing
            continue
        avg_region_metrics[region] = {
            metric: np.mean([region_metrics_values[metric] for region_metrics_values in region_metrics_list])
            for metric in region_metrics_list[0]
        }

    return pd.DataFrame([avg_metrics]), {region: pd.DataFrame([metrics]) for region, metrics in avg_region_metrics.items()}


In [None]:
import os
from warnings import filterwarnings

import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from tqdm import tqdm

from utils import load_and_preprocess_table_data, load_checkpoint

# Silence all warnings for the rest of the session
filterwarnings('ignore')

model_name = 'MLP_no_resample_cloud_disturbance_weights_3Y_Group'
params = load_checkpoint(model_name, checkpoint_dir='checkpoints').best_params_
# The scaler is part of the model pipeline so that it is fitted on the training fold
# only; scaling the whole table before splitting would leak test-fold statistics.
model = make_pipeline(RobustScaler(), MLPClassifier(**params))

# Define the configurations for the ablation study
configs = [
    "no_resample_cloud_disturbance_weights",
    "no_resample_cloud_weights",
    "no_resample_no_weights",
    "resampled_no_weights"
]

# Lengths of the time series, in years
years = [1, 2, 3]

# Define features and target
features = ['amplitude_red', 'cos_phase_red','offset_red',
            'cos_phase_blue', 
            'amplitude_crswir', 'cos_phase_crswir', 'sin_phase_crswir', 'offset_crswir', 
            'elevation']

target = 'phen'

# Initialize dictionaries to store metrics, keyed by "<config>_<year>Y"
overall_metrics = {}
greco_region_metrics = {}

# Group k-fold cross-validation: samples of one tile never end up in both train and test
n_splits = 5
gkf = GroupKFold(n_splits=n_splits)

# Cross-validate the MLP (same hyperparameters everywhere) on every configuration and length
for config in configs:
    for year in years:
        run_name = f"{config}_{year}Y"
        data = load_and_preprocess_table_data(run_name)
        X = data[features]
        # Adjust target labels to start from 0
        y = data[target] - 1

        regions = data['greco_region']
        groups = data['tile_id']

        # Compute the cross-validated metrics, overall and per GRECO region
        overall_report, region_reports = evaluate_model(model, X, y, gkf, groups, regions)
        overall_metrics[run_name] = overall_report
        greco_region_metrics[run_name] = dict(region_reports)

# year = 3
# for config in tqdm(configs[1:]):
#     data = load_and_preprocess_table_data(config+f'_{year}Y')
#     X = data[features]
#     y = data[target]
#     # Adjust target labels to start from 0
#     y = y - 1

#     # Standardize features
#     scaler = RobustScaler()
#     X_scaled = scaler.fit_transform(X)

#     # Convert X_scaled back to DataFrame
#     X_scaled = pd.DataFrame(X_scaled, index=X.index, columns=X.columns)

#     regions = data['greco_region']
#     groups = data['tile_id']

#     # Compute metrics on the validation set
#     overall_report, region_reports = evaluate_model(model, X_scaled, y, gkf, groups,  regions)
#     overall_metrics[f"{config}_{year}Y"] = overall_report

#     for region, report in region_reports.items():
#         if f"{config}_{year}Y" not in greco_region_metrics:
#             greco_region_metrics[f"{config}_{year}Y"] = {}
#         greco_region_metrics[f"{config}_{year}Y"][region] = report

# Make sure the output folder exists, so the results of the long run above are not lost
os.makedirs('results', exist_ok=True)

# Save overall metrics to CSV: one row per (config_year, metric) pair, one value column
overall_metrics_df = pd.concat(overall_metrics, axis=1).transpose()
overall_metrics_df.to_csv(f'results/ablation_study_overall_metrics_{model_name}.csv', index=True)
print("Overall metrics saved.")

# Save GRECO region metrics to CSV: tag each one-row report with its run and region.
# assign() returns a new frame, so the reports kept in greco_region_metrics stay untouched.
combined_greco_metrics = []
for config_year, reports_by_region in greco_region_metrics.items():
    for region, report in reports_by_region.items():
        combined_greco_metrics.append(report.assign(config_year=config_year, region=region))

combined_greco_metrics_df = pd.concat(combined_greco_metrics, axis=0)
combined_greco_metrics_df.to_csv(f'results/ablation_study_greco_region_metrics_{model_name}.csv', index=True)
print("GRECO region metrics saved.")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from utils import mapping_real_greco

# Load the per-region metrics from the CSV file
combined_greco_metrics_df = pd.read_csv(f'results/ablation_study_greco_region_metrics_{model_name}.csv', index_col=0)

# Keep only the F1-score; copy so that the columns added below are not written to a slice
f1_score_df = combined_greco_metrics_df[['f1_score', 'config_year', 'region']].copy()
f1_score_df['region'] = f1_score_df['region'].map(mapping_real_greco)

# Split "<method>_<year>" at the last underscore into method and year
f1_score_df['method'] = f1_score_df['config_year'].map(lambda name: name.rpartition('_')[0])
f1_score_df['year'] = f1_score_df['config_year'].map(lambda name: name.rpartition('_')[2])

# One fixed color per method
palette = {
    "no_resample_cloud_disturbance_weights": "#1f77b4",  # Blue
    "no_resample_cloud_weights": "#2ca02c",  # Green
    "no_resample_no_weights": "#ff7f0e",  # Orange
    "resampled_no_weights": "#d62728"  # Red
}

# Fix the category orders so that bars, tick labels and colors agree in every subplot,
# even for a region that lacks one of the years or methods
method_order = list(palette)
year_order = sorted(f1_score_df['year'].unique(), key=lambda label: int(label[:-1]))
year_labels = [label.lower() for label in year_order]

# Set the plot style
sns.set(style="whitegrid")

# Get the unique regions
regions = f1_score_df['region'].unique()

# Create a 4x3 grid of axes, one subplot per region
fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(10, 12), sharey=True)
axes = axes.flatten()  # Flatten the axes array for easy iteration

# Plot each region
handles, labels = [], []
for i, (ax, region) in enumerate(zip(axes, regions)):
    region_data = f1_score_df[f1_score_df['region'] == region]
    sns.barplot(x='year', y='f1_score', hue='method', data=region_data, ax=ax, palette=palette,
                order=year_order, hue_order=method_order, alpha=0.75)
    ax.set_title(f'{region}', fontsize=14)
    ax.set_xticks(list(range(len(year_order))))
    ax.set_xticklabels(year_labels)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    # Horizontal grid
    ax.yaxis.grid(True, linestyle='--', alpha=0.75)

    # Y label on the first column only
    if i%3 == 0:
        ax.set_ylabel('F1-Score')
    else:
        ax.set_ylabel('')

    # X label on the last row only
    if i >= 9:
        ax.set_xlabel('Length of the Time Series')
    else:
        ax.set_xlabel('')

    # Keep the most complete set of legend entries for the shared figure legend
    ax_handles, ax_labels = ax.get_legend_handles_labels()
    if len(ax_labels) > len(labels):
        handles, labels = ax_handles, ax_labels

    # Remove the per-subplot legend (absent when nothing was drawn)
    ax_legend = ax.get_legend()
    if ax_legend is not None:
        ax_legend.remove()

# Remove the unused subplots, except the last one which is only switched off below
for i in range(len(regions), len(axes) - 1):
    fig.delaxes(axes[i])

# One shared legend for the whole figure, placed at the bottom
fig.legend(handles, [x.replace('_', ' ') for x in labels], loc='center', title='Method', frameon=False, ncol=4, bbox_to_anchor=(0.5, 0.04), fontsize=12)
axes[-1].axis('off')  # Hide the axis

# Adjust the layout to leave space for the legend
plt.tight_layout()
plt.subplots_adjust(bottom=0.15)

# Save first, then show: the figure is written out before the backend can close it
fig.savefig(f'results/ablation_study_greco_region_metrics_{model_name}.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from utils import mapping_real_greco

# Load the overall metrics from the CSV file. The file has three columns: the run name,
# the metric name and the value; its own header row is replaced by explicit names.
overall_metrics_df = pd.read_csv(
    f'results/ablation_study_overall_metrics_{model_name}.csv',
    header=0,
    names=['config_year', 'metric', 'score'],
)

# Keep only the F1-score rows
f1_metrics = overall_metrics_df[overall_metrics_df['metric'] == 'f1_score'].reset_index(drop=True)

# Split "<method>_<year>" at the last underscore into method and year
f1_metrics['method'] = f1_metrics['config_year'].map(lambda name: name.rpartition('_')[0])
f1_metrics['year'] = f1_metrics['config_year'].map(lambda name: name.rpartition('_')[2])

# Fix the category orders so that bars and tick labels always agree
method_order = list(palette)
year_order = sorted(f1_metrics['year'].unique(), key=lambda label: int(label[:-1]))

# Single bar chart with the overall scores
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))

sns.barplot(x='year', y='score', hue='method', data=f1_metrics, ax=ax, palette=palette,
            order=year_order, hue_order=method_order, alpha=0.75)
ax.set_title('Overall', fontsize=14)
ax.set_xticks(list(range(len(year_order))))
ax.set_xticklabels([label.lower() for label in year_order])
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Horizontal grid
ax.yaxis.grid(True, linestyle='--', alpha=0.75)

ax.set_ylabel('F1-Score')
ax.set_xlabel('Length of the Time Series')

# No legend on this figure; the colors come from the shared `palette` dict
overall_legend = ax.get_legend()
if overall_legend is not None:
    overall_legend.remove()


In [None]:
# Overall F1-scores of the "no_resample_cloud_disturbance_weights" method, one row per year
is_full_config = f1_metrics['method'] == 'no_resample_cloud_disturbance_weights'
f1_metrics.loc[is_full_config]

In [None]:
# Mean overall F1-score for each time-series length, averaged over the methods
f1_metrics['score'].groupby(f1_metrics['year']).mean()

In [None]:
# Difference between two scores typed in by hand — presumably two of the F1 values
# displayed by the cells above; TODO confirm which ones and compute it from f1_metrics
0.887682 - 0.819943