# Evaluation of cloud and shadow mask refinement methods

## Modules

In [None]:
from tabulate import tabulate
import pandas as pd
import numpy as np
import requests
import torch
import segmentation_models_pytorch as smp
from creds import *
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from rasterio.windows import from_bounds
import rioxarray
import tacoreader
import rasterio as rio
import os


## Masks refinement - Performance, time & resources

### Functions

In [None]:
def plot_memory_usage(csv_path, note=''):
    """Plot the memory-usage trace stored in a single run log.

    Args:
        csv_path: Path to a CSV file with the columns "Elapsed Time (s)" and
            "Memory Usage (MB)".
        note: Text placed at the beginning of the figure title.
    """
    data = pd.read_csv(csv_path)
    elapsed = data["Elapsed Time (s)"]
    # The last logged timestamp is reported as the duration of this single run
    # (assumes rows are in chronological order), so it is labelled "Time"
    # rather than "Avg Time".
    total_time = elapsed.iloc[-1]
    memory = data["Memory Usage (MB)"]
    avg_memory = memory.mean()
    max_memory = memory.max()

    plt.figure(figsize=(10, 6))
    plt.plot(elapsed, memory, label="Memory Usage")
    plt.title(f"{note} - Memory Usage Over Time\nTime: {total_time:.2f}s (~{(total_time / 60):.2f}m) | Avg Memory: {avg_memory:.2f}MB | Max Memory: {max_memory:.2f}MB")
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Memory Usage (MB)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()
    
def get_run_files(folder_path, id_prefix):
    """Return the CSV logs in ``folder_path`` whose file name starts with ``id_prefix``.

    The paths are returned in lexicographic order so that the "Run N" numbering
    derived from this list is the same on every call (``os.listdir`` gives no
    ordering guarantee).

    Args:
        folder_path: Directory to scan (not recursive).
        id_prefix: Required prefix of the file name.

    Returns:
        Sorted list of ``folder_path``-joined paths ending in ".csv".
    """
    return sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.startswith(id_prefix) and name.endswith('.csv')
    )

def read_memory_data(file_paths):
    """Load every CSV log in ``file_paths`` into a DataFrame, preserving order."""
    frames = []
    for csv_file in file_paths:
        frames.append(pd.read_csv(csv_file))
    return frames

def compute_stats(data_list):
    """Summarise a list of run logs.

    Returns a tuple ``(avg_memory, max_memory, total_time)``: the mean of the
    per-run mean memory, the highest memory value over all runs, and the mean
    of the last "Elapsed Time (s)" value of each run.
    """
    memory_col = "Memory Usage (MB)"
    time_col = "Elapsed Time (s)"

    run_means = [frame[memory_col].mean() for frame in data_list]
    run_peaks = [frame[memory_col].max() for frame in data_list]
    run_durations = [frame[time_col].iloc[-1] for frame in data_list]

    return np.mean(run_means), max(run_peaks), np.mean(run_durations)

def _draw_runs(ax, runs, heading, **title_kwargs):
    """Plot every run in ``runs`` on ``ax`` and title it with ``heading`` plus per-run stats."""
    stats = "\n".join(
        f"Run {i+1}: Avg: {df['Memory Usage (MB)'].mean():.2f}MB | Max: {df['Memory Usage (MB)'].max():.2f}MB | Time: {df['Elapsed Time (s)'].iloc[-1]:.2f}s"
        for i, df in enumerate(runs)
    )
    for i, df in enumerate(runs):
        ax.plot(df["Elapsed Time (s)"], df["Memory Usage (MB)"], label=f"Run {i+1}")
    ax.set_ylabel("Memory Usage (MB)")
    ax.set_title(f"{heading}\n\n{stats}", fontsize=18, **title_kwargs)
    ax.legend()
    ax.grid()


def plot_memory_usage_combined(id, new_folder, old_folder, output_path="lcmap_memory_usage.png"):
    """Compare the memory logs of the new and the existing pipeline for one tile.

    Draws two stacked panels (new version on top, existing version below) with
    one line per run, saves the figure and shows it.

    Args:
        id: Log file-name prefix selecting the runs in both folders; also used
            in the titles. (The name shadows the builtin ``id`` but is kept so
            existing keyword callers keep working.)
        new_folder: Folder containing the logs of the new version.
        old_folder: Folder containing the logs of the existing version.
        output_path: Where the figure is saved. Defaults to the file name that
            was previously hard-coded.

    Raises:
        ValueError: If either folder has no CSV log starting with ``id``.
    """
    files_new = get_run_files(new_folder, id)
    files_old = get_run_files(old_folder, id)
    # Fail with an explicit message instead of the "max() of empty sequence"
    # error that compute_stats would raise further down.
    if not files_new or not files_old:
        raise ValueError(
            f"No CSV logs starting with {id!r} found in "
            f"{new_folder if not files_new else old_folder!r}"
        )
    data_new = read_memory_data(files_new)
    data_old = read_memory_data(files_old)

    avg_memory_new, max_memory_new, total_time_new = compute_stats(data_new)
    avg_memory_old, max_memory_old, total_time_old = compute_stats(data_old)

    fig, axes = plt.subplots(2, 1, figsize=(15, 20), sharex=True)

    fig.text(0.5, 0.965, f"Memory Usage Analysis - Tile {id}", fontsize=20, ha='center')
    fig.text(0.5, 0.945, "Comparison Between New and Existing Version", fontsize=18, ha='center')
    fig.text(0.5, 0.900,
             f"New Version: Avg Mem = {avg_memory_new:.2f}MB, Max Mem = {max_memory_new:.2f}MB, Average Time = {total_time_new:.2f}s\n"
             f"Existing Version: Avg Mem = {avg_memory_old:.2f}MB, Max Mem = {max_memory_old:.2f}MB, Average Time = {total_time_old:.2f}s",
             fontsize=16, ha='center')

    _draw_runs(axes[0], data_new, f"Memory Usage for {id} (New Version)")
    _draw_runs(axes[1], data_old, f"Memory Usage for {id} (Existing Version)", pad=15)
    # The x axis is shared, so only the bottom panel carries its label.
    axes[1].set_xlabel("Elapsed Time (s)")

    # Leave room at the top for the three fig.text header lines.
    plt.tight_layout(rect=[0, 0.03, 1, 0.88])
    plt.savefig(output_path, dpi=300)
    plt.show()

### Single files inspection

In [None]:
# Individual run logs to inspect for tile 18NWL: (log file, plot title prefix).
single_run_logs = [
    ('/media/datapart/lucazanolo/S2_processed_masks/logs/T18NWL_1.csv', "Run 1 - Reimplemented Pipeline - Masks Refinement for tile 18NWL"),
    ('/media/datapart/lucazanolo/S2_processed_masks/logs/T18NWL_2.csv', "Run 2 - Reimplemented Pipeline - Masks Refinement for tile 18NWL"),
    ('/media/datapart/lucazanolo/S2_processed_masks_old/logs/T18NWL_1.csv', "Run 1 - Existing Pipeline - Masks Refinement for tile 18NWL"),
]
for log_path, log_note in single_run_logs:
    plot_memory_usage(log_path, note=log_note)


### Tile processing report

In [None]:
# Named tile_id rather than id so the builtin id() stays usable in later cells.
tile_id = "T10UED"
new_folder = "/media/datapart/lucazanolo/S2_processed_masks/logs"
old_folder = "/media/datapart/lucazanolo/S2_processed_masks_old/logs"
plot_memory_usage_combined(tile_id, new_folder, old_folder)

### Evaluate Cloud and Shadow masks with CloudSEN12

#### Functions

In [None]:
def print_dict(config_map, title = None):
    """Print a mapping as a numbered "index - key : value" list, optionally preceded by a title."""
    if title:
        print(title)

    position = 1
    for key, value in config_map.items():
        print(f"{position} - {key} : {value}")
        position += 1
        
def print_dataframe(df, title='', columns_to_exclude=None):
    """Print a DataFrame as a psql-style table, optionally preceded by a title.

    Args:
        df: DataFrame to print; ``None`` or an empty frame prints
            "No data to display." instead.
        title: Line printed before the table when non-empty.
        columns_to_exclude: Column label(s) dropped before printing. ``None``
            (the default) drops nothing; a ``None`` default is used instead of
            a shared mutable ``[]``.
    """
    if title:
        print(title)

    if df is None or len(df) == 0:
        print("No data to display.")
        return

    excluded = [] if columns_to_exclude is None else columns_to_exclude
    df_to_show = df.drop(columns=excluded)
    print(tabulate(df_to_show, headers='keys', tablefmt='psql'))
        
def get_season(month):
    """
    Map a month number to a season id made of the initials of its months.

    12, 1, 2 -> 'DJF'; 3-5 -> 'MAM'; 6-8 -> 'JJA'; any other value -> 'SON'.
    """
    season_table = (
        ((12, 1, 2), 'DJF'),  # Winter (December, January, February)
        ((3, 4, 5), 'MAM'),   # Spring (March, April, May)
        ((6, 7, 8), 'JJA'),   # Summer (June, July, August)
    )
    for months, season_id in season_table:
        if month in months:
            return season_id
    # Fall (September, October, November) and every value not listed above
    return 'SON'


def load_cloussen12_metadata():
    """Load the CloudSEN12 L2A metadata table and add date, season and tile columns.

    Rows are restricted to ``label_type == "high"`` and
    ``real_proj_shape == 2000``. No grouping or minimum-sample filtering is
    done here; callers apply it on the returned table.

    Returns:
        The object returned by ``tacoreader.load`` after filtering, with the
        extra columns ``s2_date``, ``YearMonth``, ``season``, ``YearSeason``
        and ``tile``.
    """
    
    # Load the CloudSEN12 L2A dataset index (may access remote data, verify against tacoreader).
    ds = tacoreader.load("tacofoundation:cloudsen12-l2a")
    # Characters 11:19 of s2_id are parsed as a YYYYMMDD date
    # (presumably the sensing date of the Sentinel-2 product id, TODO confirm).
    ds['s2_date'] = pd.to_datetime(ds['s2_id'].str[11:19], format='%Y%m%d')

    # Keep only rows with "high" label type and a real_proj_shape of 2000.
    ds = ds[ds["label_type"] == "high"]
    ds = ds[ds["real_proj_shape"] == 2000]

    # Generate 4 new columns.
    # Monthly period (year + month) of the acquisition date.
    ds["YearMonth"] = pd.to_datetime(ds["s2_date"]).dt.to_period("M")

    # One of the [DJF, MAM, JJA, SON] season ids.
    ds["season"] = ds["YearMonth"].dt.month.apply(get_season)

    # Combined id to distinguish between seasons of different years.
    # NOTE(review): the calendar year is used, so December shares its DJF id
    # with January/February of the same calendar year, not the following one.
    ds["YearSeason"] = ds["season"] + ds["YearMonth"].dt.year.astype(str)

    # Explicit tile id: characters 39:44 of s2_id
    # (presumably the MGRS tile code without the leading "T", TODO confirm).
    ds["tile"] = ds["s2_id"].str[39:44]
    
    return ds


def evaluate_with_classification_report(ground_truth, predicted_mask, label="Cloud"):
    """Build a two-class classification report for a predicted mask.

    Both arrays are flattened, NaNs are replaced by 0 and values are cast to
    int. When fewer than two distinct values occur across both arrays, a
    warning is printed and a placeholder string is returned instead of a report.

    NOTE(review): only the ground truth is truncated to 2000*2000 elements;
    the prediction is used at full length - confirm this is intended.
    """
    print(ground_truth.shape, predicted_mask.shape)

    max_pixels = 2000*2000
    truth_flat = ground_truth.flatten()[:max_pixels]
    pred_flat = predicted_mask.flatten()

    print("Shapes:", "y_true:", truth_flat.shape, "y_pred:", pred_flat.shape)

    # Ensure integer values
    truth_int, pred_int = (
        np.nan_to_num(values, nan=0).astype(int) for values in (truth_flat, pred_flat)
    )

    classes_present = np.unique(np.concatenate([truth_int, pred_int]))
    if len(classes_present) >= 2:
        return classification_report(
            truth_int, pred_int, target_names=[f"No {label}", label], zero_division=0
        )

    print(f"Warning: Not enough classes found in ground truth for {label} evaluation.")
    return "Insufficient class variety for evaluation"

def crop_3d(array, target_shape=(2000, 2000)):
    """Return the top-left window of axes 1 and 2 of ``array``; axis 0 is kept whole.

    ``target_shape[0]`` and ``target_shape[1]`` are the maximum sizes of axes
    1 and 2; smaller inputs are returned unchanged in size.
    """
    max_rows = target_shape[0]
    max_cols = target_shape[1]
    return array[:, :max_rows, :max_cols]

def crop_2d(array, target_shape=(2000, 2000)):
    """Return the top-left window of ``array`` limited to ``target_shape`` rows and columns.

    Inputs smaller than ``target_shape`` are returned unchanged in size.
    """
    max_rows = target_shape[0]
    max_cols = target_shape[1]
    return array[:max_rows, :max_cols]

def read_cloudsen12_sample(dt, sample_idx):
    """Read the image bands and the label raster of one CloudSEN12 sample.

    Args:
        dt: Dataset object exposing ``read(index)``, as used with the table
            returned by ``load_cloussen12_metadata``.
        sample_idx: Index of the sample passed to ``dt.read``.

    Returns:
        Tuple ``(s2_data, s2_label_data, patch_bounds, crs)``: bands 4, 3, 2 of
        the image raster and all bands of the label raster, both cropped to at
        most 2000 x 2000 pixels, plus the bounds and CRS of the label raster.
    """
    # Resolve the sample once and take both rasters from it, instead of
    # calling dt.read(sample_idx) separately for each of them.
    sample = dt.read(sample_idx)
    s2 = sample.read(0)
    s2_label = sample.read(1)

    with rio.open(s2) as src, rio.open(s2_label) as dst:
        # Bands 4, 3, 2 - presumably red, green, blue of the Sentinel-2 product.
        s2_data = crop_3d(src.read([4, 3, 2]), target_shape=(2000, 2000))
        s2_label_data = crop_3d(dst.read(), target_shape=(2000, 2000))

        # NOTE(review): bounds describe the full label raster; they match the
        # cropped arrays only when the raster is not larger than 2000 x 2000.
        patch_bounds = dst.bounds
        crs = dst.crs

    return s2_data, s2_label_data, patch_bounds, crs

def read_patch_with_window(src_path, bounds, target_crs):
    """
    Read band 1 of the raster at ``src_path`` inside the window covering ``bounds``.

    Raises ValueError when the raster CRS differs from ``target_crs``; no
    reprojection is attempted.
    """
    with rio.open(src_path) as raster:
        source_crs = raster.crs
        if source_crs != target_crs:
            raise ValueError(f"CRS mismatch: Source CRS is {source_crs}, but expected {target_crs}")
        # Translate the geographic bounds into a pixel window of this raster.
        read_window = from_bounds(*bounds, transform=raster.transform)
        return raster.read(1, window=read_window)

def _resolve_mask_paths(paths):
    """Return the (refined cloud, refined shadow, Sen2Cor cloud, Sen2Cor shadow) mask paths."""
    keys = ("refined_cloud_mask", "refined_shadow_mask", "sen2cor_cloud_mask", "sen2cor_shadow_mask")
    if all(key in paths for key in keys):
        return tuple(paths[key] for key in keys)
    # Fallback for dicts with other key names: rely on insertion order, as before.
    ordered = list(paths.values())
    return ordered[0], ordered[1], ordered[2], ordered[3]


def evaluation(paths : dict, tile, verbose=False):
    """Evaluate Sen2Cor and refined cloud/shadow masks against CloudSEN12 labels.

    For every CloudSEN12 sample of ``tile`` belonging to a (tile, YearSeason)
    group of at least 3 samples, the matching window is read from each mask
    raster, all arrays are cropped to a common top-left shape, and the pixels
    are accumulated. Four classification reports (Sen2Cor/refined x
    cloud/shadow) are printed over all accumulated pixels.

    Ground truth: label values 1 and 2 count as cloud, 3 as shadow.
    The mask rasters are assumed to contain only the values 0 and 1
    (TODO confirm; other values would not match the two target names).

    Args:
        paths: Mask raster paths. Looked up by the keys "refined_cloud_mask",
            "refined_shadow_mask", "sen2cor_cloud_mask" and
            "sen2cor_shadow_mask"; if any is missing, the first four values in
            insertion order are used in that same order.
        tile: Tile id as stored in the "tile" metadata column (e.g. "18NWL").
        verbose: When True, print the metadata row of each evaluated sample.

    Raises:
        ValueError: If no sample is available for ``tile``, or a mask raster
            has a CRS different from the sample's.
    """
    ds = load_cloussen12_metadata()
    filtered_subset = ds.groupby(["tile", "YearSeason"]) \
        .filter(lambda x: len(x) >= 3) \
        .sort_values("tile")
    filtered_subset = filtered_subset[filtered_subset['tile'] == tile]

    print_dataframe(filtered_subset, f"Filtered subset of samples -> {len(filtered_subset)} samples")

    if len(filtered_subset) == 0:
        raise ValueError(f"No CloudSEN12 samples available for tile {tile!r}")

    refined_cloud_path, refined_shadow_path, sen2cor_cloud_path, sen2cor_shadow_path = _resolve_mask_paths(paths)

    # Per-sample flattened arrays; concatenated once at the end rather than
    # extending Python lists pixel by pixel.
    all_cloud_gt = []
    all_sen2cor_cloud_pred = []
    all_refined_cloud_pred = []
    all_shadow_gt = []
    all_sen2cor_shadow_pred = []
    all_refined_shadow_pred = []

    for i in range(len(filtered_subset)):

        print(f"\n\nEvaluating for patch {i}:")
        print(f" - Downloading data points from CloudSen12 ...")
        datapoint, datapoint_gt, patch_bounds, crs = read_cloudsen12_sample(filtered_subset, sample_idx = i)
        datapoint_gt = datapoint_gt.squeeze()

        cloud_gt = np.where((datapoint_gt == 1) | (datapoint_gt == 2), 1, 0)
        shadow_gt = np.where(datapoint_gt == 3, 1, 0)

        if verbose:
            # Metadata row of the sample being evaluated (row i, not always row 0).
            info = filtered_subset.iloc[i]
            print_dataframe(pd.DataFrame(info), title = " - Current datapoint info")

        refined_cloud_patch = read_patch_with_window(refined_cloud_path, patch_bounds, crs)
        refined_shadow_patch = read_patch_with_window(refined_shadow_path, patch_bounds, crs)
        sen2cor_cloud_patch = read_patch_with_window(sen2cor_cloud_path, patch_bounds, crs)
        sen2cor_shadow_patch = read_patch_with_window(sen2cor_shadow_path, patch_bounds, crs)

        print(" - Datapoints downloaded, shape: ", datapoint.shape)
        print(" - Datapoints gt downloaded, shape: ", datapoint_gt.shape)
        print(" - sen2cor_cloud_patch, shape: ", sen2cor_cloud_patch.shape)
        print(" - refined_cloud_patch, shape: ", refined_cloud_patch.shape)
        print(" - sen2cor_shadow_patch, shape: ", sen2cor_shadow_patch.shape)
        print(" - refined_shadow_patch, shape: ", refined_shadow_patch.shape)

        # Minimal common shape over the ground truth and all four patches, so
        # every flattened array ends up with the same number of pixels.
        to_align = (datapoint_gt, sen2cor_cloud_patch, refined_cloud_patch,
                    sen2cor_shadow_patch, refined_shadow_patch)
        final_shape = (
            min(arr.shape[0] for arr in to_align),
            min(arr.shape[1] for arr in to_align),
        )

        # Top-left crop to align all patches
        cloud_gt = crop_2d(cloud_gt, final_shape)
        shadow_gt = crop_2d(shadow_gt, final_shape)
        sen2cor_cloud_patch = crop_2d(sen2cor_cloud_patch, final_shape)
        refined_cloud_patch = crop_2d(refined_cloud_patch, final_shape)
        sen2cor_shadow_patch = crop_2d(sen2cor_shadow_patch, final_shape)
        refined_shadow_patch = crop_2d(refined_shadow_patch, final_shape)

        print(f" - Final cropped shape for all patches: {final_shape}")
        print(" - Datapoint classes distribution: ", np.unique(datapoint_gt, return_counts=True))
        print(" - Cloud ground truth: ", np.unique(cloud_gt, return_counts=True))
        print(" - Shadow ground truth: ", np.unique(shadow_gt, return_counts=True))
        print(f" - Unique values in Sen2Cor cloud mask patch: {np.unique(sen2cor_cloud_patch, return_counts=True)}")
        print(f" - Unique values in Refined cloud mask patch: {np.unique(refined_cloud_patch, return_counts=True)}")
        print(f" - Unique values in Sen2Cor shadow mask patch: {np.unique(sen2cor_shadow_patch, return_counts=True)}")
        print(f" - Unique values in Refined shadow mask patch: {np.unique(refined_shadow_patch, return_counts=True)}")

        all_cloud_gt.append(cloud_gt.flatten())
        all_sen2cor_cloud_pred.append(sen2cor_cloud_patch.flatten())
        all_refined_cloud_pred.append(refined_cloud_patch.flatten())

        all_shadow_gt.append(shadow_gt.flatten())
        all_sen2cor_shadow_pred.append(sen2cor_shadow_patch.flatten())
        all_refined_shadow_pred.append(refined_shadow_patch.flatten())

    cloud_true = np.concatenate(all_cloud_gt)
    shadow_true = np.concatenate(all_shadow_gt)

    print("\nSen2Cor Cloud Mask Evaluation:\n", classification_report(cloud_true, np.concatenate(all_sen2cor_cloud_pred), zero_division=0, target_names=['No Cloud','Cloud']))
    print("\nRefined Cloud Mask Evaluation:\n", classification_report(cloud_true, np.concatenate(all_refined_cloud_pred), zero_division=0, target_names=['No Cloud','Cloud']))
    print("\nSen2Cor Shadow Mask Evaluation:\n", classification_report(shadow_true, np.concatenate(all_sen2cor_shadow_pred), zero_division=0, target_names=['No Shadow','Shadow']))
    print("\nRefined Shadow Mask Evaluation:\n", classification_report(shadow_true, np.concatenate(all_refined_shadow_pred), zero_division=0, target_names=['No Shadow','Shadow']))

### CloudSEN12 - Inspect and filter samples

In [None]:

cloudsen12_metadata = load_cloussen12_metadata()
# Check how many samples are available for each (tile, YearSeason) group in CloudSEN12.
# Only groups with at least GROUP_MIN samples are kept.

GROUP_MIN = 4
TILE = "10UED"

filtered_subset = cloudsen12_metadata.groupby(["tile", "YearSeason"]).filter(lambda x: len(x) >= GROUP_MIN).sort_values("tile")
# Select the tile with a mask built from the filtered frame itself; a mask taken
# from the unfiltered frame has a different index and has to be re-aligned by pandas.
filtered_subset = filtered_subset[filtered_subset["tile"] == TILE]
print_dataframe(filtered_subset, f"Filtered subset of samples -> {len(filtered_subset)} samples")

# Group sizes over the whole (unfiltered) table, to see how many samples each tile has per season

grouped_counts = cloudsen12_metadata.groupby(["tile", "YearSeason"]).size().reset_index(name="image_count")
print_dataframe(grouped_counts[grouped_counts["image_count"] >= GROUP_MIN], f"Retained groups of {GROUP_MIN} or more elements of the same tile per season")

### CloudSEN12 - Inspect patch data

In [None]:
# Select one sample from the filtered subset and plot it next to its annotations

from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap
idx = 1
subset = filtered_subset.iloc[idx]
datapoint, datapoint_gt, patch_bounds, crs = read_cloudsen12_sample(filtered_subset, sample_idx = idx)
datapoint_gt = datapoint_gt.squeeze()
print(datapoint_gt.shape, datapoint.shape)

# RGB normalization: scale by 3000 and clip to [0, 1], the range imshow
# expects for float RGB data (brighter pixels would otherwise trigger a
# clipping warning).
datapoint_rgb = np.clip(datapoint.transpose(1, 2, 0) / 3000, 0, 1)

# Custom colormap for the classes 0-3
cloudsen_cmap = ListedColormap(['skyblue', 'white', 'lightgray', 'black'])

# Matching legend
legend_gt = [
    Patch(color='skyblue', label='0 - Clear'),
    Patch(color='white', label='1 - Thick Cloud'),
    Patch(color='lightgray', label='2 - Thin Cloud'),
    Patch(color='black', label='3 - Cloud Shadow'),
]

# Legends for the binary masks (shown with the gray colormap)
legend_binary1 = [
    Patch(color='black', label='No Cloud'),
    Patch(color='white', label='Cloud Pixel'),
]

legend_binary2 = [
    Patch(color='black', label='No Shadow'),
    Patch(color='white', label='Shadow Pixel'),
]

fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# RGB patch (no legend)
ax[0, 0].imshow(datapoint_rgb)
ax[0, 0].set_title("RGB patch")
ax[0, 0].axis('off')

# Human annotated patch
ax[0, 1].imshow(datapoint_gt, cmap=cloudsen_cmap, vmin=0, vmax=3)
ax[0, 1].set_title("Human annotated patch")
ax[0, 1].axis('off')
ax[0, 1].legend(handles=legend_gt, loc='upper right', fontsize=8, frameon=True)

# Cloud mask (1 or 2 → 1)
cloud_mask = np.where((datapoint_gt == 1) | (datapoint_gt == 2), 1, 0)
ax[1, 0].imshow(cloud_mask, cmap="gray", vmin=0, vmax=1)
ax[1, 0].set_title("Cloud mask")
ax[1, 0].axis('off')
ax[1, 0].legend(handles=legend_binary1, loc='upper right', fontsize=8, frameon=True)

# Shadow mask (3 → 1)
shadow_mask = np.where(datapoint_gt == 3, 1, 0)
ax[1, 1].imshow(shadow_mask, cmap="gray", vmin=0, vmax=1)
ax[1, 1].set_title("Shadow mask")
ax[1, 1].axis('off')
ax[1, 1].legend(handles=legend_binary2, loc='upper right', fontsize=8, frameon=True)

plt.tight_layout()
plt.show()

#### Download UNetMobV2


In [None]:

# Download the model
model_path = "https://huggingface.co/datasets/isp-uv-es/CloudSEN12Plus/resolve/main/demo/models/UNetMobV2_V2.pt"
local_model_path = "../models/UNetMobV2_V2.pt"
# Make sure the target folder exists before writing into it.
os.makedirs(os.path.dirname(local_model_path), exist_ok=True)
with requests.get(model_path, stream=True, timeout=60) as r:
    # Stop on HTTP errors instead of saving an error page as the model file.
    r.raise_for_status()
    with open(local_model_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)

# Load the weights into the model
model_v2 = smp.Unet(encoder_name="mobilenet_v2", encoder_weights=None, classes=4, in_channels=13)
# SECURITY: torch.load unpickles the downloaded file; weights_only=True restricts
# loading to tensors/state-dict content instead of arbitrary pickled objects.
model_v2.load_state_dict(torch.load(local_model_path, map_location=torch.device('cpu'), weights_only=True))

# Deactivate the gradient computation
for param in model_v2.parameters():
    param.requires_grad = False

model_v2 = model_v2.eval()

### T18NWL - Existing pipeline

In [None]:
# Sen2Cor masks for tile T18NWL.
t18nwl_sen2cor_masks = {
    "sen2cor_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/cloud_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_cloudMediumMask_Sen2Cor.tif",
    "sen2cor_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/shadow_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_shadowMask_Sen2Cor.tif",
}
# Refined masks of the existing pipeline first, then the Sen2Cor masks
# (insertion order: refined cloud, refined shadow, Sen2Cor cloud, Sen2Cor shadow).
t18nwl_old = {
    "refined_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/cloud_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_cloudMediumMask.tif",
    "refined_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/shadow_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_shadowMask.tif",
    **t18nwl_sen2cor_masks,
}
print_dict(t18nwl_old, title="Specified paths:")
evaluation(t18nwl_old, tile='18NWL')

In [None]:
Refined Cloud Mask Evaluation:
               precision    recall  f1-score   support

    No Cloud       0.79      0.29      0.43   8610243
       Cloud       0.51      0.90      0.65   7097757

    accuracy                           0.57  15708000
   macro avg       0.65      0.60      0.54  15708000
weighted avg       0.66      0.57      0.53  15708000

Refined Cloud Mask Evaluation:
               precision    recall  f1-score   support

    No Cloud       0.83      0.38      0.52   8610243
       Cloud       0.55      0.91      0.68   7097757

    accuracy                           0.62  15708000
   macro avg       0.69      0.64      0.60  15708000
weighted avg       0.70      0.62      0.59  15708000

### T18NWL - Reimplemented pipeline

In [None]:
# Refined masks of the reimplemented pipeline first, then the Sen2Cor masks
# (insertion order: refined cloud, refined shadow, Sen2Cor cloud, Sen2Cor shadow).
t18nwl_new = {
    "refined_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks/cloud_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_cloudMediumMask.tif",
    "refined_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks/shadow_masks/MSIL2A_20190223T152641_N0500_R025_T18NWL_20221213T203925_shadowMask.tif",
    **t18nwl_sen2cor_masks,
}
print_dict(t18nwl_new, title="Specified paths:")
evaluation(t18nwl_new, tile="18NWL")

### T10UED - Existing pipeline

In [None]:
# Sen2Cor masks for tile T10UED.
t10ued_sen2cor_masks = {
    "sen2cor_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/cloud_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_cloudMediumMask_Sen2Cor.tif",
    "sen2cor_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/shadow_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_shadowMask_Sen2Cor.tif",
}
# Refined masks of the existing pipeline first, then the Sen2Cor masks
# (insertion order: refined cloud, refined shadow, Sen2Cor cloud, Sen2Cor shadow).
t10ued_old = {
    "refined_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/cloud_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_cloudMediumMask.tif",
    "refined_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks_old/shadow_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_shadowMask.tif",
    **t10ued_sen2cor_masks,
}
print_dict(t10ued_old, title="Specified paths:")
evaluation(t10ued_old, tile="10UED")

### T10UED - Reimplemented pipeline

In [None]:
# Refined masks of the reimplemented pipeline first, then the Sen2Cor masks
# (insertion order: refined cloud, refined shadow, Sen2Cor cloud, Sen2Cor shadow).
t10ued_new = {
    "refined_cloud_mask": "/media/datapart/lucazanolo/S2_processed_masks/cloud_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_cloudMediumMask.tif",
    "refined_shadow_mask": "/media/datapart/lucazanolo/S2_processed_masks/shadow_masks/MSIL2A_20191006T192251_N0500_R099_T10UED_20230702T041139_shadowMask.tif",
    **t10ued_sen2cor_masks,
}
print_dict(t10ued_new, title="Specified paths:")
evaluation(t10ued_new, tile="10UED")

## LC maps generation - Performance, time & resources

### Functions

In [None]:
def _draw_runs(ax, runs, heading, **title_kwargs):
    """Plot every run in ``runs`` on ``ax`` and title it with ``heading`` plus per-run stats."""
    stats = "\n".join(
        f"Run {i+1}: Avg: {df['Memory Usage (MB)'].mean():.2f}MB | Max: {df['Memory Usage (MB)'].max():.2f}MB | Time: {df['Elapsed Time (s)'].iloc[-1]:.2f}s"
        for i, df in enumerate(runs)
    )
    for i, df in enumerate(runs):
        ax.plot(df["Elapsed Time (s)"], df["Memory Usage (MB)"], label=f"Run {i+1}")
    ax.set_ylabel("Memory Usage (MB)")
    ax.set_title(f"{heading}\n\n{stats}", fontsize=18, **title_kwargs)
    ax.legend()
    ax.grid()


def plot_memory_usage_combined(id, new_folder, old_folder, old_id='1map', output_path="lcmap_memory_usage.png"):
    """Compare the memory logs of the new and the existing land-cover map generation.

    Draws two stacked panels (new version on top, existing version below) with
    one line per run, saves the figure and shows it.

    Args:
        id: Log file-name prefix selecting the runs of the new version; also
            used in both panel titles. (The name shadows the builtin ``id`` but
            is kept so existing keyword callers keep working.)
        new_folder: Folder containing the logs of the new version.
        old_folder: Folder containing the logs of the existing version.
        old_id: Log file-name prefix selecting the runs of the existing
            version. Defaults to the previously hard-coded '1map'.
        output_path: Where the figure is saved. Defaults to the previously
            hard-coded file name.

    Raises:
        ValueError: If no matching CSV log is found in either folder.
    """
    files_new = get_run_files(new_folder, id)
    files_old = get_run_files(old_folder, old_id)
    # Fail with an explicit message instead of the "max() of empty sequence"
    # error that compute_stats would raise further down.
    if not files_new:
        raise ValueError(f"No CSV logs starting with {id!r} found in {new_folder!r}")
    if not files_old:
        raise ValueError(f"No CSV logs starting with {old_id!r} found in {old_folder!r}")
    data_new = read_memory_data(files_new)
    data_old = read_memory_data(files_old)

    avg_memory_new, max_memory_new, total_time_new = compute_stats(data_new)
    avg_memory_old, max_memory_old, total_time_old = compute_stats(data_old)

    fig, axes = plt.subplots(2, 1, figsize=(15, 20), sharex=True)

    fig.text(0.5, 0.965, "Memory Usage Analysis - Land Cover map generation", fontsize=20, ha='center')
    fig.text(0.5, 0.945, "Comparison Between New and Existing Version", fontsize=18, ha='center')
    fig.text(0.5, 0.900,
             f"New Version: Avg Mem = {avg_memory_new:.2f}MB, Max Mem = {max_memory_new:.2f}MB, Average Time = {total_time_new:.2f}s\n"
             f"Existing Version: Avg Mem = {avg_memory_old:.2f}MB, Max Mem = {max_memory_old:.2f}MB, Average Time = {total_time_old:.2f}s",
             fontsize=16, ha='center')

    _draw_runs(axes[0], data_new, f"Memory Usage for {id} (New Version)")
    # NOTE(review): the existing-version title also shows ``id`` although its
    # logs are selected with ``old_id`` - kept as in the original output.
    _draw_runs(axes[1], data_old, f"Memory Usage for {id} (Existing Version)", pad=15)
    # The x axis is shared, so only the bottom panel carries its label.
    axes[1].set_xlabel("Elapsed Time (s)")

    # Leave room at the top for the three fig.text header lines.
    plt.tight_layout(rect=[0, 0.03, 1, 0.88])
    plt.savefig(output_path, dpi=300)
    plt.show()
    

def plot_memory_usage(csv_path, note = ''):
    """Plot the memory-usage trace of one run log and show its summary in the title.

    The title reports the last "Elapsed Time (s)" value (in seconds and
    minutes) and the mean and maximum of "Memory Usage (MB)", prefixed by ``note``.
    """
    log = pd.read_csv(csv_path)

    elapsed_s = log["Elapsed Time (s)"]
    total_time = elapsed_s.iloc[-1]
    memory_mb = log["Memory Usage (MB)"]
    avg_memory = memory_mb.mean()
    max_memory = memory_mb.max()

    heading = f"{note} - Memory Usage Over Time\nTime: {total_time:.2f}s (~{(total_time / 60):.2f}m) | Avg Memory: {avg_memory:.2f}MB | Max Memory: {max_memory:.2f}MB"

    plt.figure(figsize=(10, 6))
    plt.plot(elapsed_s, memory_mb, label="Memory Usage")
    plt.title(heading)
    plt.xlabel("Elapsed Time (s)")
    plt.ylabel("Memory Usage (MB)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

### Generate reports

In [None]:
#plot_memory_usage('/media/datapart/lucazanolo/SVM_old/classification/logs/cloud-detection_2025-04-23-07-59.csv', note="Existing Pipeline - LC map generation")
# Named run_id rather than id so the builtin id() stays usable in later cells.
run_id = "4map"
new_folder = "/media/datapart/lucazanolo/SVM/lc_maps/logs"
old_folder = "/media/datapart/lucazanolo/SVM_old/classification/logs"
plot_memory_usage_combined(run_id, new_folder, old_folder)