In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Location of the input CSV, relative to the notebook's working directory
data_path = "./../output/cls_time_series.csv"

# Load the table; the first CSV column becomes the DataFrame index
df = pd.read_csv(filepath_or_buffer=data_path, index_col=0)

In [None]:
# Show the freshly loaded table as this cell's output
df

In [None]:
# Per-row total over every column except the identifying 'name' and 'date' columns
total_px = df.drop(columns=['name', 'date']).sum(axis=1)

# Share of 'null_px' in each row, in percent. It is kept as a standalone Series so
# that no temporary column has to be added to (and later dropped from) the DataFrame.
null_px_percentage = (df['null_px'] / total_px) * 100

# Rows whose 'null_px' share reaches this percentage are discarded
threshold = 50  # Adjust this threshold as needed

# Keep only the rows below the threshold. An all-zero row gives 0/0 = NaN, which
# compares False and is therefore dropped as well.
df = df[null_px_percentage < threshold]

# `df` now contains only rows where 'null_px' is not the majority
df

In [None]:
# Distinct values of the "name" column; one figure is produced per entry
names = [*df["name"].unique()]

# Fill colour per plotted category. Categories without an entry here are
# expected to be given a fallback colour by whoever looks them up.
colors = dict(
    land_px="green",
    snow_px="pink",
    cloud_px="grey",
    null_px="black",
    uncertain_px="orange",
)

# Categories stacked bottom-to-top in every figure (loop-invariant, so defined once).
# "land_px" is not a raw column: it is derived inside the loop from three others.
columns_to_plot = ["snow_px", "cloud_px", "land_px", "water_px", "uncertain_px", "null_px"]

for name in names:
    # Take an explicit copy of this name's rows. The column assignments below would
    # otherwise write to a boolean-mask slice of `df`, which can trigger pandas'
    # SettingWithCopyWarning.
    sub_df = df[df["name"] == name].copy()

    # Parse the "date" column into datetimes and order the rows chronologically
    sub_df["date"] = pd.to_datetime(sub_df["date"])
    sub_df = sub_df.sort_values(by="date")

    # Merge vegetation, non-vegetation and shadow pixels into a single "land" category
    sub_df["land_px"] = sub_df["vegetation_px"] + sub_df["not_vegetation_px"] + sub_df["shadow_px"]

    # Convert pixel counts to km^2: 400 m^2 per pixel (assumes 20 m x 20 m pixels)
    # times 1e-6 km^2 per m^2
    sub_df[columns_to_plot] *= 400 * 1e-6

    # Create a figure and axis for the plot
    fig, ax = plt.subplots(figsize=(18, 8))

    # Draw each category as a band between the running total below it and the
    # running total including it, which yields a stacked area chart.
    cumulative_sum = 0
    for column in columns_to_plot:
        band_top = cumulative_sum + sub_df[column]
        ax.fill_between(
            sub_df["date"],
            cumulative_sum,
            band_top,
            label=column,
            # Categories missing from `colors` (e.g. "water_px") fall back to blue
            color=colors.get(column, "blue"),
        )
        cumulative_sum = band_top

    # Remove the top and right axis borders
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Customize the plot
    ax.set_title(f"Stacked Time Series Plot for {name}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Area [km^2]")

    # Legend sits outside the axes, vertically centred on the right edge
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    ax.grid(axis='x', linestyle='--', alpha=1.0, color="black")

    # Thin inset strip placed below the main axes (bounds are in main-axes
    # coordinates). It spans the same width, but its x-axis is not linked to the
    # main axes; both simply plot the same dates.
    ax2 = ax.inset_axes([0, -0.2, 1, 0.1])

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.spines['left'].set_visible(False)

    # One marker per row, i.e. per time point at which data exists
    ax2.plot(sub_df["date"], [1] * len(sub_df), 'o', color='black', markersize=4, markeredgewidth=1)

    # The y position of the markers is meaningless; removing the ticks also
    # removes their labels.
    ax2.set_yticks([])
    ax2.set_ylabel("Measurements")
    ax2.grid(axis='x', linestyle='--', alpha=1.0, color="black")

    fig.tight_layout()

    # Save through the Figure object first, then display, then release this
    # specific figure so figures do not accumulate across iterations.
    fig.savefig(f"timeseries_{name}.png")
    plt.show()
    plt.close(fig)

    