In [20]:
import wandb
import matplotlib.pyplot as plt
import dice_ml
import warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import f1_score, make_scorer, roc_auc_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
import os
from sklearn.preprocessing import MinMaxScaler
from scripts.SEP_CFE_functions import *

# Silence every Python warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Multiplier applied to the raw feature values when a query instance is built.
scale_factor = 1_000.0

# Scaler instance kept at notebook level; it is only constructed here, not fitted.
min_max_scaler = MinMaxScaler()

In [21]:
# Load the processed GOES-18 2024 table (one row per input file, features plus labels).
combined_df = pd.read_csv(
    "../../data/processed/goes18_2024_features_labels.csv",
    sep=",",
)

# Preview the first rows, then list the column index, each on its own line.
print(combined_df.head(), combined_df.columns, sep="\n")

                             input_filename  xrsa[0:120]_mean  \
0  GOES18_2024_0000_20240101_0000_input.csv      1.177751e-07   
1  GOES18_2024_0001_20240101_0200_input.csv      2.562598e-08   
2  GOES18_2024_0002_20240101_0400_input.csv      9.965425e-09   
3  GOES18_2024_0003_20240101_0600_input.csv      1.561024e-08   
4  GOES18_2024_0004_20240101_0800_input.csv      5.757184e-07   

   xrsa[0:60]_mean  xrsa[0:30]_mean  xrsa[0:15]_mean  xrsa[0:7]_mean  \
0     1.585111e-07     2.000465e-07     2.360244e-07    2.621947e-07   
1     3.107531e-08     3.511606e-08     3.661682e-08    3.710156e-08   
2     1.090954e-08     1.257245e-08     1.304101e-08    1.300326e-08   
3     1.177995e-08     8.713668e-09     1.366712e-08    2.128694e-08   
4     9.745299e-07     2.278376e-07     3.499181e-07    4.294937e-07   

   xrsa[0:3]_mean  xrsa[3:7]_mean  xrsa[7:15]_mean  xrsa[7:11]_mean  \
0    2.780908e-07    2.502726e-07     2.131253e-07     2.237296e-07   
1    3.822583e-08    3.625835e-08 

In [22]:
# Restore the previously saved artefacts from ../../models:
#   best_rf      - the object stored as the RandomForestClassifier model file
#   exp_genetic  - the object stored as the "genetic" CFE explainer file
#   exp_random   - the object stored as the "random" CFE explainer file
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artefacts produced by a trusted pipeline.
best_rf = joblib.load('../../models/RandomForestClassifier_model_goes18_xrs_cfe.pkl')
exp_genetic = joblib.load("../../models/goes18_xrs_2024_cfe_genetic_explainer.pkl")
exp_random = joblib.load("../../models/goes18_xrs_2024_cfe_random_explainer.pkl")

In [23]:
def get_query_instance(query_ts_filename, df_combined_labels, model, scale=None):
    """
    Look up one labelled row by filename and prepare it as a model query.

    The rows whose ``input_filename`` equals ``query_ts_filename`` are selected,
    the ``label``, ``input_filename`` and ``binary_label`` columns are dropped,
    the remaining columns are multiplied by ``scale`` and NaNs are replaced
    by 0. The model is then asked to predict on that frame, and both the true
    and the predicted label are printed.

    Args:
        query_ts_filename (str): The filename identifying the query instance.
        df_combined_labels (pd.DataFrame): DataFrame containing labeled data.
            Must have ``input_filename``, ``label`` and ``binary_label`` columns.
        model: A trained classifier with a `.predict()` method.
        scale (float, optional): Multiplier applied to the feature columns.
            When omitted, the notebook-level ``scale_factor`` is used, which
            matches the behaviour before this parameter existed.

    Returns:
        Tuple[pd.DataFrame, Any, Any]: The query instance (scaled features
        only), the ``binary_label`` value of the first matching row, and
        whatever ``model.predict`` returned for the query instance.

    Raises:
        IndexError: If no row has ``input_filename == query_ts_filename``.
    """
    if scale is None:
        # Fall back to the notebook-wide constant so existing calls are unchanged.
        scale = scale_factor

    matching_rows = df_combined_labels[df_combined_labels['input_filename'] == query_ts_filename]
    if matching_rows.empty:
        # Same exception type the bare `.values[0]` lookup would raise, but
        # with a message that names the missing file.
        raise IndexError(
            f"No row with input_filename == {query_ts_filename!r} in df_combined_labels"
        )

    feature_columns = matching_rows.drop(columns=["label", 'input_filename', 'binary_label'])
    query_instance = (feature_columns * scale).fillna(0)
    true_label = matching_rows["binary_label"].values[0]
    predicted_label = model.predict(query_instance)

    print("Target value for the query instance:\n", true_label)
    print("Predicted value:\n", predicted_label)

    return query_instance, true_label, predicted_label

In [24]:
# Lift pandas' display truncation: render every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [27]:
# Input file to explain. Other files that were tried:
#   'GOES18_2024_4390_20241231_2000_input.csv'
#   'GOES18_2024_0026_20240103_0400_input.csv'
query_ts1_filename = 'GOES18_2024_3628_20241029_0800_input.csv'

# Build the scaled feature row and fetch its true / predicted labels.
query_instance1, target_val1, predicted_val = get_query_instance(
    query_ts1_filename,
    df_combined_labels=combined_df,
    model=best_rf,
)

# Cast every feature to float and zero-fill anything that is still NaN.
query_instance1 = query_instance1.astype(float).fillna(0)

Target value for the query instance:
 0
Predicted value:
 [0]


In [1]:
# Fresh accumulator for counterfactual frames (the next cell appends to it).
cfe_exp = []

# Ask the genetic explainer for four counterfactuals whose predicted class is 1.
# The query is passed as a DataFrame, so several instances could be sent at once.
cfe_genetic = exp_genetic.generate_counterfactuals(
    query_instance1,
    total_CFs=4,
    desired_class=1,  # alternative setting: 'opposite'
)

# Render the counterfactuals, showing only the values that differ from the query.
cfe_genetic.visualize_as_dataframe(show_only_changes=True)

# Keep the counterfactual rows generated for the first query instance.
cfe_df = cfe_genetic.cf_examples_list[0].final_cfs_df

# df_cfe_exp


NameError: name 'exp_genetic' is not defined

In [None]:
# Re-running this cell must not stack the same counterfactual frame again, so
# append it only when this exact object is not already in the accumulator.
if not any(stored is cfe_df for stored in cfe_exp):
    cfe_exp.append(cfe_df)

# Combine every collected frame into one table with a fresh 0..n-1 index.
df_cfe_exp_genetic_2 = pd.concat(cfe_exp, ignore_index=True)

# Shown as the cell output only: the de-duplicated view is not assigned back,
# so df_cfe_exp_genetic_2 itself keeps all rows.
df_cfe_exp_genetic_2.drop_duplicates()


In [None]:
# NOTE: these imports are kept in this cell so it runs on its own; they
# belong in the notebook's top import cell.
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates

window_size = 10  # Smoothing window for the centred rolling mean

# One entry per flux channel:
# (flux column, line colour, suffix of the "Original" label, suffix of the CFE label)
flux_specs = [
    ('p3_flux_ic', "#FF0000", "(SEP Event)", "(Non-SEP Event)"),
    ('p5_flux_ic', "#1b5c0c", "", ""),
    ('p7_flux_ic', "#FFA500", "", ""),
]
# NOTE(review): the query was predicted as class 0 and the counterfactuals were
# requested with desired_class=1. Confirm that the "(SEP Event)" /
# "(Non-SEP Event)" wording on the p3 labels matches that direction.


def _plot_flux_cfe(ax, sample_cfe, flux_type, color, original_suffix, cfe_suffix):
    """Plot the smoothed original and counterfactual series of one flux channel.

    Args:
        ax: Matplotlib axes to draw on.
        sample_cfe: One counterfactual row, forwarded to ``get_pertubed_series``.
        flux_type (str): Flux column name, forwarded to ``get_pertubed_series``
            and embedded in both legend labels.
        color (str): Line colour shared by the solid (original) and dashed
            (counterfactual) line.
        original_suffix (str): Text appended to the "Original ..." label.
        cfe_suffix (str): Text appended to the "Smoothed CFE for ..." label.

    Returns:
        The ``max_y`` value returned by ``get_pertubed_series`` for this channel.
    """
    # csv_path and slices are not defined in the cells shown here; they must
    # already exist in the kernel namespace - TODO confirm where they are set.
    df_obs, final_series, original, _min_y, max_y = get_pertubed_series(
        csv_path, sample_cfe=sample_cfe, flux_type=flux_type, slices=slices,
        start_offset_min=300, end_offset_min=660)

    # Centred rolling means; the first/last few points become NaN and are not drawn.
    original_smooth = original.rolling(window=window_size, center=True).mean()
    final_smooth = pd.Series(final_series).rolling(window=window_size, center=True).mean()

    ax.plot(df_obs['time_stamp'], original_smooth,
            label=f"Original {flux_type}{original_suffix}", color=color)
    ax.plot(df_obs['time_stamp'], final_smooth,
            label=f"Smoothed CFE for {flux_type}{cfe_suffix}", color=color,
            linestyle='dashed')
    return max_y


fig, axs = plt.subplots(2, 2, figsize=(14, 10), facecolor='white')
axs = axs.flatten()  # Flatten for easy iteration

# Draw at most one panel per available counterfactual row, so a run that
# produced fewer than four counterfactuals does not fail on .iloc.
n_cfes = min(len(axs), len(df_cfe_exp_genetic_2))

for cfe_idx, ax in enumerate(axs):
    if cfe_idx >= n_cfes:
        ax.set_visible(False)  # no counterfactual for this panel
        continue

    sample_cfe = df_cfe_exp_genetic_2.iloc[cfe_idx]
    ax.set_facecolor('white')

    # Plot the three channels in a fixed order (p3, p5, p7) so the legend
    # entries keep that order, and collect each channel's max_y.
    channel_max_ys = [_plot_flux_cfe(ax, sample_cfe, *spec) for spec in flux_specs]
    global_max_y = max(channel_max_ys)

    ax.set_xlabel("Period of Observation", fontsize=9)
    ax.set_ylabel("Particle Flux Unit (log scale)", fontsize=9)
    ax.set_yscale('log')
    ax.set_ylim(bottom=0.01, top=4)
    # Fixed reference ticks plus the largest value reported for this panel.
    ax.set_yticks([0.1, 0.5, 1, global_max_y])
    ax.get_yaxis().set_major_formatter(plt.ScalarFormatter())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
    plt.setp(ax.get_xticklabels(), rotation=10, ha='center', fontsize=8)
    plt.setp(ax.get_yticklabels(), fontsize=8)
    ax.set_title(f'CFE #{cfe_idx + 1}', fontsize=10)

# One shared legend for the whole figure, taken from the first panel.
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(
    handles, labels,
    loc='upper center',
    bbox_to_anchor=(0.5, 0.995),  # Centered above the grid
    ncol=3,
    frameon=False,
    fontsize=8
)
plt.tight_layout(rect=[0, 0, 1, 0.96])  # Leave space for the legend at the top
plt.show()

# Upload the rendered figure to the Weights & Biases run.
# NOTE(review): no wandb.init(...) call appears in the cells shown here - confirm
# that a run is started elsewhere before this line executes.
wandb.log({"CFE generated using Constrained Dice Class": wandb.Image(fig)})
