# SVM pipeline

## Modules

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os

# Resolve the sibling "src" directory relative to the current working directory
# (presumably where the svm_pipeline and scripting modules live — TODO confirm)
parent_dir = os.path.abspath(os.path.join(os.getcwd(), "../src"))

# Put that directory first on sys.path so it takes precedence when resolving imports
sys.path.insert(0, parent_dir)

from svm_pipeline import (
    train_svm, 
    test_svm,
    multiclass_svm_calibration_curves,
    binary_svms_calibration_curves,
    create_next_experiment_folder,
    preprocess_features,
    split_dataset,
    features_selection,
    SVMS,
    SVMS_prediction_test,
    save_SVMS, 
    load_SVMS,
    correlation_analysis,
    features_importance_analysis,
    store_ma_output,
    store_rfecv_output,
    standardize_dict,
    train_binary_svms,
    apply_pca,
    save_experiment_result,
    get_scaler,
    grid_search,
    reload_and_apply_PCA)

from scripting import (
    df_numerical_columns_stats, 
    print_dict, 
    print_dataframe,
    print_list
)

import numpy as np
import pandas as pd
import yaml
import re
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.decomposition import PCA
from ast import literal_eval
from sklearn.calibration import CalibratedClassifierCV

def read_yaml(file_path: str) -> dict:
    """Open ``file_path`` for reading and return what ``yaml.safe_load`` parses from it."""
    with open(file_path, 'r') as handle:
        content = yaml.safe_load(handle)
    return content
    
def fix_paths_for_nb(input_dict, old_substring = "/home/hrlcuser/media", new_substring = "/media/datapart/lucazanolo"):
    """Return a new dict where ``old_substring`` is replaced by ``new_substring`` in string values.

    Only top-level values that are ``str`` instances are rewritten; any other
    value (including strings nested inside lists or dicts) is copied over unchanged.
    Keys and their order are preserved.
    """
    remapped = {}
    for name, entry in input_dict.items():
        if isinstance(entry, str):
            entry = entry.replace(old_substring, new_substring)
        remapped[name] = entry
    return remapped


## Parameters

In [None]:
# Load the pipeline configuration and remap its path strings via fix_paths_for_nb
parameters = fix_paths_for_nb(read_yaml("/home/lucazanolo/luca-zanolo/scripts/config_files/6.svm_pipeline.yaml"))
# Drop every key that starts with "GridSearch" (presumably grid-search-only settings — confirm against the YAML)
parameters = {k:v for k,v in parameters.items() if not k.startswith("GridSearch")}

# Make sure the output root exists, then ask the helper for the folder of this experiment
os.makedirs(parameters['output_path'], exist_ok=True)
experiment_path = create_next_experiment_folder(parameters["output_path"])

# Dataset type = text after the last "_" of the dataset path, minus its final 4 characters
# (assumes a 4-character extension such as ".csv" — TODO confirm)
parameters["dataset_type"] = parameters['dataset_path'].split('_')[-1][:-4]
# Per-experiment output locations
parameters["features_inspection_path"] = f"{experiment_path}/features_info"
parameters["svm_model_path"] = f"{experiment_path}/svm.pkl"

parameters["calibrated_model_path"] = f"{experiment_path}/svm_calibrated.pkl"
parameters["calibration_curves_path"] = f"{experiment_path}/calibration_curves"
# The performance CSV sits in the output root, not in the per-experiment folder
parameters["performance_csv_path"] = f"{parameters['output_path']}/experiments.csv"
# Paths for the class-distribution images of each partition (not referenced again in this notebook)
train_class_distribution_path = f"{parameters['features_inspection_path']}/train_class_distribution.png"
test_class_distribution_path = f"{parameters['features_inspection_path']}/test_class_distribution.png"
cal_class_distribution_path = f"{parameters['features_inspection_path']}/cal_class_distribution.png"

# Create the per-experiment sub-folders up front
os.makedirs(parameters["features_inspection_path"], exist_ok=True)
os.makedirs(parameters["calibration_curves_path"], exist_ok=True)

## Load dataset

In [None]:
# Load the full dataset from the CSV configured in the parameters
dataset = pd.read_csv(parameters["dataset_path"])
# astype returns a new Series; assign it back so the integer cast actually takes effect
dataset["ground_truth_index"] = dataset["ground_truth_index"].astype(int)
# Keep only the declared non-feature columns that are actually present in this dataset
parameters["non_features_columns"] = [f for f in parameters["non_features_columns"] if f in dataset.columns]

# Print per-column statistics and a preview of the loaded data
df_numerical_columns_stats(dataset, title = f"\nFull dataset samples: {len(dataset)}\nColumns info:")
print_dataframe(dataset)

## Features preprocessing


In [None]:
# Split the full dataset into train / test / calibration partitions (the helper is also asked for a report)
dataset_train, dataset_test, dataset_calibration = split_dataset(dataset, parameters, generate_report=True)
# Preprocess the training partition only; the returned metadata holds a "method" and "params" entry per feature
dataset_train, preprocessing_metadata = preprocess_features(dataset_train, parameters)

# Build one scaler per feature listed in the metadata
# NOTE(review): presumably get_scaler rebuilds an already-fitted scaler from the stored params — confirm
scalers = {f : get_scaler(info["method"], info["params"]) for f, info in preprocessing_metadata.items()}
for feature_name in dataset_train.keys():
    
    # Non-feature columns and columns excluded from preprocessing are left untouched
    if feature_name in parameters["non_features_columns"] or feature_name in parameters["exluded_from_preprocessing"]:
        continue
    
    # Apply the same transform to test and calibration: each column is reshaped to a
    # single-column 2-D array for transform() and flattened back afterwards
    dataset_test[feature_name] = scalers[feature_name].transform(dataset_test[feature_name].values.reshape(-1,1)).flatten()
    dataset_calibration[feature_name] = scalers[feature_name].transform(dataset_calibration[feature_name].values.reshape(-1,1)).flatten()




## Features selection


In [None]:

# Force verbose mode for this cell; it is checked by the two reporting branches below
parameters["verbose"] = True
    
# Run feature selection on the training partition only
features_to_remove, features_to_use, selection_report = features_selection(parameters, dataset_train)

# Sanity checks: the two sets must be disjoint and together account for every training column
assert features_to_use.intersection(features_to_remove) == set(),  "Detected a feature to be removed and used at the same time."
assert len(features_to_remove) + len(features_to_use) == len(dataset_train.columns),  f"Detected inconsistent number of dataset columns in features to use and to remove.\nFeatures to use: {len(features_to_use)}\nFeatures to remove: {len(features_to_remove)}"

# Summary of how many columns were dropped at each selection level
print(f"\nLV. 0 - Total features to analyze: {len(dataset_train.columns)} ({len(parameters['non_features_columns'])} Non Features)")
print(f"LV. 1 - Features intially removed: {len(selection_report['removed_features_lv1'])}")
print(f"LV. 2 - Removed constant Features: {len(selection_report['constant_features'])}")
print(f"LV. 3 - Features removed with {parameters['features_selection_strategy']} selection strategy: {len(selection_report['removed_features_lv2'])}")
print(f"LV. 4 - Features to be used: {len(features_to_use)} ({len(parameters['non_features_columns'])} Non Features)")

# If the kept set is no larger than the non-feature columns, no real feature survived
if len(features_to_use) == len(parameters['non_features_columns']):
    raise ValueError("Parameters configuration error. Specify at least a feature to use.")


# Store the "pre_selection" correlation / importance outputs.
# The multicollinearity branch reuses what the selection step already computed;
# the other branches compute them here from the (still unfiltered) training set.
if selection_report["type"] == "multicollinearity_analysis" and parameters["verbose"] == True:
    
    correlation_matrix = selection_report["info"]["correlation_matrix"]
    features_importances = selection_report["info"]["features_importance"]
    store_ma_output(correlation_matrix, features_importances, parameters, note = "pre_selection")

elif selection_report["type"] == "rfecv" and parameters["verbose"] == True:
    
    correlation_matrix = correlation_analysis(dataset_train, parameters)
    features_importances = features_importance_analysis(dataset_train, parameters, target_col='ground_truth_index')
    store_ma_output(correlation_matrix, features_importances, parameters, note = "pre_selection")

    # RFECV additionally stores its own selection report
    store_rfecv_output(selection_report, parameters, note="")

else:
    
    # Any other selection type (or verbose switched off) falls through to the same analysis as above
    correlation_matrix = correlation_analysis(dataset_train, parameters)
    features_importances = features_importance_analysis(dataset_train, parameters, target_col='ground_truth_index')
    store_ma_output(correlation_matrix, features_importances, parameters, note = "pre_selection")

# Keep the selected columns in the column order of the original dataset, and align the other partitions to it
ordered_cols = [col for col in dataset.columns if col in features_to_use]
dataset_train = dataset_train[ordered_cols]
dataset_calibration = dataset_calibration[list(dataset_train.keys())]
dataset_test = dataset_test[list(dataset_train.keys())]

# Repeat the analysis on the reduced training set and store it under "post_selection"
correlation_matrix = correlation_analysis(dataset_train, parameters)
features_importances = features_importance_analysis(dataset_train, parameters, target_col='ground_truth_index')
store_ma_output(correlation_matrix, features_importances, parameters, note = "post_selection")

parameters["features_not_used"] = features_to_remove
# Selected feature names = remaining training columns minus the non-feature columns
features_selected = dataset_train.columns.difference(parameters["non_features_columns"])
    
# No partition may contain missing values at this point
assert not dataset_train.isnull().any().any(), "NaN found in training set"
assert not dataset_test.isnull().any().any(), "NaN found in test set"
assert not dataset_calibration.isnull().any().any(), "NaN found in calibration set"


## Features reduction

In [None]:
# pca stays None when PCA is disabled; it is later handed to the SVMS constructor as-is
pca = None
if parameters["apply_pca"] == True:
            
    # apply_pca returns the transformed training set together with the PCA object
    dataset_train, pca = apply_pca(dataset_train, parameters)
    
    # Store the correlation / importance analysis of the transformed training set under "post_pca"
    correlation_matrix = correlation_analysis(dataset_train, parameters)
    features_importances = features_importance_analysis(dataset_train, parameters, target_col='ground_truth_index')
    store_ma_output(correlation_matrix, features_importances, parameters, note = "post_pca")
    
    # Apply the same PCA object to the test and calibration partitions
    dataset_test = reload_and_apply_PCA(pca, dataset_test, parameters)
    dataset_calibration = reload_and_apply_PCA(pca, dataset_calibration, parameters)

## Train Single SVM

In [None]:
# Parameter grid handed to train_svm: 5 C values in [100, 1000], 10 gamma values in [1e-3, 10],
# with probability=True for every candidate

param_grid = {
    'C': np.linspace(100, 1000, 5),
    'gamma': np.linspace(1e-3, 10, 10),
    'probability' : [True],
}

# Targets are the label column of each partition
Y_train = dataset_train["ground_truth_label"]
Y_test = dataset_test["ground_truth_label"]
Y_cal = dataset_calibration["ground_truth_label"]

# Inputs: drop the non-feature columns, and force test / calibration into the training column order
X_train = dataset_train.drop(columns=parameters["non_features_columns"])
X_test = dataset_test.drop(columns=parameters["non_features_columns"])[X_train.columns]
X_cal = dataset_calibration.drop(columns=parameters["non_features_columns"])[X_train.columns]

# NOTE(review): Index.difference returns a sorted index by default, so this list may not follow
# X_train's column order — confirm how SVMS consumes features_used
features_used = list(X_train.columns.difference(parameters["non_features_columns"]))
    
classes = np.unique(Y_train)

# Search the grid with 3-fold cross-validation (cv=3 is passed to the helper)
svm_grid_search = train_svm(X_train, Y_train, param_grid, cv=3)

svm = svm_grid_search.best_estimator_
best_params = svm_grid_search.best_params_
best_params_std = standardize_dict(best_params, "SVM")
print_dict(best_params, f"Best SVM parameters")

# Evaluate the uncalibrated model on the test set and merge the best parameters into its report
svm_test_report = test_svm(svm, X_test, Y_test, verbose=True)
svm_test_report = standardize_dict(svm_test_report, "EVAL")
svm_test_report.update(best_params_std)

# Calibrate the already-fitted SVM with isotonic regression on the separate calibration partition
# NOTE(review): cv='prefit' is reported as deprecated in recent scikit-learn releases — confirm against the installed version
svm_calibrated = CalibratedClassifierCV(estimator=svm, method='isotonic', cv='prefit', n_jobs=-1)
svm_calibrated.fit(X_cal, Y_cal)

# Evaluate the calibrated model on the same test set and merge it into the same report
svm_calibration_report = test_svm(svm_calibrated, X_test, Y_test, verbose=True)
svm_calibration_report = standardize_dict(svm_calibration_report, "EVALCAL")
svm_test_report.update(svm_calibration_report)
mcsvm_report = svm_test_report.copy()

# Show the combined report as a one-row table
report_df = pd.DataFrame([mcsvm_report])
print_dataframe(report_df)

## Train Binary SVMs


In [None]:
# Same C / gamma grid as the single-SVM cell, but with probability=False for every candidate
param_grid = {
    'C': np.linspace(100, 1000, 5), 
    'gamma': np.linspace(1e-3, 10, 10), 
    'probability' : [False]
}
print("Training binary SVMs with parameters grid:")
print(param_grid)
# Train the binary SVMs; test and calibration inputs are re-indexed to the training column order
svms = train_binary_svms(parameters, param_grid, X_train, X_test[X_train.columns], X_cal[X_train.columns], Y_train, Y_test, Y_cal)
   

## Create SVMs class and save

In [None]:
# Distinct training labels (recomputed here so the cell does not depend on the training cell's variable)
classes = np.unique(Y_train)

# Bundle the multiclass SVM, its calibrated version, the binary SVMs and the preprocessing /
# selection / PCA artefacts into a single SVMS object
model = SVMS(svm=svm,
             svm_calibrated=svm_calibrated,
             svms=svms, 
             parameters=parameters, 
             classes=classes, 
             preprocessing_metadata=preprocessing_metadata, 
             training_report= mcsvm_report,  
             pca=pca, 
             features_selected = features_selected,
             features_used = features_used)

# Persist the bundle at the per-experiment model path
save_SVMS(model, parameters['svm_model_path'])

## Calibration Curves

In [None]:
# Calibration curves for the multiclass SVM on the calibration partition, written to the experiment's curves folder
multiclass_svm_calibration_curves(model, X_cal, Y_cal, output_dir=parameters['calibration_curves_path'], show_plots=True)

In [None]:
# Calibration curves for the binary SVMs on the calibration partition, written to the same curves folder
binary_svms_calibration_curves(model, X_cal, Y_cal, parameters['calibration_curves_path'], show_plots=True)

#### Multiclass SVM uncalibrated prediction test

In [None]:
# All three mode flags off — presumably this makes the model predict with the plain multiclass SVM (confirm in SVMS)
model.use_binary_svms = False
model.use_multiclass_svm_calibrated = False
model.use_binary_svms_with_softmax = False
SVMS_prediction_test(model, X_test, Y_test, n_samples=None, chart_title="Uncalibrated Multiclass SVM - Distribution of predicted labels")
print(f"SVM evaluation on test set:\n\n")
# Switch off the model's own verbose flag before the evaluation call
model.verbose = False
_ = test_svm(model, X_test, Y_test, verbose=True)
# NOTE(review): a leftover comment here mentioned plotting calibration curves for the multiclass SVM
# and its calibrated version; no code in this cell does that


#### Multiclass SVM calibrated prediction test

In [None]:
# Only the calibrated-multiclass flag is on — presumably selects the calibrated multiclass SVM (confirm in SVMS)
model.use_binary_svms = False
model.use_multiclass_svm_calibrated = True
model.use_binary_svms_with_softmax = False

SVMS_prediction_test(model, X_test, Y_test, n_samples=None, chart_title="Calibrated Multiclass SVM - Distribution of predicted labels")
print(f"SVM evaluation on test set:\n\n")
# Switch off the model's own verbose flag before the evaluation call
model.verbose = False
_ = test_svm(model, X_test, Y_test, verbose=True)

#### Binary SVMs prediction test

In [None]:
# Only the binary-SVMs flag is on — presumably selects prediction through the binary SVMs without softmax (confirm in SVMS)
model.use_binary_svms = True
model.use_multiclass_svm_calibrated = False
model.use_binary_svms_with_softmax = False
# No chart_title is passed here, so the helper's default title applies
SVMS_prediction_test(model, X_test, Y_test, n_samples=None)
print(f"SVM evaluation on test set:\n\n")
# Switch off the model's own verbose flag before the evaluation call
model.verbose = False
_ = test_svm(model, X_test, Y_test, verbose=True)

#### Binary SVM with softmax prediction test

In [None]:
# Binary-SVMs and softmax flags both on — presumably selects the binary SVMs with softmax applied (confirm in SVMS)
model.use_binary_svms = True
model.use_multiclass_svm_calibrated = False
model.use_binary_svms_with_softmax = True
# No chart_title is passed here, so the helper's default title applies
SVMS_prediction_test(model, X_test, Y_test, n_samples=None)
print(f"SVM evaluation on test set:\n\n")
# Switch off the model's own verbose flag before the evaluation call
model.verbose = False
_ = test_svm(model, X_test, Y_test, verbose=True)

## Load saved model and display info

In [None]:

def load_and_display_svms(filepath):
    """Load an SVMS object from ``filepath`` with joblib and print its contents as tables.

    Every attribute is read from the loaded object before anything is printed.
    The printed sections cover experiment parameters, class information, the
    multiclass SVM, the binary SVMs, preprocessing metadata, PCA, and one
    performance table per binary SVM report.
    """

    def _as_display_value(value):
        # numpy arrays, lists and dicts are shown as strings; everything else is kept as-is
        if isinstance(value, np.ndarray):
            return str(value.tolist())
        if isinstance(value, (list, dict)):
            return str(value)
        return value

    svms_model = joblib.load(filepath)

    # Collect everything up front
    experiment_params = svms_model.experiment_parameters

    raw_class_info = {
        "Number of Classes": svms_model.n_classes,
        "Classes": svms_model.classes,
        "ID to Class Mapping": svms_model.id2class,
        "Class to ID Mapping": svms_model.class2id
    }
    class_info_cleaned = {label: _as_display_value(entry) for label, entry in raw_class_info.items()}

    multiclass_info = {
        "Multiclass SVM Type": type(svms_model.multiclass_svm).__name__,
        "Use Calibrated Multiclass SVM": svms_model.use_multiclass_svm_calibrated
    }

    binary_svm_info = {
        "Use Binary SVMs": svms_model.use_binary_svms,
        "Use Softmax with Binary SVMs": svms_model.use_binary_svms_with_softmax,
        "Number of Binary SVMs": len(svms_model.binary_svms),
        # The key list is stringified so it fits in a single table cell
        "Binary SVM Classes": str(list(svms_model.binary_svms.keys()))
    }

    preprocessing_metadata = svms_model.preprocessing_metadata
    # A falsy pca attribute is reported as the text "Not Used"
    pca = svms_model.pca or "Not Used"

    binary_svm_reports = svms_model.binary_svms_reports

    # Key/value sections share one layout: (banner, mapping, column names, table title)
    sections = [
        ("\n--- Experiment Parameters ---", experiment_params, ["Parameter", "Value"], "Experiment Parameters"),
        ("\n--- Class Information ---", class_info_cleaned, ["Attribute", "Value"], "Class Information"),
        ("\n--- Multiclass SVM Information ---", multiclass_info, ["Attribute", "Value"], "Multiclass SVM"),
        ("\n--- Binary SVM Information ---", binary_svm_info, ["Attribute", "Value"], "Binary SVM Information"),
        ("\n--- Preprocessing Metadata ---", preprocessing_metadata, ["Step", "Details"], "Preprocessing Metadata"),
    ]

    print("\n===== SVMS Model Information =====")

    for banner, mapping, column_names, table_title in sections:
        print(banner)
        print_dataframe(pd.DataFrame(mapping.items(), columns=column_names), title=table_title)

    print("\n--- PCA Metadata ---")
    print_dataframe(pd.DataFrame({"PCA": [pca]}), title="PCA Metadata")

    print("\n--- Binary SVM Performance Reports ---")
    for cls in binary_svm_reports:
        print(f"\nBinary SVM Report for Class: {cls}")
        print_dataframe(pd.DataFrame(binary_svm_reports[cls]).T, title=f"Performance Report for {cls}")

# Hard-coded location of a previously saved model; load it and print its details
svms_model_path = "/media/datapart/lucazanolo/SVM/lc_maps/best_svm/svm.pkl"
load_and_display_svms(svms_model_path)


## Grid Search

In [None]:
# Reload the full configuration (here the "GridSearch"-prefixed keys are kept) and remap its paths
parameters = fix_paths_for_nb(read_yaml("/home/lucazanolo/luca-zanolo/scripts/config_files/6.svm_pipeline.yaml"))

# Launch the grid search with that configuration
grid_search(parameters=parameters)

## Inspect grid search results

In [None]:
# Load the grid-search results table
df = pd.read_csv('/media/datapart/lucazanolo/SVM/evaluation/experiments/grid_search.csv')
# Show the two average-F1 columns, ranked first by SVMS_AVGF1 and then by SVM_AVGF1 (limit=3 is passed to the printer)
print_dataframe(df[['SVMS_AVGF1','SVM_AVGF1']].sort_values(by=['SVMS_AVGF1'], ascending=False), limit=3)
print_dataframe(df[['SVMS_AVGF1','SVM_AVGF1']].sort_values(by=['SVM_AVGF1'], ascending=False), limit=3)

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def plot_avg_f1_structured(df, fields, score_col="SVM_AVGF1", output_path="avg_f1_structured_fixed.png", aggregate_fn="mean", title="", top_k=20):
    """Plot an aggregated score for every value of each requested field as horizontal bars.

    For each field found in ``df`` the rows are grouped by that field's values
    (NaN is kept as its own group and labelled "NaN") and ``score_col`` is aggregated:

    * ``aggregate_fn="mean"`` with ``top_k`` set: mean of the ``top_k`` largest
      non-NaN scores in each group;
    * otherwise: the plain ``mean`` or ``max`` of the group.

    The chart has one bold header row per plotted field followed by one bar per
    value, each labelled with the value and its score as a percentage. The figure
    is written to ``output_path`` and then shown.

    Args:
        df: DataFrame holding the fields and the score column.
        fields: Column names to group by; names missing from ``df`` are reported and skipped.
        score_col: Name of the score column to aggregate.
        output_path: Destination file for the saved figure.
        aggregate_fn: Either "mean" or "max".
        title: Chart title; prefixed with "Top-<top_k>" only when the top-k mean is used.
        top_k: Number of best scores averaged per group in "mean" mode; None disables it.

    Raises:
        ValueError: If ``aggregate_fn`` is neither "mean" nor "max".
    """
    if aggregate_fn not in ["mean", "max"]:
        raise ValueError("aggregate_fn must be either 'mean' or 'max'")

    use_top_k = aggregate_fn == "mean" and top_k is not None
    records = []

    for field in fields:
        if field not in df.columns:
            print(f"Skipping '{field}' — not in DataFrame.")
            continue

        if use_top_k:
            grouped_scores = (
                df[[field, score_col]]
                .dropna(subset=[score_col])
                .groupby(field, dropna=False)
                .apply(lambda g: g.nlargest(top_k, score_col)[score_col].mean())
                .reset_index(name=score_col)
            )
        else:
            grouped_scores = getattr(df.groupby(field, dropna=False)[score_col], aggregate_fn)().reset_index()

        for _, row in grouped_scores.iterrows():
            val = row[field]
            val_str = "NaN" if pd.isna(val) else str(val)
            records.append({
                "field": field,
                "value": val_str,
                "score": row[score_col]
            })

    if not records:
        print("No data to plot.")
        return

    # Values are listed alphabetically (by their string form) inside each field
    records.sort(key=lambda r: (r["field"], r["value"]))

    # Only fields that produced at least one record get a section, so skipped
    # fields no longer leave an empty header row in the chart
    fields_with_data = {r["field"] for r in records}
    plotted_fields = [field for field in fields if field in fields_with_data]

    plot_data = []
    label_styles = []

    for i, field in enumerate(plotted_fields):
        if i != 0:
            # Blank row separating consecutive sections
            plot_data.append((" ", None))
            label_styles.append("spacer")

        plot_data.append((f" {field}", None))
        label_styles.append("header")

        for r in [r for r in records if r["field"] == field]:
            label = f"  {r['value']:<25} {(r['score'] * 100):.3f}%"  # name and score in the same label
            plot_data.append((label, r["score"]))
            label_styles.append("value")

    labels = [label for label, _ in plot_data]
    scores = [score for _, score in plot_data]
    y_pos = list(range(len(labels)))

    fig, ax = plt.subplots(figsize=(8, max(6, len(labels) * 0.35)))
    # Header and spacer rows get a zero-length white bar so they only take up vertical space
    bar_values = [s if s is not None else 0 for s in scores]
    bar_colors = ["cornflowerblue" if style == "value" else "white" for style in label_styles]

    ax.barh(y_pos, bar_values, color=bar_colors)
    ax.set_xlim(0, 1) 

    # Tick labels are blanked; the text is drawn inside the axes instead
    ax.set_yticks(y_pos)
    ax.set_yticklabels([""] * len(labels))

    for i, (label, style) in enumerate(zip(labels, label_styles)):
        if style == "header":
            ax.text(0, i, label, va="center", ha="left", fontsize=10, fontweight="bold", family="monospace")
        elif style == "value":
            ax.text(0, i, label, va="center", ha="left", fontsize=9, family="monospace")

    ax.set_xlabel(f"{aggregate_fn.capitalize()} {score_col}")
    # Only advertise "Top-k" when the top-k mean was actually applied
    ax.set_title(f"Top-{top_k} {title}" if use_top_k else title)
    ax.invert_yaxis()
    plt.tight_layout()
    # Save before showing: once show() returns the figure may already be released,
    # in which case a later savefig would write an empty image
    fig.savefig(output_path, dpi=300)
    plt.show()
    plt.close(fig)
    print(f"Plot saved to: {output_path}")


# Derive a dataset name from the path: file name without its last 4 characters
# (assumes a 4-character extension such as ".csv" — TODO confirm); missing paths are passed through
df["dataset_name"] = df["dataset_path"].apply(lambda p: os.path.basename(str(p))[:-4] if pd.notna(p) else p)
# Configuration columns whose values are compared in the chart
fields_to_check = [
    "dataset_type",
    "dataset_name",
    "use_glcm_features",
    "use_other_features",
    "band_features_preprocessing",
    "other_features_preprocessing",
    "features_selection_strategy",
    "fss_use_bands",
    "apply_pca",
    "pca_use_bands"
]


# Chart of the mean of the 10 best SVM_AVGF1 scores for each value of every field above
plot_avg_f1_structured(
    df,
    fields=fields_to_check,
    score_col="SVM_AVGF1",
    output_path="svm_avg_f1_structured_fixed.png",
    aggregate_fn='mean',
    title = "Multiclass SVM Grid Search average results",
    # Alternative title kept for reference:
    #title = "Binary SVMs Grid Search average results",
    top_k=10
)


#### Multiclass SVM

In [None]:

# Reload the grid-search results and pick the row with the highest SVM_AVGF1
df = pd.read_csv("/media/datapart/lucazanolo/SVM/evaluation/experiments/grid_search.csv")
best_svm_idx = df["SVM_AVGF1"].idxmax()
best_svm = df.loc[best_svm_idx]

# filter(like=...) keeps the entries whose label contains the given substring
svm_eval_metrics = best_svm.filter(like="SVM_EVAL_")
svm_evalcal_metrics = best_svm.filter(like="SVM_EVALCAL_")

# Configuration values of the best run, shown in the cell output below
svm_params = best_svm[
    [
        "SVM_params_C",
        "SVM_params_gamma",
        "SVM_params_probability",
        "apply_pca",
        "pca_use_bands",
        "band_features_preprocessing",
        "dataset_path",
        "dataset_type",
        "drop_constant_features",
        "features_selection_strategy",
        "fss_use_bands",
        "id",
        "log_name",
        "ma_correlation_threshold",
        "non_features_columns",
        "other_features_preprocessing",
        "use_band_features",
        "use_glcm_features",
        "use_other_features",
    ]
]

# Function to format classification report
def format_classification_report(metrics, prefix):
    """Turn flat ``<prefix><class>_<metric>`` entries into a per-class report table.

    ``prefix`` is removed from every index label and the remainder is split on
    "_": the first piece is the class label and the rest, re-joined with "_",
    is the metric name. Labels with no "_" left after the prefix are ignored.
    Only classes providing precision, recall, f1-score and support are reported,
    sorted by class label.

    Returns a DataFrame with columns Class, Precision, Recall, F1-Score, Support.
    """
    required = ("precision", "recall", "f1-score", "support")

    per_class = {}
    for column_name in metrics.index:
        label, *metric_tokens = column_name.replace(prefix, "").split("_")
        if not metric_tokens:
            continue
        per_class.setdefault(label, {})["_".join(metric_tokens)] = metrics[column_name]

    rows = [
        [label, *(values[key] for key in required)]
        for label, values in sorted(per_class.items())
        if all(key in values for key in required)
    ]

    return pd.DataFrame(rows, columns=["Class", "Precision", "Recall", "F1-Score", "Support"])

# Build the per-class tables for the uncalibrated (EVAL) and calibrated (EVALCAL) metrics of the best run
eval_report = format_classification_report(svm_eval_metrics, prefix="SVM_EVAL_")
evalcal_report = format_classification_report(svm_evalcal_metrics, prefix="SVM_EVALCAL_")

print("\n===== Best Multi-Class SVM Performance =====")
print(f"SVM_AVGF1: {best_svm['SVM_AVGF1']}")

print("\n--- Uncalibrated SVM Performance (EVAL) ---")
print_dataframe(eval_report, title="Uncalibrated SVM Report")

print("\n--- Calibrated SVM Performance (EVALCAL) ---")
print_dataframe(evalcal_report, title="Calibrated SVM Report")

# svm_params is a Series; wrapping it in a DataFrame gives a one-column table
print("\n===== Best Multi-Class SVM Configuration Parameters =====")
print_dataframe(pd.DataFrame(svm_params), title="Best multiclass SVM Configuration Parameters")


#### Binary SVMs

In [None]:
# Reload the grid-search results and pick the row with the highest SVMS_AVGF1
df = pd.read_csv("/media/datapart/lucazanolo/SVM/evaluation/experiments/grid_search.csv")
best_svms_idx = df["SVMS_AVGF1"].idxmax()
best_svms = df.loc[best_svms_idx]

# filter(like=...) keeps the entries whose label contains the given substring
svms_eval_metrics = best_svms.filter(like="SVMS_EVAL_")
svms_evalcal_metrics = best_svms.filter(like="SVMS_EVALCAL_")

# Configuration values of the best run (same column list as the multiclass cell, including the SVM_params_* columns)
svms_params = best_svms[
    [
        "SVM_params_C",
        "SVM_params_gamma",
        "SVM_params_probability",
        "apply_pca",
        "pca_use_bands",
        "band_features_preprocessing",
        "dataset_path",
        "dataset_type",
        "drop_constant_features",
        "features_selection_strategy",
        "fss_use_bands",
        "id",
        "log_name",
        "ma_correlation_threshold",
        "non_features_columns",
        "other_features_preprocessing",
        "use_band_features",
        "use_glcm_features",
        "use_other_features",
    ]
]

# Function to extract class macro metrics
def extract_class_macro_metrics(eval_metrics, evalcal_metrics):
    """Build a per-class table of macro-average metrics before and after calibration.

    Index labels of ``eval_metrics`` matching
    ``SVMS_EVAL_<class>_macro-avg_<metric>`` (metric being f1-score, precision,
    recall or support) are collected per class. For each one, the value under
    the corresponding ``SVMS_EVALCAL_`` label is looked up in ``evalcal_metrics``
    and is ``None`` when absent.

    A class is reported only when its EVAL precision, recall and f1-score are all
    present. Any other value missing for a reported class (for example support)
    is left as ``None`` rather than raising ``KeyError``.

    Args:
        eval_metrics: Series of uncalibrated metrics indexed by column name.
        evalcal_metrics: Series of calibrated metrics indexed by column name.

    Returns:
        DataFrame with one row per class, sorted by class name, holding the
        EVAL columns followed by the EVALCAL columns.
    """
    column_pattern = re.compile(r"SVMS_EVAL_([\w\-]+)_macro-avg_(f1-score|precision|recall|support)")
    class_macro_metrics = {}

    for col in eval_metrics.index:
        match = column_pattern.match(col)
        if not match:
            continue
        class_name, metric_type = match.groups()

        per_class = class_macro_metrics.setdefault(class_name, {})
        per_class[f"{metric_type}_eval"] = eval_metrics[col]
        per_class[f"{metric_type}_evalcal"] = evalcal_metrics.get(
            col.replace("SVMS_EVAL_", "SVMS_EVALCAL_"), None
        )

    # Column order of the output table, applied first to EVAL and then to EVALCAL
    metric_order = ("precision", "recall", "f1-score", "support")
    required_keys = ("f1-score_eval", "precision_eval", "recall_eval")

    report_data = []
    for cls, metrics_dict in sorted(class_macro_metrics.items()):
        if not all(key in metrics_dict for key in required_keys):
            continue
        # .get() keeps the row when an optional metric (e.g. support) was never recorded
        row = [cls]
        row.extend(metrics_dict.get(f"{metric}_eval") for metric in metric_order)
        row.extend(metrics_dict.get(f"{metric}_evalcal") for metric in metric_order)
        report_data.append(row)

    report_df = pd.DataFrame(report_data, columns=[
        "Class", 
        "Precision (EVAL)", "Recall (EVAL)", "F1-Score (EVAL)", "Support (EVAL)",
        "Precision (EVALCAL)", "Recall (EVALCAL)", "F1-Score (EVALCAL)", "Support (EVALCAL)"
    ])
    
    return report_df

# Per-class macro-average table for the best binary-SVMs run, before and after calibration
svms_macro_report = extract_class_macro_metrics(svms_eval_metrics, svms_evalcal_metrics)

print("\n===== Best Binary SVM Performance =====")
print(f"SVMS_AVGF1: {best_svms['SVMS_AVGF1']}")

print("\n--- Binary SVM Macro Metrics (Before & After Calibration) ---")
print_dataframe(svms_macro_report, title="Binary SVM Macro Metrics (Before & After Calibration)")

# svms_params is a Series; wrapping it in a DataFrame gives a one-column table
print("\n===== Best Binary SVM Configuration Parameters =====")
print_dataframe(pd.DataFrame(svms_params), title="Best Binary SVM Configuration Parameters")


### CSV writing tests

In [None]:
import os
import csv
import numpy as np

# save_experiment_result comes from the svm_pipeline import at the top of the notebook

# Scratch CSV used by these write tests; any file left over from a previous run is removed first
test_csv_path = "test_experiment_results.csv"
if os.path.exists(test_csv_path):
    os.remove(test_csv_path)

# Case 1: initial write with the basic keys
data1 = {'accuracy': 0.95, 'loss': 0.1, 'labels': ['cat', 'dog']}

# Case 2: introduces the new key 'f1_score'
data2 = {'accuracy': 0.92, 'loss': 0.15, 'f1_score': 0.89}

# Case 3: omits 'loss' and introduces the new key 'precision'
data3 = {'accuracy': 0.90, 'precision': 0.87}

# Case 4: carries every key seen so far
data4 = {
    'accuracy': 0.94,
    'loss': 0.12,
    'f1_score': 0.91,
    'precision': 0.88,
    'labels': ['horse'],
}

# Append the four rows in order
for payload in (data1, data2, data3, data4):
    save_experiment_result(payload, test_csv_path)

# Dump the resulting file for visual verification
with open(test_csv_path, 'r') as csv_file:
    print("\n--- Final CSV Content ---")
    for row_text in csv_file:
        print(row_text.strip())

# Stress scenarios, appended to the same CSV in the order listed
stress_rows = [
    # 1. Basic normal row
    {'model': 'resnet', 'accuracy': 0.91, 'params': [128, 64, 32]},
    # 2. Special characters and strings with commas/newlines/quotes
    {'model': 'bert,base\n"quoted"', 'accuracy': 0.88, 'notes': 'Used batch_size=32, lr=0.001'},
    # 3. Missing key 'accuracy', added new key 'f1_score'
    {'model': 'efficientnet', 'f1_score': 0.84},
    # 4. All fields missing except 'model'
    {'model': 'squeezenet'},
    # 5. Extra keys not seen before
    {
        'model': 'vgg16',
        'accuracy': 0.82,
        'precision': 0.80,
        'recall': 0.78,
        'complexity': np.array([10, 20, 30]),
    },
    # 6. Empty dict (just test handling)
    {},
    # 7. Key ordering test (should not break alignment)
    {'params': [32, 64], 'accuracy': 0.90, 'model': 'mlp'},
    # 8. Numpy types and set
    {'model': 'rnn', 'accuracy': np.float64(0.85), 'layers': {1, 2, 3}},
    # 9. None values explicitly
    {'model': None, 'accuracy': None, 'notes': None},
]
for stress_row in stress_rows:
    save_experiment_result(stress_row, test_csv_path)

# 10. Simulated high volume (light)
for i in range(5):
    save_experiment_result(
        {'model': f'random_model_{i}', 'accuracy': round(0.75 + 0.01 * i, 3)},
        test_csv_path,
    )

# Dump the file once more for visual verification
with open(test_csv_path, 'r') as csv_file:
    print("\n--- Stress Test Output ---")
    for row_text in csv_file:
        print(row_text.strip())
