In [None]:
from pathlib import Path

import pandas as pd

from utils.util import *
%matplotlib inline

# Result folder of one discriminator run and the file name of its test-set predictions.
# NOTE(review): neither name is referenced in the cells visible here - confirm they are still needed.
discriminator_folder = "../experiment_results/discriminator_0.8_test"
csv_name = "discriminator_pred_on_test.csv"
# Directory the plotting cells below save their figures into.
output_dir = "classifier_analysis"

# Create the output directory (and any missing parents) up front; a pre-existing directory is not an error.
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [2]:
# Experiment name -> metrics CSVs of its individual runs. Every run stores its
# metrics under ../experiment_results/<run directory>/discriminator_metrics.csv.
experiments = {
    "categorical_crossentropy": [
        f"../experiment_results/{run_dir}/discriminator_metrics.csv"
        for run_dir in ("categorical_crossentropy",
                        "categorical_crossentropy_02",
                        "categorical_crossentropy_03")
    ],
    "Polyloss_CE": [
        f"../experiment_results/{run_dir}/discriminator_metrics.csv"
        for run_dir in ("polyloss_ce",
                        "polyloss_ce_02",
                        "polyloss_ce_03")
    ],
}

In [3]:
def experiment_name_to_avg_df_without_outliers(experiments):
    """Average each experiment's runs per epoch after dropping extreme-accuracy rows.

    For every experiment, the CSV files listed under it are read and stacked,
    keeping only the ``Accuracy``, ``F1-score`` and ``epoch`` columns. A row is
    discarded when its accuracy lies more than one (95th - 5th percentile)
    spread below the 5th percentile or above the 95th percentile of the stacked
    rows. The number of discarded rows is printed, and the surviving rows are
    averaged per epoch.

    Args:
        experiments: mapping of experiment name -> list of CSV paths.

    Returns:
        dict mapping experiment name -> DataFrame with the columns ``epoch``,
        ``Accuracy`` and ``F1-score`` and one row per epoch.
    """
    averaged = {}
    for experiments_name, paths in experiments.items():
        runs = pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)
        runs = runs[["Accuracy", "F1-score", "epoch"]]

        # Percentile fences, computed over all runs and epochs of the experiment together.
        accuracy = runs["Accuracy"]
        low = accuracy.quantile(0.05)
        high = accuracy.quantile(0.95)
        spread = high - low

        # Written as "not (below or above)" so rows whose accuracy is NaN are kept.
        is_outlier = (accuracy < (low - spread)) | (accuracy > (high + spread))
        kept = runs[~is_outlier]

        before_len, after_len = len(runs), len(kept)
        print(f"{experiments_name}: removed {before_len-after_len} samples from {before_len}")

        # Mean over the runs for each epoch; reset_index turns `epoch` back into a column.
        averaged[experiments_name] = kept.groupby('epoch').mean().reset_index()
    return averaged

# Build the per-epoch averages for every experiment; the bare name on the last
# line makes the notebook display the resulting dict of DataFrames.
experiment_name_to_avg_df = experiment_name_to_avg_df_without_outliers(experiments)
experiment_name_to_avg_df

categorical_crossentropy: removed 2 samples from 303
Polyloss_CE: removed 7 samples from 303


{'categorical_crossentropy':      epoch  Accuracy  F1-score
 0       50  0.862333  0.825562
 1      100  0.841667  0.801931
 2      150  0.833667  0.800052
 3      200  0.819667  0.767970
 4      250  0.815333  0.781866
 ..     ...       ...       ...
 96    4850  0.839333  0.817602
 97    4900  0.678667  0.628282
 98    4950  0.844667  0.819537
 99    5000  0.849667  0.825646
 100   5001  0.835667  0.809387
 
 [101 rows x 3 columns],
 'Polyloss_CE':      epoch  Accuracy  F1-score
 0       50  0.857667  0.820465
 1      100  0.860667  0.822256
 2      150  0.821000  0.784572
 3      200  0.858333  0.827406
 4      250  0.761000  0.726644
 ..     ...       ...       ...
 96    4850  0.867000  0.838243
 97    4900  0.870333  0.846100
 98    4950  0.864000  0.840443
 99    5000  0.871000  0.842650
 100   5001  0.863333  0.840205
 
 [101 rows x 3 columns]}

In [4]:

from scipy.stats import stats
import matplotlib.pyplot as plt


def _draw_metric_panel(axis, x, y, label, smoothing_window, poly_degree, color=None):
    """Draw one experiment's curve, polynomial trend and raw points on `axis`; return the colour used."""
    # Without an explicit colour, let the axis pick the next one from its colour cycle.
    color_kwargs = {} if color is None else {"color": color}
    (curve,) = axis.plot(x, y.rolling(smoothing_window).mean(), label=label,
                         linestyle='dashdot', **color_kwargs)
    color = curve.get_color()

    # Polynomial least-squares fit over the epochs, drawn as a thick trend line.
    trend = np.poly1d(np.polyfit(x, y, poly_degree))
    axis.plot(x, trend(x), color=color, linewidth=4)

    axis.scatter(x, y, color=color)
    return color


def plot_metrics(experiment_name_to_avg_df, smoothing_window=1, poly_degree=4):
    """Plot accuracy and F1-score per epoch for every experiment, side by side.

    The left panel shows ``Accuracy`` and the right panel ``F1-score``. For each
    experiment both panels contain, in one shared colour, a dash-dot rolling-mean
    curve, a thick polynomial trend line and the raw points. Each panel title
    reports the p-value of an independent two-sample t-test between the FIRST
    TWO experiments; further experiments are plotted but not tested. The figure
    is saved as ``discriminator_class_metrics.jpg`` inside the notebook-level
    ``output_dir`` and then shown.

    The input DataFrames are not modified.

    Args:
        experiment_name_to_avg_df: mapping of experiment name -> DataFrame with
            the columns ``epoch``, ``Accuracy`` and ``F1-score``.
        smoothing_window: window size of the rolling mean drawn as the dash-dot
            curve. The default of 1 leaves the values unsmoothed.
        poly_degree: degree of the polynomial trend line.
    """
    # Imported here from the public namespace: `scipy.stats.stats` is a
    # deprecated alias module that SciPy has scheduled for removal.
    from scipy.stats import ttest_ind

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(40, 20))
    accuracy = []
    f1 = []

    for experiments_name, df in experiment_name_to_avg_df.items():
        x = df["epoch"]
        accuracy.append(df["Accuracy"])
        f1.append(df["F1-score"])

        # The accuracy panel picks the colour; the F1 panel reuses it so an
        # experiment looks the same in both panels.
        color = _draw_metric_panel(ax[0], x, df["Accuracy"], f"{experiments_name}",
                                   smoothing_window, poly_degree)
        _draw_metric_panel(ax[1], x, df["F1-score"], f"{experiments_name}",
                           smoothing_window, poly_degree, color=color)

    for axis, metric, samples in ((ax[0], "Accuracy", accuracy), (ax[1], "F1-score", f1)):
        if samples:
            axis.legend(fontsize=30)
        axis.set_xlabel("Epoch", fontsize=30)
        axis.set_ylabel(metric, fontsize=30)

        if len(samples) >= 2:
            # Report the p-value itself (previously `1 - p` was printed under this label).
            p_value = ttest_ind(samples[0], samples[1])[1]
            axis.set_title(f"{metric} P Value = {round(p_value, 5)}", fontsize=40)
        else:
            # A two-sample test needs two experiments; fall back to a plain title.
            axis.set_title(metric, fontsize=40)

    plt.savefig(os.path.join(output_dir, "discriminator_class_metrics.jpg"))
    plt.show()




In [None]:
# Draw and save the accuracy / F1-score comparison of the epoch-averaged experiments.
plot_metrics(experiment_name_to_avg_df)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

def plot_classifications_confusion_matrix(file_paths, output_dir, experiments_rename, prefix=""):
    """Plot one annotated confusion matrix per prediction file and save the figure.

    Each CSV must contain the columns ``class_name_real`` and
    ``class_name_pred``. For every file the confusion matrix (rows = true
    label, columns = predicted label), Cohen's kappa, accuracy and weighted
    F1-score are computed. The matrices are drawn in a grid of at most two
    columns; each cell is coloured by its row percentage and annotated with the
    raw count and that percentage. The figure is written to
    ``<output_dir>/<prefix>compare_confusion_matrix.jpg`` and then shown.

    Args:
        file_paths: sequence of CSV paths, one per subplot.
        output_dir: directory the figure is saved into.
        experiments_rename: subplot titles, indexed in the same order as
            ``file_paths``.
        prefix: string prepended to the output file name.

    Raises:
        ValueError: if ``file_paths`` is empty or ``experiments_rename`` has
            fewer entries than ``file_paths``.
    """
    n_plots = len(file_paths)
    if n_plots == 0:
        raise ValueError("file_paths must contain at least one CSV path")
    if len(experiments_rename) < n_plots:
        raise ValueError(
            f"experiments_rename has {len(experiments_rename)} entries "
            f"but {n_plots} files were given"
        )

    # Evaluate every file first; each entry keeps its OWN class list so tick
    # labels and annotations always match the matrix they belong to.
    evaluations = []
    for path in file_paths:
        data = pd.read_csv(path)
        actual_pop = data['class_name_real'].values
        predicted_pop = data['class_name_pred'].values

        # Matrix rows/columns follow the sorted unique true labels.
        classes = np.unique(actual_pop)
        evaluations.append({
            "classes": classes,
            "cm": confusion_matrix(actual_pop, predicted_pop, labels=classes),
            "kappa": cohen_kappa_score(actual_pop, predicted_pop),
            "accuracy": accuracy_score(actual_pop, predicted_pop),
            "f1": f1_score(actual_pop, predicted_pop, average='weighted'),
        })

    n_cols = min(2, n_plots)
    n_rows = int(np.ceil(n_plots / n_cols))

    # squeeze=False always returns a 2-D array of Axes, so flatten() also works
    # for a single subplot (where plt.subplots would otherwise return one Axes).
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    for i, evaluation in enumerate(evaluations):
        ax = axes[i]
        classes = evaluation["classes"]
        cm = evaluation["cm"]
        kappa = evaluation["kappa"]
        accuracy = evaluation["accuracy"]
        f1 = evaluation["f1"]

        # Share of each true class (row) assigned to each predicted class.
        cm_percentage = cm / cm.sum(axis=1, keepdims=True) * 100
        im = ax.imshow(cm_percentage, cmap=plt.cm.terrain)
        ax.set_title(f'{experiments_rename[i]}\n\nKappa: {kappa:.2f}, Accuracy: {accuracy:.2f}, F1 Score: {f1:.2f}')
        ax.set_xlabel('Predicted Label')
        ax.set_ylabel('True Label')
        ax.set_xticks(np.arange(len(classes)))
        ax.set_yticks(np.arange(len(classes)))
        ax.set_xticklabels(classes)
        ax.set_yticklabels(classes)
        ax.grid(False)
        fig.colorbar(im, ax=ax, shrink=0.6)

        # Annotate every cell with its raw count and row percentage.
        for j in range(len(classes)):
            for k in range(len(classes)):
                ax.text(k, j, f'{cm[j, k]}\n{cm_percentage[j, k]:.1f}%',
                        ha='center', va='center', color='black', weight='bold')

    # An odd number of files leaves one grid cell empty; hide its frame.
    for unused_ax in axes[n_plots:]:
        unused_ax.axis('off')

    plt.savefig(os.path.join(output_dir, prefix + "compare_confusion_matrix.jpg"))
    plt.show()


In [None]:
# Compare one run of each loss function. Subplot titles are taken from the keys
# of `experiments` by position, so the file order must follow that key order.
plot_classifications_confusion_matrix(['../experiment_results/categorical_crossentropy_02/discriminator_pred_on_test.csv',
                                       '../experiment_results/polyloss_ce/discriminator_pred_on_test.csv'],
                                      output_dir, list(experiments.keys()), prefix="polyloss_ce_to_cce_")

In [None]:
def get_last_metrics(experiment_name_to_avg_df, tails=(1, 5, 10, 20, 50, 100)):
    """Summarise each experiment by its mean scores over the last N rows.

    For every experiment and every window size ``N`` in ``tails``, the mean of
    ``Accuracy`` and ``F1-score`` over the last ``N`` rows of the experiment's
    DataFrame is computed. A window larger than the frame simply uses all rows.

    Args:
        experiment_name_to_avg_df: mapping of experiment name -> DataFrame with
            at least the columns ``Accuracy`` and ``F1-score``, ordered so that
            the most recent rows come last.
        tails: window sizes to average over, in output column order.

    Returns:
        DataFrame with one row per experiment, the columns
        ``"Accuracy\\n last N"`` and ``"F1-score\\n last N"`` for each ``N`` and
        a final ``experiments_name`` column.

    Raises:
        ValueError: if ``tails`` is empty, or (raised by ``pd.concat``) if no
            experiments are given.
    """
    tails = tuple(tails)
    if not tails:
        raise ValueError("tails must contain at least one window size")

    last_metrics_dfs = []
    for experiments_name, df in experiment_name_to_avg_df.items():
        metrics = df[["Accuracy", "F1-score"]]

        window_means = []
        for tail in tails:
            # Series of the two column means, relabelled with the window size.
            window_mean = metrics.tail(tail).mean()
            window_mean.index = [f"Accuracy\n last {tail}", f"F1-score\n last {tail}"]
            window_means.append(window_mean)

        # One long Series -> single-row frame, one column per metric and window.
        output_df = pd.concat(window_means).to_frame().T
        output_df["experiments_name"] = experiments_name
        last_metrics_dfs.append(output_df)

    return pd.concat(last_metrics_dfs, ignore_index=True)


# One row per experiment, one column per "metric / last N" score, rounded for display.
all_experiments_scores = (
    get_last_metrics(experiment_name_to_avg_df)
    .set_index('experiments_name')
    .round(4)
)

# Score types on the x-axis, one bar per experiment within each group.
df_transposed = all_experiments_scores.T

ax = df_transposed.plot(kind='bar', figsize=(30, 10), width=0.9, fontsize=16)
ax.set_title('Performance of experiments - average recent epochs', fontsize=22)
ax.set(xlabel='Score Type', ylabel='Score')

# Keep the legend outside the plotting area, to the right of the bars.
ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

# Each bar container holds the bars of one experiment, in the same order as
# the rows of `all_experiments_scores`; label every bar with its score.
for row_position, container in enumerate(ax.containers):
    score_labels = all_experiments_scores.iloc[row_position].astype(str)
    ax.bar_label(container, labels=score_labels, fontsize=16)

plt.savefig(os.path.join(output_dir, "discriminator_compare_last_x.jpg"))
plt.show()