In [26]:
from pathlib import Path

from utils.util import *
%matplotlib inline

# Location and file name of a discriminator prediction run.
# NOTE(review): neither name is referenced by the cells visible here - confirm they are still needed.
discriminator_folder = "../experiment_results/discriminator_0.8_test"
csv_name = "discriminator_pred_on_test.csv"
# Directory that the plotting cells below write their figures to.
output_dir = "classifier_analysis"

# Create the output directory (and any missing parents) up front; exist_ok makes re-running the cell safe.
Path(output_dir).mkdir(parents=True, exist_ok=True)

UsageError: Line magic function `%` not found.


In [None]:
# Maps each loss-function experiment to the discriminator metric CSVs of its
# repeated runs. The key order matters: later cells use the keys as display
# names, in this order.
experiments = {
    "categorical_crossentropy": [
        "../experiment_results/categorical_crossentropy/discriminator_metrics.csv",
        "../experiment_results/categorical_crossentropy_02/discriminator_metrics.csv",
        "../experiment_results/categorical_crossentropy_03/discriminator_metrics.csv",
    ],
    "Polyloss_CE": [
        "../experiment_results/polyloss_ce/discriminator_metrics.csv",
        "../experiment_results/polyloss_ce_02/discriminator_metrics.csv",
        "../experiment_results/polyloss_ce_03/discriminator_metrics.csv",
    ],
}

In [None]:
def experiment_name_to_avg_df_without_outliers(experiments, trim_fraction=0.025):
    """Pool the metric CSVs of each experiment and trim accuracy outliers.

    For every experiment all CSV files are concatenated, reduced to the
    "Accuracy", "F1-score" and "epoch" columns, sorted by "Accuracy" in
    descending order, and the ``trim_fraction`` highest and lowest rows are
    dropped.

    Despite the function name, no per-epoch averaging is performed: each
    returned frame holds the individual rows that survived trimming, still
    ordered by descending accuracy.

    Args:
        experiments: Mapping of experiment name to a list of CSV paths. Every
            CSV must contain the columns "Accuracy", "F1-score" and "epoch".
        trim_fraction: Fraction of the pooled rows removed from EACH end of
            the accuracy ranking. Must satisfy ``0 <= trim_fraction < 0.5``.
            The default of 0.025 removes 2.5% per tail (5% in total).

    Returns:
        dict mapping each experiment name to its trimmed DataFrame with a
        fresh ``0..n-1`` index.

    Raises:
        ValueError: If ``trim_fraction`` is outside ``[0, 0.5)``.
    """
    if not 0 <= trim_fraction < 0.5:
        raise ValueError(f"trim_fraction must be in [0, 0.5), got {trim_fraction}")

    results = {}
    for experiments_name, paths in experiments.items():
        pooled = pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)
        pooled = pooled[["Accuracy", "F1-score", "epoch"]]
        before_len = len(pooled)

        # Rank rows from best to worst accuracy so both tails can be cut by position.
        df_sorted = pooled.sort_values("Accuracy", ascending=False)

        # Number of rows to drop from each end of the ranking.
        n_rows = int(before_len * trim_fraction)

        # Use an explicit stop index: the slice [n_rows:-n_rows] is EMPTY when
        # n_rows == 0 (because -0 == 0), which would discard small data sets.
        trimmed = df_sorted.iloc[n_rows:before_len - n_rows].reset_index(drop=True)

        after_len = len(trimmed)
        print(f"{experiments_name}: removed {before_len - after_len} samples from {before_len}")
        results[experiments_name] = trimmed
    return results


# Load and trim the metrics of every configured experiment; the trailing bare
# expression makes the notebook display the resulting dict of DataFrames.
experiment_name_to_avg_df = experiment_name_to_avg_df_without_outliers(experiments)
experiment_name_to_avg_df

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def plot_loss_comparison(loss_function_to_df):
    """Draw one accuracy-versus-epoch line per loss function on a shared axis.

    Each legend entry also reports the mean and standard deviation of the
    plotted accuracy values.

    NOTE(review): a later cell defines another ``plot_loss_comparison`` (a box
    plot); on a top-to-bottom run that definition replaces this one.

    Args:
        loss_function_to_df: Mapping of loss-function name to a DataFrame
            with "epoch" and "Accuracy" columns.
    """
    figure, axis = plt.subplots(figsize=(10, 6))

    for name, frame in loss_function_to_df.items():
        by_epoch = frame.sort_values("epoch")

        # A window of 1 makes the rolling mean a no-op; the values pass through unchanged.
        curve = by_epoch["Accuracy"].rolling(1).mean()

        mean_value = np.mean(curve)
        std_value = np.std(curve)
        legend_text = f"{name} (Mean: {mean_value:.4f}, Std: {std_value:.4f})"

        axis.plot(by_epoch["epoch"], curve, label=legend_text)

    axis.set_title("Accuracy Comparison by Loss Function", fontsize=14)
    axis.set_xlabel("Epoch", fontsize=12)
    axis.set_ylabel("Accuracy", fontsize=12)
    axis.legend(fontsize=12)

    plt.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np


def plot_loss_comparison(loss_function_to_df):
    """Save and show a box plot of the accuracy values of each loss function.

    The figure is written to "box_plot_loss_function_classes.jpg" inside the
    module-level ``output_dir`` and then displayed.

    Args:
        loss_function_to_df: Mapping of loss-function name to a DataFrame
            with "epoch" and "Accuracy" columns. The keys become the box
            labels.
    """
    # One accuracy series per loss function, in mapping order.
    accuracy_data = [
        frame.sort_values("epoch")["Accuracy"]
        for frame in loss_function_to_df.values()
    ]

    fig, ax = plt.subplots(figsize=(5, 6))

    # Outlier markers are hidden (showfliers=False); all outlines are drawn in
    # thick black.
    box_artists = ax.boxplot(
        accuracy_data,
        labels=loss_function_to_df.keys(),
        patch_artist=True,
        showfliers=False,
        boxprops={"linewidth": 2, "color": "black"},
        medianprops={"linewidth": 2, "color": "black"},
        meanprops={"linewidth": 2, "color": "black"},
    )

    # zip stops at the shorter input, so only the first two boxes are recoloured.
    for box, face_color in zip(box_artists['boxes'], ['Blue', 'Orange']):
        box.set_facecolor(face_color)

    ax.set_ylabel("Accuracy", fontsize=12)

    plt.tight_layout()
    target_path = os.path.join(output_dir, "box_plot_loss_function_classes.jpg")
    plt.savefig(target_path, bbox_inches='tight', dpi=300)
    plt.show()


In [None]:
# Plot the trimmed accuracy data with whichever plot_loss_comparison definition was executed last.
plot_loss_comparison(experiment_name_to_avg_df)

In [None]:
def get_accuracy_list(experiment_name_to_avg_df):
    """Collect the "Accuracy" column of every experiment, ordered by epoch.

    Args:
        experiment_name_to_avg_df: Mapping of experiment name to a DataFrame
            with "epoch" and "Accuracy" columns.

    Returns:
        list with one "Accuracy" Series per experiment, in mapping order.
        Each Series is sorted by ascending "epoch" and keeps the row labels
        of its source frame.
    """
    return [
        frame.sort_values("epoch")["Accuracy"]
        for frame in experiment_name_to_avg_df.values()
    ]


In [None]:
# One epoch-ordered accuracy Series per experiment, in the same order as the keys of `experiments`.
accuracy_lists = get_accuracy_list(experiment_name_to_avg_df)

In [None]:
import scipy.stats as stats

# Compare the accuracy distributions of the first two experiments with
# overlaid histograms and an independent two-sample t-test.
# `accuracy_lists` holds one accuracy Series per experiment, in key order.
experiments_names = list(experiments.keys())

# density=True normalises each histogram to unit area, so the two samples stay
# comparable even when they contain a different number of rows.
plt.hist(accuracy_lists[0], bins=20, alpha=0.5, label=experiments_names[0], density=True)
plt.hist(accuracy_lists[1], bins=20, alpha=0.5, label=experiments_names[1], density=True)

# Axis labels: with density=True the y-axis is a probability density, not a count.
plt.xlabel('Accuracy')
plt.ylabel('Density')

# NOTE(review): ttest_ind assumes equal variances by default; pass
# equal_var=False (Welch's test) if that assumption does not hold here.
t_statistic, p_value = stats.ttest_ind(accuracy_lists[0], accuracy_lists[1])

plt.title(f'p-value: {p_value:.8f}', fontweight='bold')

# Place the legend in the upper-left corner.
plt.legend(loc='upper left')
plt.savefig(os.path.join(output_dir, "accuracy_distribution.jpg"), bbox_inches='tight', dpi=300)
# Display the plot
plt.show()


In [None]:
from sklearn.metrics import accuracy_score, f1_score


def plot_classifications_confusion_matrix(file_paths, output_dir, experiments_rename, prefix=""):
    """Plot one annotated confusion matrix per prediction CSV and save the figure.

    For every CSV the confusion matrix is computed over the classes that occur
    in its "class_name_real" column, together with the kappa score, accuracy
    and weighted F1 score, which are shown in the subplot title. Each cell is
    annotated with its raw count and its share of the row total in percent.
    The figure is saved as ``prefix + "compare_confusion_matrix.jpg"`` inside
    ``output_dir`` and then displayed.

    Args:
        file_paths: Sequence of CSV paths. Each file must contain the columns
            "class_name_real" and "class_name_pred".
        output_dir: Directory the figure is written to.
        experiments_rename: Display names, one per entry of ``file_paths`` and
            in the same order.
        prefix: Optional string prepended to the output file name.

    Raises:
        ValueError: If ``file_paths`` is empty.
    """
    file_paths = list(file_paths)
    if not file_paths:
        raise ValueError("file_paths must contain at least one prediction CSV")

    # Keep the class labels next to each matrix: different files may contain
    # different label sets, so the labels of the last file cannot be reused.
    summaries = []
    for path in file_paths:
        data = pd.read_csv(path)

        actual_pop = data['class_name_real'].values
        predicted_pop = data['class_name_pred'].values

        # Only classes present in the ground truth get a row/column, which
        # also guarantees that every row total used below is non-zero.
        classes = np.unique(actual_pop)

        summaries.append((
            classes,
            confusion_matrix(actual_pop, predicted_pop, labels=classes),
            cohen_kappa_score(actual_pop, predicted_pop),
            accuracy_score(actual_pop, predicted_pop),
            f1_score(actual_pop, predicted_pop, average='weighted'),
        ))

    # Lay the matrices out on a grid that is at most two columns wide.
    n_plots = len(summaries)
    n_cols = min(2, n_plots)
    n_rows = int(np.ceil(n_plots / n_cols))

    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works when only a single subplot is requested.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    for i, (classes, cm, kappa, accuracy, f1) in enumerate(summaries):
        ax = axes[i]
        # Row-normalise: each cell becomes the percentage of its true class.
        cm_percentage = cm / cm.sum(axis=1, keepdims=True) * 100
        # Reversed grey scale: high percentages are drawn dark.
        im = ax.imshow(cm_percentage, cmap=plt.cm.gray_r)
        ax.set_title(f'{experiments_rename[i]}\n\nKappa: {kappa:.2f}, Accuracy: {accuracy:.2f}, F1 Score: {f1:.2f}',
                     fontweight='bold')
        ax.set_xlabel('Predicted Label')
        ax.set_ylabel('True Label')
        ax.set_xticks(np.arange(len(classes)))
        ax.set_yticks(np.arange(len(classes)))
        ax.set_xticklabels(classes)
        ax.set_yticklabels(classes)
        ax.grid(False)
        fig.colorbar(im, ax=ax, shrink=0.6)

        # Annotate every cell; switch to bold white text on dark cells
        # (40% and above) so the numbers stay readable.
        for j in range(len(classes)):
            for k in range(len(classes)):
                is_dark = cm_percentage[j, k] >= 40
                ax.text(k, j, f'{cm[j, k]}\n{cm_percentage[j, k]:.1f}%',
                        ha='center', va='center',
                        color="white" if is_dark else "black",
                        weight="bold" if is_dark else "normal")

    # Hide the spare axis left over when an odd number of matrices is plotted.
    for unused_ax in axes[n_plots:]:
        unused_ax.set_visible(False)

    plt.savefig(os.path.join(output_dir, prefix + "compare_confusion_matrix.jpg"), bbox_inches='tight', dpi=300)
    plt.show()


In [None]:
# Compare one cross-entropy run against one PolyLoss run. The subplot titles
# are taken from the keys of `experiments`, so the CSV paths must be listed in
# the same order as those keys (cross-entropy first, PolyLoss second).
plot_classifications_confusion_matrix(
    ['../experiment_results/categorical_crossentropy_02/discriminator_pred_on_test.csv',
     '../experiment_results/polyloss_ce/discriminator_pred_on_test.csv'],
    output_dir, list(experiments.keys()), prefix="polyloss_ce_to_cce_")

In [None]:
def get_last_metrics(experiment_name_to_avg_df, tails=(1, 5, 10, 20, 50, 100)):
    """Average Accuracy and F1-score over the last N epochs of each experiment.

    Args:
        experiment_name_to_avg_df: Mapping of experiment name to a DataFrame
            with "Accuracy" and "F1-score" columns. When an "epoch" column is
            present the rows are ordered by it first; otherwise the existing
            row order is used.
        tails: Window sizes N. For each N the mean over the last N rows is
            reported. A window larger than the frame uses all rows.

    Returns:
        DataFrame with one row per experiment, the columns
        "Accuracy\\n last N" and "F1-score\\n last N" for every N in
        ``tails``, and a final "experiments_name" column.
    """

    def mean_of_last(metrics_df, tail):
        # Mean of the trailing `tail` rows as a one-row frame with window-specific column names.
        means = metrics_df.tail(tail).mean()
        means.index = [f"Accuracy\n last {tail}", f"F1-score\n last {tail}"]
        return means.to_frame().T

    last_metrics_dfs = []
    for experiments_name, df in experiment_name_to_avg_df.items():
        # The frames may arrive in another order (e.g. ranked by accuracy after
        # outlier trimming). Without restoring epoch order, tail(N) would pick
        # the N worst rows instead of the N most recent epochs.
        if "epoch" in df.columns:
            df = df.sort_values("epoch", kind="stable")
        metrics_df = df[["Accuracy", "F1-score"]]

        output_df = pd.concat([mean_of_last(metrics_df, tail) for tail in tails], axis=1)
        output_df["experiments_name"] = experiments_name
        last_metrics_dfs.append(output_df)
    return pd.concat(last_metrics_dfs, ignore_index=True)


# Build the score table (one row per experiment, indexed by experiment name,
# rounded to 4 decimals) without mutating an intermediate frame in place.
all_experiments_scores = (
    get_last_metrics(experiment_name_to_avg_df)
    .set_index('experiments_name')
    .round(4)
)

# Transpose so that score types run along the x-axis and every experiment
# becomes one bar series.
df_transposed = all_experiments_scores.transpose()

# plot the bar chart
ax = df_transposed.plot(kind='bar', figsize=(30, 10), width=0.9, fontsize=16)

# set labels
ax.set_title('Performance of experiments - average recent epochs', fontsize=22)
ax.set_xlabel('Score Type')
ax.set_ylabel('Score')

# show the legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

# add numbers to bars: container i holds the bars of experiment i, so row i of
# the score table supplies its labels
for i, container in enumerate(ax.containers):
    ax.bar_label(container, labels=all_experiments_scores.iloc[i].astype(str), fontsize=16)

# bbox_inches='tight' keeps the legend, which sits outside the axes, inside the
# saved image and matches the other savefig calls in this notebook.
plt.savefig(os.path.join(output_dir, "discriminator_compare_last_x.jpg"), bbox_inches='tight')
plt.show()