In [None]:
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA

import numpy as np
import pandas as pd
import random
import re
import psutil
import gc
import time
import pickle
import pynvml
import itertools
import os

random.seed(42)
np.random.seed(42)

In [None]:
# Function to clean folder names
def clean_folder_name(folder_name):
    r"""Return ``folder_name`` with characters unsuitable for a folder name removed.

    Every occurrence of ``<``, ``>``, ``:``, ``"``, ``/``, ``\``, ``|``, ``?`` and
    ``*`` is deleted, after which any run of trailing dots and spaces is stripped.

    Parameters:
    folder_name: String to sanitise.

    Returns:
    str: The sanitised name (may be empty).
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*]')
    without_invalid = invalid_chars.sub('', folder_name)
    # Trailing '.' and ' ' are removed last, so ones exposed by the deletion go too.
    return without_invalid.rstrip('. ')


def CPU_monitor_memory_usage():
    """Report system memory usage and block while it is critically high.

    Prints the current ``psutil.virtual_memory().percent`` value. When that value
    is 95 or more, a garbage collection is triggered and the function polls every
    10 seconds, returning only once usage is no longer above 30.
    """
    usage_percent = psutil.virtual_memory().percent

    print(f"CPU Current memory usage: {usage_percent}%")

    if usage_percent < 95:
        return

    print("CPU Memory usage is too high. Pausing execution...")
    gc.collect()  # Free what Python can release before starting to wait
    while usage_percent > 30:
        time.sleep(10)
        usage_percent = psutil.virtual_memory().percent
    print("CPU Memory usage is low enough. Resuming execution...")

def monitor_gpu_memory():
    """Report memory usage of GPU 0 and block while it is critically high.

    NVML is initialised on entry and always shut down on exit. Usage is printed as
    a percentage of the device's total memory; when it is 95 or more the function
    polls every 10 seconds and returns only once usage is no longer above 30.
    """
    pynvml.nvmlInit()

    try:
        gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first GPU only

        first_reading = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
        # Total memory is read once and reused for every later percentage.
        total_bytes = first_reading.total
        usage_percent = (first_reading.used / total_bytes) * 100
        print(f"Current GPU memory usage: {usage_percent:.2f}%")

        if usage_percent >= 95:
            print("GPU memory usage is too high. Pausing execution...")
            while usage_percent > 30:
                time.sleep(10)
                used_bytes = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle).used
                usage_percent = (used_bytes / total_bytes) * 100
            print("GPU memory usage is low enough. Resuming execution...")

    finally:
        # Release NVML even if a query above raised.
        pynvml.nvmlShutdown()

In [None]:
def classification_report_to_df(report, y_true, y_pred):
    """Expand a classification report into a per-class metrics table with benchmark deltas.

    Reads two module-level globals: ``bch_class_df`` (benchmark metrics table that
    the new table is subtracted from) and ``topic_dict`` (its values define the
    row order of the result).

    Parameters:
    report: Dict form of a classification report (the caller passes
        ``classification_report(..., output_dict=True)``). Its last three entries
        are expected to be 'accuracy', 'macro avg' and 'weighted avg'.
    y_true: True labels, used for the confusion matrix.
    y_pred: Predicted labels, used for the confusion matrix.

    Returns:
    DataFrame: One row per label in ``topic_dict`` followed by 'accuracy',
        'macro avg' and 'weighted avg'. Columns are the report columns plus
        'Sensitivity', 'Specificity', 'Balanced Accuracy', a 'Diff ...' column for
        each column of the difference to the benchmark, 'Accuracy', and the raw
        'TP', 'FP', 'TN', 'FN' counts (filled for class rows only).
    """
    global bch_class_df
    global topic_dict
    df = pd.DataFrame(report).transpose()

    order_labels = list(topic_dict.values())

    # Class labels are every row except the trailing 'accuracy', 'macro avg', 'weighted avg'
    labels = df.index[:-3]
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Per-class one-vs-rest counts (rows of cm are true labels, columns are predictions)
    TP = cm.diagonal()
    FP = cm.sum(axis=0) - TP
    FN = cm.sum(axis=1) - TP
    TN = cm.sum() - (FP + FN + TP)

    # Sensitivity / specificity computed from the counts summed over all classes
    sens = sum(TP) / (sum(TP)+sum(FN))
    spec = sum(TN) / (sum(TN)+sum(FP))

    # Sensitivity is the same quantity as recall
    df['Sensitivity'] = df['recall']

    # Per-class specificity, assigned to the class rows only
    df.loc[labels, 'Specificity'] = TN / (TN + FP)

    # The 'accuracy' row carries the pooled sensitivity and specificity
    df.loc['accuracy', ['Sensitivity', 'Specificity']] = sens, spec

    # 'macro avg' and 'weighted avg' of the per-class sensitivity and specificity
    df.loc['macro avg', 'Sensitivity'] = df.iloc[:-3]['Sensitivity'].mean()
    df.loc['weighted avg', 'Sensitivity'] = np.average(df.iloc[:-3]['Sensitivity'], weights=df.iloc[:-3]['support'])

    df.loc['macro avg', 'Specificity'] = df.iloc[:-3]['Specificity'].mean()
    df.loc['weighted avg', 'Specificity'] = np.average(df.iloc[:-3]['Specificity'], weights=df.iloc[:-3]['support'])

    # Balanced accuracy for every row, including the three summary rows
    df['Balanced Accuracy'] = (df['Sensitivity'] + df['Specificity']) / 2

    # Replace the 'accuracy' row's precision / recall / f1 by their pooled-count versions
    df.loc['accuracy', 'precision'] = sum(TP) / (sum(TP) + sum(FP))
    df.loc['accuracy', 'recall'] = sum(TP) / (sum(TP) + sum(FN))
    df.loc['accuracy', 'f1-score'] = 2* sum(TP) / (2 * sum(TP) + sum(FP) + sum(FN))

    # The benchmark table may carry raw count columns; exclude them from the difference
    columns_to_drop = [col for col in ['TP', 'FP', 'TN', 'FN'] if col in bch_class_df.columns]
    bch_class_df_noFr = bch_class_df.drop(columns=columns_to_drop)

    # Label-aligned difference to the benchmark (computed before 'Accuracy' and counts are added)
    diff_df = df - bch_class_df_noFr
    diff_df.columns = ['Diff ' + col for col in diff_df.columns]

    combined_df = pd.concat([df, diff_df], axis=1)

    # Per-class accuracy: correct predictions over the number of true samples of the class
    class_accuracy = cm.diagonal() / cm.sum(axis=1)
    combined_df.loc[labels, 'Accuracy'] = class_accuracy
    # For the three summary rows 'Accuracy' is a copy of their f1-score
    combined_df.loc[['accuracy', 'macro avg', 'weighted avg'], 'Accuracy'] = combined_df.loc[['accuracy', 'macro avg', 'weighted avg'], 'f1-score']

    # Raw counts per class; the left merge leaves the summary rows empty
    metrics_df = pd.DataFrame({
        "TP": TP,
        "FP": FP,
        "TN": TN,
        "FN": FN
    }, index=labels)

    combined_df = combined_df.merge(metrics_df, left_index=True, right_index=True, how='left')

    # Final row order: topics in topic_dict order, then the summary rows
    combined_df = combined_df.reindex(order_labels + ['accuracy', 'macro avg', 'weighted avg'])

    return combined_df

In [None]:
def segment_retraining(data_syn, individual_Segment_dict):
    """Retrain the classifier with a chosen subset of synthetic samples and record the result.

    The subset is selected by the 'index_meta' values in ``individual_Segment_dict``:
    only the red dots when the global ``dots_mode`` is "False", red plus blue dots
    when it is "Both". The function relies on notebook globals for the base
    training/evaluation data, CatBoost parameters, output locations and the GPU
    time budget, and it updates the globals ``sum_GPU_seconds``, ``GPU_limit``,
    ``fold_results_df`` and ``history_segments_dict``.

    Parameters:
    data_syn: DataFrame of synthetic samples; must contain 'index_meta',
        'topic_name', 'text' and 'area_TEIS'.
    individual_Segment_dict: Dict with 'red_dots_list' and 'blue_dots_list'.
        It is filled in place with the retraining results.

    Returns:
    dict: ``individual_Segment_dict`` with 'model', 'true_labels',
        'predicted_labels', 'classification_df', 'fitness_score',
        'number_of_syn_sample', 'retraining_time' and 'retrained_dots_list' set.
        For an empty selection the values are None / (None, None) / [].

    Raises:
    ValueError: If ``dots_mode`` is neither "False" nor "Both".
    """
    global dots_mode
    global history_segments_dict
    global X_train_r
    global Y_train_r
    global X_test_re
    global Y_test_re
    global catboost_params
    global sum_GPU_seconds
    global total_gpu_seconds
    global GPU_limit

    global topic_name
    global topic_number
    global random_results_path
    global random_results_name
    global fold_results_df

    CPU_monitor_memory_usage()
    monitor_gpu_memory()

    if dots_mode == "False":
        syn_original_list = individual_Segment_dict["red_dots_list"]
    elif dots_mode == "Both":
        syn_original_list = individual_Segment_dict["red_dots_list"] + individual_Segment_dict["blue_dots_list"]
    else:
        # Fail with a clear message instead of an unbound-variable error further down.
        raise ValueError(f'Unsupported dots_mode {dots_mode!r}; expected "False" or "Both".')

    segment_key = (topic_name, topic_number, frozenset(syn_original_list))

    # Result entries that are copied verbatim when an identical selection was already evaluated
    result_keys = ('model', 'true_labels', 'predicted_labels', 'classification_df',
                   'fitness_score', 'number_of_syn_sample', 'retraining_time')

    # Nothing selected: nothing to retrain, return an all-empty result
    if len(syn_original_list) == 0:
        individual_Segment_dict['model'] = None
        individual_Segment_dict['true_labels'] = None
        individual_Segment_dict['predicted_labels'] = None
        individual_Segment_dict['classification_df'] = None
        individual_Segment_dict['fitness_score'] = (None, None)
        individual_Segment_dict['number_of_syn_sample'] = None
        individual_Segment_dict['retraining_time'] = None
        individual_Segment_dict['retrained_dots_list'] = []
        return individual_Segment_dict

    # Reuse an earlier evaluation when exactly the same list (same order, same
    # duplicates) was retrained before; no GPU time is consumed in that case.
    for previous_Segment_dict in history_segments_dict.values():
        if syn_original_list == previous_Segment_dict['retrained_dots_list']:
            for result_key in result_keys:
                individual_Segment_dict[result_key] = previous_Segment_dict[result_key]
            individual_Segment_dict['retrained_dots_list'] = syn_original_list
            return individual_Segment_dict

    filtered_syn_df = data_syn[data_syn['index_meta'].isin(syn_original_list)]

    # Base training data plus the selected synthetic rows
    X_train_re = pd.concat([X_train_r, filtered_syn_df.drop(columns=['topic_name'])])
    Y_train_re = pd.concat([Y_train_r, filtered_syn_df['topic_name']])

    feature_columns = ["text", "area_TEIS"]
    train_pool_re = Pool(
        X_train_re[feature_columns],
        Y_train_re,
        text_features=["text"],
        cat_features=["area_TEIS"]
    )
    valid_pool_re = Pool(
        X_test_re[feature_columns],
        Y_test_re,
        text_features=["text"],
        cat_features=["area_TEIS"]
    )

    # Model training, timed so it can be charged against the GPU budget
    model_re = CatBoostClassifier(**catboost_params)
    start_time = time.time()
    model_re.fit(train_pool_re, eval_set=valid_pool_re)
    training_time = time.time() - start_time

    sum_GPU_seconds += training_time
    if sum_GPU_seconds >= total_gpu_seconds:
        GPU_limit = True

    # Evaluate the retrained model on the evaluation split
    predictions = model_re.predict(X_test_re[feature_columns])
    accuracy = accuracy_score(Y_test_re, predictions)
    report = classification_report(Y_test_re, predictions, digits=3, output_dict=True)
    classification_df = classification_report_to_df(report, Y_test_re, predictions)

    # Fitness is (overall accuracy, recall of the current topic)
    fitness_score = (accuracy, classification_df.loc[topic_name, 'recall'])

    individual_Segment_dict['model'] = model_re
    # The label vectors are not stored; both entries are empty placeholders
    individual_Segment_dict['true_labels'] = []
    individual_Segment_dict['predicted_labels'] = []
    individual_Segment_dict['classification_df'] = classification_df
    individual_Segment_dict['fitness_score'] = fitness_score
    individual_Segment_dict['number_of_syn_sample'] = len(filtered_syn_df)
    individual_Segment_dict['retraining_time'] = training_time
    individual_Segment_dict['retrained_dots_list'] = syn_original_list

    class_DF_path = f'{random_results_path}/Class_DF'
    os.makedirs(class_DF_path, exist_ok=True)

    # The row number of this evaluation is part of the per-evaluation file names
    new_row_index = len(fold_results_df)
    class_df_file_stem = f'{class_DF_path}/{topic_number}_Rand-Bch_{new_row_index}_ClassDF'

    overall_balanced_accuracy = classification_df.loc['accuracy', 'Balanced Accuracy']
    topic_balanced_accuracy = classification_df.loc[topic_name, 'Balanced Accuracy']
    new_ParamCV_row = {
            "topic_name": topic_name,
            "topic_number": topic_number,
            'fitness_score': fitness_score,
            "accuracy": fitness_score[0],
            "topic_recall": fitness_score[1],
            'balanced_fitness_score': (overall_balanced_accuracy, topic_balanced_accuracy),
            'overall_balanced_accuracy': overall_balanced_accuracy,
            'topic_balanced_accuracy': topic_balanced_accuracy,
            'balanced_acc_rec_score': (topic_balanced_accuracy, classification_df.loc[topic_name, 'recall']),
            'topic_F1': classification_df.loc[topic_name, 'f1-score'],
            'overall_F1': classification_df.loc['accuracy', 'f1-score'],
            'overall_recall': classification_df.loc['accuracy', 'recall'],
            "retraining_time": training_time,
            "number_of_syn_sample": len(filtered_syn_df),
            "retrained_dots_list": syn_original_list,
            'true_labels': [],
            'predicted_labels': [],
            "segment_key": segment_key,
            'classDF_path': f'{class_df_file_stem}.csv'
        }

    classification_df.to_csv(f'{class_df_file_stem}.csv', index=True)
    classification_df.to_pickle(f'{class_df_file_stem}.pkl')

    # Append the row and rewrite the running results files after every evaluation
    new_ParamCV_row_df = pd.DataFrame([new_ParamCV_row])
    fold_results_df = pd.concat([fold_results_df, new_ParamCV_row_df], ignore_index=True)
    fold_results_df.to_csv(f'{random_results_path}/{random_results_name}_AllEval.csv', index=False)
    fold_results_df.to_pickle(f'{random_results_path}/{random_results_name}_AllEval.pkl')

    # A real fitness score always exists at this point, so the evaluation is
    # always remembered for later reuse.
    history_segments_dict[segment_key] = individual_Segment_dict
    print(individual_Segment_dict['fitness_score'])

    return individual_Segment_dict

In [None]:
def create_dots_lists(class_data_pool, train_PCA_YZ_df, topic_name, iteration, base_seed=42):
    """
    Randomly select index_meta values and classify them into blue/red dots lists based on topic prediction.
    Different random selections for each iteration while maintaining reproducibility.

    The global ``random`` and ``numpy.random`` generators are re-seeded by this
    function (seed ``base_seed + iteration``), exactly as a side effect callers
    may rely on for reproducibility.

    Parameters:
    class_data_pool: DataFrame with 'index_meta' column for the specific topic
    train_PCA_YZ_df: DataFrame with 'index_meta', 'topic_name' and 'pred_topic_name' columns
    topic_name: String of the current topic name (not used by the selection itself)
    iteration: Current iteration number (used to generate different but reproducible selections)
    base_seed: Base random seed for reproducibility

    Returns:
    dict: Contains 'blue_dots_list' (selected samples whose predicted topic equals
        the actual topic) and 'red_dots_list' (all other selected samples).
        Sampling is done with replacement, so a value can occur more than once.

    Raises:
    ValueError: If class_data_pool has no rows to sample from.
    IndexError: If a selected index_meta has no row in train_PCA_YZ_df.
    """
    # Seed for this iteration, dependent on both base_seed and iteration
    current_seed = base_seed + iteration

    index_meta_pool = class_data_pool['index_meta'].tolist()
    if not index_meta_pool:
        raise ValueError("class_data_pool contains no 'index_meta' values to sample from")

    # The selection size is drawn from its own seed derivative (between 1 and pool length)
    random.seed(current_seed + 1000)
    num_selections = random.randint(1, len(index_meta_pool))

    # Seed both generators for the actual selection; this is also the state they
    # are left in for the caller.
    random.seed(current_seed)
    np.random.seed(current_seed)

    # Randomly select index_meta values (with replacement allowed)
    selected_indices = np.random.choice(index_meta_pool,
                                        size=num_selections,
                                        replace=True).tolist()

    # One pass over the prediction table instead of one full scan per selected
    # index; for duplicated index_meta values the first row wins.
    wanted = set(selected_indices)
    prediction_is_correct = {}
    for index_meta, actual_topic, predicted_topic in zip(train_PCA_YZ_df['index_meta'],
                                                        train_PCA_YZ_df['topic_name'],
                                                        train_PCA_YZ_df['pred_topic_name']):
        if index_meta in wanted and index_meta not in prediction_is_correct:
            prediction_is_correct[index_meta] = (actual_topic == predicted_topic)

    individual_Segment_dict = {
        "blue_dots_list": [],
        "red_dots_list": []
    }

    # Classify each selected index_meta, keeping selection order and duplicates
    for index_meta in selected_indices:
        if index_meta not in prediction_is_correct:
            # Same exception type as the positional lookup this replaces.
            raise IndexError(f"index_meta {index_meta!r} not found in train_PCA_YZ_df")

        if prediction_is_correct[index_meta]:
            individual_Segment_dict["blue_dots_list"].append(index_meta)
        else:
            individual_Segment_dict["red_dots_list"].append(index_meta)

    return individual_Segment_dict

In [None]:
def dominates(score1, score2):
    """
    Tell whether score1 Pareto-dominates score2 for two objectives to be maximised.

    score1 dominates score2 when it is at least as good in both objectives and
    strictly better in at least one. Only elements 0 and 1 of each score are used.
    """
    first_obj_1, second_obj_1 = score1[0], score1[1]
    first_obj_2, second_obj_2 = score2[0], score2[1]

    # Strictly better on the first objective, not worse on the second
    if first_obj_1 > first_obj_2 and second_obj_1 >= second_obj_2:
        return True
    # Otherwise: not worse on the first objective, strictly better on the second
    return first_obj_1 >= first_obj_2 and second_obj_1 > second_obj_2

def find_pareto_front(df):
    """
    Return a copy of df with a 'Pareto' column: 'Yes' for rows whose
    'balanced_acc_rec_score' is not dominated by any other row, 'No' otherwise.
    The input DataFrame is left unmodified.
    """
    flagged_df = df.copy()
    flagged_df['Pareto'] = 'No'  # default; front members are switched to 'Yes' below

    candidate_scores = flagged_df['balanced_acc_rec_score'].tolist()

    # A row is on the front when no *other* row dominates its score
    on_front = np.array(
        [
            not any(
                dominates(rival, candidate)
                for rival_pos, rival in enumerate(candidate_scores)
                if rival_pos != candidate_pos
            )
            for candidate_pos, candidate in enumerate(candidate_scores)
        ],
        dtype=bool,
    )

    flagged_df.loc[on_front, 'Pareto'] = 'Yes'

    return flagged_df

def find_best_values(df):
    """
    Return a copy of df with a 'best' column listing, for every row, the metric
    columns in which that row reaches the column maximum.

    The metrics considered are 'accuracy', 'topic_recall',
    'overall_balanced_accuracy', 'topic_balanced_accuracy', 'topic_F1',
    'overall_F1' and 'overall_recall'; names appear in 'best' in that order.
    Ties are all reported. The input DataFrame is not modified.

    Parameters:
    df: DataFrame containing the metric columns listed above.

    Returns:
    DataFrame: Copy of df with the additional (or replaced) 'best' column of lists.

    Raises:
    KeyError: If one of the metric columns is missing.
    """
    metric_columns = (
        'accuracy',
        'topic_recall',
        'overall_balanced_accuracy',
        'topic_balanced_accuracy',
        'topic_F1',
        'overall_F1',
        'overall_recall',
    )

    # Work on a copy so the caller's frame is left untouched
    result_df = df.copy()

    # One column-wise comparison per metric instead of a Python call per row
    reaches_max = {
        col: (result_df[col] == result_df[col].max()).tolist()
        for col in metric_columns
    }

    best_per_row = [
        [col for col in metric_columns if reaches_max[col][row_pos]]
        for row_pos in range(len(result_df))
    ]

    # Explicit object Series so every cell holds one list, also for an empty frame
    result_df['best'] = pd.Series(best_per_row, index=result_df.index, dtype=object)

    return result_df

In [None]:
if __name__ == "__main__":
    # Random-selection benchmark: for every topic, keep retraining on random
    # subsets of that topic's synthetic data until the GPU time budget is used
    # up, then post-process and save the collected evaluations.
    #
    # Everything that does not depend on the topic is prepared once, before the
    # topic loop. The names below are module-level globals that the functions
    # defined above read.

    BASE_DIR = "D:/AutoGeTS"

    # ---- Load data (topic independent) ----
    data = pd.read_csv(f'{BASE_DIR}/Data/tickets_topics.csv',lineterminator='\n')
    data_topic = data.dropna().reset_index()
    data_topic = data_topic.rename(columns={'index': 'index_meta'})

    X_train_r_both, X_test_re, Y_train_r_both, Y_test_re = train_test_split(data_topic, data_topic.topic_name, test_size = 0.2,random_state = 42)

    # Further split the training set to create a validation set
    X_train_r, X_test_re_Test, Y_train_r, Y_test_re_Test = train_test_split(
        X_train_r_both,
        Y_train_r_both,
        test_size=0.2,  # 20% of the initial training set, which is 16% of the original data
        random_state=42
    )

    # NOTE: pickle files can execute arbitrary code when loaded; only read files from a trusted source.
    bch_class_df = pd.read_pickle(f"{BASE_DIR}/Models_and_Performances/Benchmark_M0_Classdf_0.pkl")

    train_PCA_YZ_df = pd.read_pickle(f"{BASE_DIR}/Data/Train_PCA_YZ_withPred_0.pkl")
    pca_columns = [col for col in train_PCA_YZ_df.columns if 'PCA_' in col]
    pca_pairs = list(itertools.combinations(pca_columns, 2))

    topic_dict = {"T1": "IT support and assistance.","T2": "Account activation and access issues.","T3": "Password and device security.",
                "T4": "Printer issues and troubleshooting.","T5": "HP Dock connectivity issues.","T6": "Employee documentation and errors.",
                "T7": "\"Access and login issues\"","T8": "Opening and managing files/devices.","T9": "Mobile email and VPN setup.",
                "T10": "IT support and communication.","T11": "Error handling in RPG programming.","T12": "Email security and attachments.",
                "T13": "Humanitarian aid for Ukraine.","T14": "Internet connectivity issues in offices.","T15": "Improving integration with Infojobs."}

    # ---- Parameters and input section ----
    syn_number = 1

    dots_mode = "Both"

    gpu_hours = 1
    total_gpu_seconds = gpu_hours * 60 * 60

    used_pca_pairs = pca_pairs # pca_pairs, [("PCA_0", "PCA_5")]

    random_results_path = f"{BASE_DIR}/LLM_1GPUh/Random_Bch_Results/GPU{gpu_hours}h"
    os.makedirs(random_results_path, exist_ok=True)

    catboost_params = {'iterations': 300, 'learning_rate': 0.2, 'depth': 8, 'l2_leaf_reg': 1,
                        'bagging_temperature': 1, 'random_strength': 1, 'border_count': 254,
                        'eval_metric': 'TotalF1', 'task_type': 'GPU', 'early_stopping_rounds': 20, 'use_best_model': True, 'verbose': 0, 'random_seed': 2}

    # ---- One independent run per topic ("T1" ... "T15", in topic_dict order) ----
    for topic_number in topic_dict:
        # Per-topic state: results table, evaluation cache and GPU budget start empty
        fold_results_df = pd.DataFrame()
        history_segments_dict = {}

        data_syn_raw = pd.read_pickle(f'{BASE_DIR}/Synthetic_Data/{topic_number}-synthesis-{syn_number}.pkl')
        data_syn = data_syn_raw[["index_meta", "text", 'topic_name', "sample", "area_TEIS"]].dropna()

        random_results_name = f"Random-Bch_{topic_number}_Mode{dots_mode}"

        topic_name = topic_dict[topic_number]
        clean_topic_name = clean_folder_name(topic_name)

        # Training rows of the current topic: the pool the random selections are drawn from
        class_data_pool = X_train_r[X_train_r['topic_name'] == topic_name]
        class_index_meta_pool = class_data_pool['index_meta'].tolist()

        sum_GPU_seconds = 0
        GPU_limit = False

        # The iteration counter makes every random selection different but reproducible
        iteration = 0

        # segment_retraining sets GPU_limit once the accumulated training time reaches the budget
        while not GPU_limit:
            individual_Segment_dict = create_dots_lists(class_data_pool, train_PCA_YZ_df, topic_name, iteration, base_seed=42)

            individual_random_bch_dict = segment_retraining(data_syn, individual_Segment_dict)
            iteration += 1

        # ---- Post-processing of this topic's evaluations ----
        fold_results_df = find_pareto_front(fold_results_df)
        fold_results_df = find_best_values(fold_results_df)
        fold_results_df.to_csv(f'{random_results_path}/RandBchSegs_{random_results_name}.csv', index=False)
        fold_results_df.to_pickle(f'{random_results_path}/RandBchSegs_{random_results_name}.pkl')

        # 'Pareto' and 'best' are already present; the improvement columns go into a separate frame
        df = fold_results_df.copy()

        bch_topic_recall = bch_class_df.loc[topic_name, 'recall']
        bch_topic_balanced_accuracy = bch_class_df.loc[topic_name, 'Balanced Accuracy']
        bch_overall_balanced_accuracy = bch_class_df.loc['accuracy', 'Balanced Accuracy']
        bch_overall_F1_score = bch_class_df.loc['accuracy', 'f1-score']

        # Absolute improvements over the benchmark
        df['imp_topic_recall'] = df['topic_recall'] - bch_topic_recall
        df['imp_topic_balanced_accuracy'] = df['topic_balanced_accuracy'] - bch_topic_balanced_accuracy
        df['imp_overall_balanced_accuracy'] = df['overall_balanced_accuracy'] - bch_overall_balanced_accuracy
        df['imp_overall_F1'] = df['overall_F1'] - bch_overall_F1_score

        # Cumulative retraining time
        df['cumulative_time'] = df['retraining_time'].cumsum()

        # Running maximum and running mean of each improvement
        df['max_topic_recall_imp'] = df['imp_topic_recall'].cummax()
        df['average_topic_recall_imp'] = df['imp_topic_recall'].expanding().mean()

        df['max_topic_balanced_acc_imp'] = df['imp_topic_balanced_accuracy'].cummax()
        df['average_topic_balanced_acc_imp'] = df['imp_topic_balanced_accuracy'].expanding().mean()

        df['max_overall_balanced_acc_imp'] = df['imp_overall_balanced_accuracy'].cummax()
        df['average_overall_balanced_acc_imp'] = df['imp_overall_balanced_accuracy'].expanding().mean()

        df['max_overall_F1_improvement'] = df['imp_overall_F1'].cummax()
        df['average_overall_F1_improvement'] = df['imp_overall_F1'].expanding().mean()

        # Percentage improvements relative to the benchmark value. All rows of
        # this frame belong to the current topic, so each benchmark is a single
        # constant. Entries are (new column, source column, benchmark value).
        percentage_columns = [
            ('imp_topic_balanced_accuracy_pct', 'imp_topic_balanced_accuracy', bch_topic_balanced_accuracy),
            ('imp_overall_balanced_accuracy_pct', 'imp_overall_balanced_accuracy', bch_overall_balanced_accuracy),
            ('imp_topic_recall_pct', 'imp_topic_recall', bch_topic_recall),
            ('imp_overall_F1_pct', 'imp_overall_F1', bch_overall_F1_score),
            ('max_imp_topic_balanced_accuracy_pct', 'max_topic_balanced_acc_imp', bch_topic_balanced_accuracy),
            ('max_imp_overall_balanced_accuracy_pct', 'max_overall_balanced_acc_imp', bch_overall_balanced_accuracy),
            ('max_imp_topic_recall_pct', 'max_topic_recall_imp', bch_topic_recall),
            ('max_imp_overall_F1_pct', 'max_overall_F1_improvement', bch_overall_F1_score),
        ]
        for pct_column, source_column, benchmark_value in percentage_columns:
            df[pct_column] = (df[source_column] / benchmark_value) * 100

        df.to_csv(f'{random_results_path}/Processed_{random_results_name}_AllEval.csv', index=False)
        df.to_pickle(f'{random_results_path}/Processed_{random_results_name}_AllEval.pkl')