### 8. Evaluation of the regression task

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Load the validation and test results (tab-separated files).
val_class_results = pd.read_csv(r'.\Results\acsess_class_results_val.csv', sep='\t')
test_class_results = pd.read_csv(r'.\Results\acsess_class_results_test.csv', sep='\t')


### Change according to number of examples used during training
k = 0  # zero-shot
#k=1 #one-shot
#k=5 #few-shot

# Strip the 'binary ' prefix from the column names. Each frame is renamed from
# its OWN columns: deriving the validation names from the test frame would
# mislabel (or fail on) the validation data whenever the two files differ in
# column order or count.
val_class_results.columns = val_class_results.columns.str.replace('binary ', '', regex=False)
test_class_results.columns = test_class_results.columns.str.replace('binary ', '', regex=False)

# Add a constant all-zero baseline prediction, then keep only the rows whose
# 'relevance_manual' value is 1.
val_class_results['Baseline all 0'] = 0
val_class_results = val_class_results[val_class_results['relevance_manual'] == 1]
test_class_results['Baseline all 0'] = 0
test_class_results = test_class_results[test_class_results['relevance_manual'] == 1]

In [None]:
def calculate_metrics(df, target_column, prediction_columns,
                      folds=('fold1', 'fold2', 'fold3', 'fold4', 'fold5')):
    """Compute per-fold MAE, MSE and RMSE for each prediction column.

    Args:
        df: DataFrame containing a 'fold' column, the target column and
            every prediction column.
        target_column: Name of the ground-truth column.
        prediction_columns: Names of the columns holding predictions.
        folds: Values of the 'fold' column to evaluate, in order. Defaults
            to the five folds 'fold1' .. 'fold5'.

    Returns:
        A dict mapping each prediction column to
        ``{'mae': [...], 'mse': [...], 'rmse': [...]}``. Each list holds one
        score per fold that had at least one usable row; folds with no rows
        (or no non-NaN target/prediction pairs) contribute nothing.
    """
    results = {col: {'mae': [], 'mse': [], 'rmse': []} for col in prediction_columns}

    for fold in folds:
        # The fold subset does not depend on the prediction column, so it is
        # built once per fold instead of once per (fold, column) pair.
        fold_data = df[df['fold'] == fold]
        if fold_data.empty:
            continue

        target_present = fold_data[target_column].notna().to_numpy()

        for col in prediction_columns:
            # A positional boolean mask keeps y_true and y_pred row-aligned
            # even when the index has duplicate labels (label-based
            # re-indexing would duplicate rows). Rows where either the
            # prediction or the target is NaN are skipped.
            usable = fold_data[col].notna().to_numpy() & target_present
            if not usable.any():
                continue

            y_true = fold_data.loc[usable, target_column]
            y_pred = fold_data.loc[usable, col]

            mae = mean_absolute_error(y_true, y_pred)
            mse = mean_squared_error(y_true, y_pred)
            rmse = np.sqrt(mse)

            results[col]['mae'].append(mae)
            results[col]['mse'].append(mse)
            results[col]['rmse'].append(rmse)

    return results

def generate_results_table(df, target_column, prediction_columns):
    """Summarise the per-fold regression metrics as a table.

    Calls ``calculate_metrics`` and, for every prediction column, reports the
    mean and (population) standard deviation across folds of MAE, MSE and
    RMSE. A column with no fold scores gets NaN for both statistics.

    Args:
        df: DataFrame passed through to ``calculate_metrics``.
        target_column: Name of the ground-truth column.
        prediction_columns: Names of the prediction columns; one output row
            is produced per entry, in the same order.

    Returns:
        A DataFrame with the columns 'Prediction Column', 'MAE (Avg)',
        'MAE (Std)', 'MSE (Avg)', 'MSE (Std)', 'RMSE (Avg)' and 'RMSE (Std)'.
    """
    per_fold_scores = calculate_metrics(df, target_column, prediction_columns)

    def _mean_and_std(scores):
        # No scores for this column/metric: report NaN rather than
        # aggregating an empty list.
        if not scores:
            return np.nan, np.nan
        return np.mean(scores), np.std(scores)

    # (metric key in the scores dict, average label, std label), in output order.
    layout = (
        ('mae', 'MAE (Avg)', 'MAE (Std)'),
        ('mse', 'MSE (Avg)', 'MSE (Std)'),
        ('rmse', 'RMSE (Avg)', 'RMSE (Std)'),
    )

    table = {'Prediction Column': prediction_columns}
    for metric, avg_label, std_label in layout:
        summaries = [_mean_and_std(per_fold_scores[col][metric])
                     for col in prediction_columns]
        table[avg_label] = [avg for avg, _ in summaries]
        table[std_label] = [std for _, std in summaries]

    return pd.DataFrame(table)

In [None]:
# Evaluate the regression predictions on the validation set.
# `val_class_results` must contain a 'fold' column, the ground-truth column
# named below, and every listed prediction column.
target_column = 'manual_sentence_labels'
prediction_columns = ['k=0', 'k=1', 'k=2', 'k=3', 'k=4', 'k=5', 'Baseline all 0']
results_table = generate_results_table(val_class_results, target_column, prediction_columns)
print('Results regression task on validation set')
print(results_table.to_string(index=False))

In [None]:
# Evaluate the regression predictions on the test set.
# `test_class_results` must contain a 'fold' column, the ground-truth column
# named below, and every listed prediction column.
target_column = 'manual_sentence_labels'
prediction_columns = ['k=0', 'k=1', 'k=2', 'k=3', 'k=4', 'k=5', 'Baseline all 0']
results_table = generate_results_table(test_class_results, target_column, prediction_columns)
print('Results regression task on test set')
print(results_table.to_string(index=False))