# Evaluate predictions
This notebook serves as a general evaluation of the predictions made on the test dataset.
The results will be added to the evaluation file with the given NAME as index.

Constants to change:
- PRED_FILE : the name of the csv containing the predictions
- EVAL_FILE : the name of the csv to save the evaluation
- NAME : the name of the evaluation (used as index in the evaluation file)


The prediction file should have the following columns:
- contributor (first column, used as the index)
- true_bot (Human, Bot)
- predicted_bot (Human, Bot)
- confidence (not used by the evaluation; dropped on load)

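We will compare the predictions with the true labels and compute the following metrics:
- recall (weighted average, plus per-class bot_recall and human_recall)
- precision (weighted average, plus per-class bot_precision and human_precision)
- weighted f1-score
- accuracy
- roc-auc
- num_contrib (number of unique contributors evaluated)
- tn_fp_fn_tp (confusion-matrix counts)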


In [36]:
import pandas as pd

from sklearn.metrics import confusion_matrix, make_scorer, precision_recall_fscore_support, classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
from sklearn.metrics import precision_recall_curve, auc


## Parameters
The metrics will be added at the end of the given evaluation file. The "NAME" variable is used to identify the evaluation in the file.

In [37]:
# Resource folders, given as relative paths.
RESOURCE_FOLDER = '../resources'
DATA_FOLDER = f'{RESOURCE_FOLDER}/data'
# Both the prediction CSV and the evaluation CSV live in this folder.
EVAL_FOLDER = f'{RESOURCE_FOLDER}/evals'

# CSV with the predictions to evaluate (read from EVAL_FOLDER).
PRED_FILE = 'bimbas2_predictions.csv'
# CSV collecting one row of metrics per evaluation (read from and written to EVAL_FOLDER).
EVAL_FILE = 'evaluation.csv'
# Row label identifying this evaluation in EVAL_FILE.
NAME = 'bimbas2'

# 1 - Load data

## 1.1 - Load predictions

In [38]:
# The first CSV column (the contributor) becomes the index.
df_prediction = pd.read_csv(f"{EVAL_FOLDER}/{PRED_FILE}", index_col=0)
# 'confidence' is not used by the evaluation. errors='ignore' lets prediction files
# that only contain the label columns load as well instead of raising a KeyError.
df_prediction = df_prediction.drop(columns=['confidence'], errors='ignore')
display(df_prediction.head())



Unnamed: 0_level_0,true_bot,predicted_bot
contributor,Unnamed: 1_level_1,Unnamed: 2_level_1
otc-zuul[bot],Bot,Bot
ljharb,Human,Human
juancarlospaco,Human,Human
tldr-bot,Bot,Bot
jakirkham,Human,Human


Remove the unknown (< 5 events) and invalid (not found) contributors.

In [39]:
# Keep only contributors with a usable prediction: rows predicted as
# 'Unknown' or 'Invalid' are excluded from the evaluation.
excluded_predictions = ['Unknown', 'Invalid']
is_excluded = df_prediction['predicted_bot'].isin(excluded_predictions)
df_prediction = df_prediction[~is_excluded]

Convert Bot and Human to 1 and 0 respectively.

In [40]:
# Numeric encoding of the labels: Bot is the positive class (1), Human the negative class (0).
contributor_map = {
    'Bot': 1,
    'Human': 0
}

# Series.map turns every label that is missing from contributor_map into NaN, and the
# integer cast below would then fail with an opaque "non-finite values" error.
# Validate first so the offending labels are reported explicitly. This also catches
# an accidental re-run of this cell, where the column already holds 0/1.
predicted_encoded = df_prediction['predicted_bot'].map(contributor_map)
unmapped_predicted = df_prediction['predicted_bot'][predicted_encoded.isna()]
if not unmapped_predicted.empty:
    raise ValueError(
        f"Unexpected values in 'predicted_bot': {unmapped_predicted.unique().tolist()} "
        f"(expected one of {list(contributor_map)})"
    )

# Set column type to int
df_prediction['predicted_bot'] = predicted_encoded.astype(int)
df_prediction.head()

Unnamed: 0_level_0,true_bot,predicted_bot
contributor,Unnamed: 1_level_1,Unnamed: 2_level_1
otc-zuul[bot],Bot,1
ljharb,Human,0
juancarlospaco,Human,0
tldr-bot,Bot,1
jakirkham,Human,0


## 1.2 - Load labels

In [41]:
# Encode the true labels with contributor_map (Bot -> 1, Human -> 0).
# Series.map turns every label that is missing from the map into NaN, which would make
# the integer cast fail with an opaque error; report the offending labels instead.
true_encoded = df_prediction['true_bot'].map(contributor_map)
unmapped_true = df_prediction['true_bot'][true_encoded.isna()]
if not unmapped_true.empty:
    raise ValueError(
        f"Unexpected values in 'true_bot': {unmapped_true.unique().tolist()} "
        f"(expected one of {list(contributor_map)})"
    )

df_prediction['true_bot'] = true_encoded.astype(int)
df_prediction.head()

Unnamed: 0_level_0,true_bot,predicted_bot
contributor,Unnamed: 1_level_1,Unnamed: 2_level_1
otc-zuul[bot],1,1
ljharb,0,0
juancarlospaco,0,0
tldr-bot,1,1
jakirkham,0,0


# 2 - Evaluate predictions

## 2.1 - Define the metrics

In [42]:
def evaluate_model(y_pred, y_true, target_names):
    """
    Compute the classification metrics for a set of Human/Bot predictions.

    Note the argument order: predictions first, ground truth second.

    :param y_pred: predicted labels.
    :param y_true: true labels. When this is a pandas object, its index is used to
        count the evaluated contributors; otherwise its length is used.
    :param target_names: display names of the classes, passed to classification_report.
        Must contain 'Human' and 'Bot', because the per-class metrics are looked up
        under these two names.
    :return: a dict mapping each metric name to a single-element list, so it can be
        turned directly into a one-row DataFrame.
    """
    report = classification_report(y_true, y_pred, target_names=target_names, output_dict=True)

    # Count the contributors from the evaluated labels themselves rather than from the
    # notebook-level df_prediction, so the value always describes the inputs.
    label_index = getattr(y_true, 'index', None)
    if isinstance(label_index, pd.Index):
        num_contrib = label_index.nunique()
    else:
        num_contrib = len(y_true)

    return {
        'precision': [report['weighted avg']['precision']],
        'bot_precision': [report['Bot']['precision']],
        'human_precision': [report['Human']['precision']],
        'recall': [report['weighted avg']['recall']],
        'bot_recall': [report['Bot']['recall']],
        'human_recall': [report['Human']['recall']],
        'weighted_f1': [report['weighted avg']['f1-score']],
        'accuracy': [report['accuracy']],
        # Computed from hard class labels, not scores: for two classes this equals
        # the mean of the two per-class recalls.
        'roc_auc': [roc_auc_score(y_true, y_pred)],
        # Flattened confusion matrix; for two classes the order is tn, fp, fn, tp.
        'tn_fp_fn_tp': [confusion_matrix(y_true, y_pred).ravel()],
        'num_contrib': [num_contrib],
    }

## 2.2 - Evaluate predictions

In [43]:
# Predicted and true labels of the evaluated contributors.
y_pred = df_prediction['predicted_bot']
y_test = df_prediction['true_bot']

# One-row table of metrics, labelled with the evaluation name.
metrics = evaluate_model(y_pred, y_test, ['Human', 'Bot'])
performance = pd.DataFrame(metrics, index=[NAME])


In [44]:
# Show the single-row metrics table for this evaluation (row label: NAME).
display(performance)

Unnamed: 0,precision,bot_precision,human_precision,recall,bot_recall,human_recall,weighted_f1,accuracy,tn_fp_fn_tp,num_contrib,roc_auc
bimbas2,0.90047,0.905405,0.895674,0.900393,0.890957,0.909561,0.900376,0.900393,"[352, 35, 41, 335]",763,0.900259


## 2.3 - Save the evaluation

In [45]:
# Load the evaluations saved so far. On the very first run the file does not exist yet,
# so start from an empty table instead of failing; saving the results creates the file.
try:
    df_eval = pd.read_csv(f"{EVAL_FOLDER}/{EVAL_FILE}", index_col=0)
except FileNotFoundError:
    df_eval = pd.DataFrame()

In [46]:
# Replace any earlier row stored under NAME (no-op when there is none), append the
# new metrics, and label the index column before writing the file back.
df_eval = (
    pd.concat([df_eval.drop(index=NAME, errors='ignore'), performance], axis=0)
    .rename_axis('model')
)
df_eval.to_csv(f"{EVAL_FOLDER}/{EVAL_FILE}", index=True)
