<a href="https://colab.research.google.com/github/ChrisBagdon/Citation_Classification/blob/main/eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

In [8]:
def evaluate(predictions, gold_standard):
    # Collect all unique labels from predictions and gold_std
    labels_set = set(predictions + gold_standard)
    labels = {}
    for i, label in enumerate(labels_set):
        labels[label] = i
    # Create confusion matrix
    confusion_matrix = np.zeros((len(labels_set),len(labels_set)))
    for pred, gold in zip(predictions, gold_standard):
        confusion_matrix[labels[pred]][labels[gold]] += 1
    labels_index = list(labels_set); labels_index.append('overall')
    columns = []
    # Create scores table
    scores = pd.DataFrame(np.zeros((len(labels_set), 3)))
    scores.columns = ['Precision', 'Recall', 'F1']
    overall_TP = 0
    # Calculate P, R, F1 and populate scores table
    for label in labels_set:
        i = labels[label]
        scores['Precision'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=0)[i]
        scores['Recall'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=1)[i]
        # Possible error case: P == 0 == R; divide by 0
        if scores['Precision'][i] == 0 and scores['Recall'][i] == 0:
            scores['F1'][i] = 0
        else:
            scores['F1'][i] = 2 * (scores['Precision'][i]*scores['Recall'][i]/(scores['Precision'][i]+scores['Recall'][i]))
        overall_TP += confusion_matrix[i][i]
    scores.loc[len(labels_set)] = [overall_TP / np.sum(confusion_matrix)] * 3
    scores.index = labels_index
    return (confusion_matrix, scores)

In [3]:
### Saves a scores DataFrame labeled by which model and feature values were used as a CSV
# Inputs: scores DataFrame, model_name string, feature_names list
# Outputs: model_name_feature_names CSV file
def save_scores(scores, model_name, feature_names):
    # Create full filepath and pass to df.to_csv
    output_directory = 'output_files_CSV/'
    filename = model_name
    for feature in feature_names:
        filename += '_' + feature
    filepath = output_directory + filename
    scores.to_csv(filepath)

In [13]:
### Testing evaluate methods
Y = ['red','red','green','green','blue','blue']

# Prediction-Truth 100% case
X_100 = ['red','red','green','green','blue','blue']
test_100_confusionMatrix, test_100_scores = evaluate(X_100, Y)
print(test_100_scores)

# Prediction-Truth 0% case
X_0 = ['green', 'green', 'blue', 'blue', 'red', 'red']
test_0_confusionMatrix, test_0_scores = evaluate(X_0, Y)
print(test_0_scores)

# Prediction-Truth 50% case
X_50 = ['red', 'green', 'blue', 'green', 'red', 'blue']
test_50_confusionMatrix, test_50_scores = evaluate(X_50, Y)
print(test_50_scores)

# Prediction-Truth varying labels
X_diffLabels = ['asdf', 'red', '1234', 'green', '!@#$', 'blue']
test_diffLabels_confusionMatrix, test_diffLabels_scores = evaluate(X_diffLabels, Y)
print(test_diffLabels_scores)

         Precision  Recall   F1
blue           1.0     1.0  1.0
green          1.0     1.0  1.0
red            1.0     1.0  1.0
overall        1.0     1.0  1.0
         Precision  Recall   F1
red            0.0     0.0  0.0
blue           0.0     0.0  0.0
green          0.0     0.0  0.0
overall        0.0     0.0  0.0
         Precision  Recall   F1
blue           0.5     0.5  0.5
green          0.5     0.5  0.5
red            0.5     0.5  0.5
overall        0.5     0.5  0.5
         Precision  Recall        F1
asdf           NaN     0.0       NaN
blue           0.5     1.0  0.666667
!@#$           NaN     0.0       NaN
1234           NaN     0.0       NaN
green          0.5     1.0  0.666667
red            0.5     1.0  0.666667
overall        0.5     0.5  0.500000


  scores['Precision'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=0)[i]


In [14]:
print(test_diffLabels_confusionMatrix)

[[0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]


In [None]:
# Test save_scores method with three cases above
