<a href="https://colab.research.google.com/github/ChrisBagdon/Citation_Classification/blob/main/eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
def _safe_ratio(numerator, denominator):
    """Divide element-wise, yielding 0.0 wherever the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    # Positions excluded by `where` keep the value already in `out` (0.0),
    # so 0/0 never produces NaN or a RuntimeWarning.
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )


def evaluate(predictions, gold_standard):
    """Score predicted labels against gold labels.

    Predictions and gold labels are paired positionally with ``zip``, so
    surplus items in the longer sequence are ignored.

    Args:
        predictions: Iterable of predicted labels (hashable).
        gold_standard: Iterable of true labels (hashable).

    Returns:
        A ``(confusion_matrix, scores)`` tuple.

        ``confusion_matrix`` is a square float array where entry
        ``[p][g]`` counts the pairs predicted as label ``p`` whose gold
        label is ``g`` (rows are predictions, columns are gold).

        ``scores`` is a DataFrame with columns ``Precision``, ``Recall``
        and ``F1``: one row per label plus a final ``'overall'`` row that
        holds the accuracy (correct pairs / all pairs) in all three
        columns. A metric whose denominator is zero is reported as 0.0.

    Labels are ordered by first appearance, gold labels first and then any
    labels that occur only in the predictions, so the row order is the
    same on every run.
    """
    gold_standard = list(gold_standard)
    pairs = list(zip(predictions, gold_standard))

    # dict.fromkeys de-duplicates while preserving first-seen order; the
    # predicted labels are included so an unexpected prediction gets its
    # own row/column instead of raising KeyError.
    label_order = list(
        dict.fromkeys(gold_standard + [pred for pred, _ in pairs])
    )
    labels = {label: i for i, label in enumerate(label_order)}

    size = len(label_order)
    confusion_matrix = np.zeros((size, size))
    for pred, gold in pairs:
        confusion_matrix[labels[pred], labels[gold]] += 1

    true_positives = np.diag(confusion_matrix)
    # Rows are predictions, so a row total is "everything predicted as this
    # label" (precision denominator) and a column total is "everything that
    # truly is this label" (recall denominator).
    predicted_totals = confusion_matrix.sum(axis=1)
    gold_totals = confusion_matrix.sum(axis=0)

    precision = _safe_ratio(true_positives, predicted_totals)
    recall = _safe_ratio(true_positives, gold_totals)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    total = confusion_matrix.sum()
    accuracy = float(true_positives.sum() / total) if total else 0.0

    scores = pd.DataFrame(
        {
            'Precision': np.append(precision, accuracy),
            'Recall': np.append(recall, accuracy),
            'F1': np.append(f1, accuracy),
        },
        index=label_order + ['overall'],
    )
    return (confusion_matrix, scores)

In [None]:
### Saves a scores DataFrame labeled by which model and feature values were used as a CSV
# Inputs: scores DataFrame, model_name string, feature_names list
# Outputs: model_name_feature_names CSV file
def save_scores(scores, model_name, feature_names):
    """Write ``scores`` as CSV to ``output_files_CSV/<model>_<feat1>_<feat2>...``.

    Args:
        scores: Object with a ``to_csv(path)`` method (a scores DataFrame).
        model_name: String used as the first part of the file name.
        feature_names: Iterable of strings appended to the file name, each
            separated by an underscore. May be empty.

    The output directory is created if it does not exist yet. The file name
    is written exactly as assembled, with no extension appended.
    """
    import os

    output_directory = 'output_files_CSV/'
    # to_csv does not create missing parent directories, so make sure the
    # target directory exists before writing.
    os.makedirs(output_directory, exist_ok=True)

    filename = '_'.join([model_name, *feature_names])
    scores.to_csv(output_directory + filename)

In [None]:
### Toy fixtures: one gold sequence (Y) and three prediction sequences named
### after the percentage of positions at which they agree with Y.
Y = ['red', 'red', 'green', 'green', 'blue', 'blue']
X_100 = list(Y)  # separate list with the same labels as Y
X_0 = ['green'] * 2 + ['blue'] * 2 + ['red'] * 2  # differs from Y everywhere
X_50 = ['red', 'green', 'blue', 'green', 'red', 'blue']  # agrees at 0, 3, 5

In [None]:
### Sanity check, 100% agreement: X_100 repeats Y exactly, so every score
### printed here should be 1.0.
print(evaluate(predictions=X_100, gold_standard=Y)[1])

In [None]:
### Sanity check, 0% agreement: X_0 differs from Y at every position, so
### every score printed here should be 0.0.
print(evaluate(predictions=X_0, gold_standard=Y)[1])

In [None]:
### Sanity check, 50% agreement: X_50 matches Y at three of six positions
### (one per colour), so every score printed here should be 0.5.
print(evaluate(predictions=X_50, gold_standard=Y)[1])