In [None]:
from collections import defaultdict

def calculate_prf(matrix):
    '''
    Calculates precision, recall, and F1 score for the given
    confusion matrix. MUST be a simple 2x2 confusion matrix.

    `matrix` is a mapping with (at least) the keys 'tp', 'fp' and 'fn';
    any other key (e.g. 'tn') is ignored.

    Handles division by zero cases based on the following
    methodology: https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure

      * tp == fp == fn == 0 -> nothing to find and nothing reported, so
        precision, recall and F1 are all 1.0
      * tp == 0 and at least one error (fp > 0 or fn > 0) -> all 0.0

    returns (precision, recall, F1 score) tuple
    '''
    tp, fp, fn = matrix['tp'], matrix['fp'], matrix['fn']

    if tp == 0:
        if fp == 0 and fn == 0:
            return 1.0, 1.0, 1.0
        # Every remaining tp == 0 case scores zero. This must include the case
        # where fp and fn are BOTH non-zero: precision and recall are then both
        # 0 and the F1 formula below would divide 0 by 0.
        return 0.0, 0.0, 0.0

    # tp > 0 from here on, so none of the denominators can be zero
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = (2 * precision * recall) / (precision + recall)

    return precision, recall, f1

def test_model(model, training_data, test_data):
    '''
    Function to test `model` on `test_data` given `training_data`.
    `test_data` is a list of (text, label) tuples.

    The model is trained with `model.train(training_data)` and then asked
    to label every text with `model.identify(text)`; the predicted label is
    read from element `[0][1]` of that result (presumably the top-ranked
    (score, label) pair -- confirm against the model classes).

    Every label that occurs either as a gold label or as a prediction counts
    as a class, so a label the model predicts but that never appears in the
    gold data still contributes its false positives.

    Micro scores are computed from the class matrices pooled together.
    Macro F1 is the harmonic mean of the macro-averaged precision and the
    macro-averaged recall (not the mean of the per-class F1 scores); it is
    0.0 when both averages are 0.

    raises ValueError if `test_data` contains no items

    return (micro_f1, macro_f1, confusion_matrix) tuple
    '''
    model.train(training_data)

    # confusion matrix to store results used for calculating precision and recall
    # confusion_matrix[gold_label][model_label] = number of occurrences
    confusion_matrix = defaultdict(lambda: defaultdict(lambda: 0))

    for text, gold_label in test_data:
        model_label = model.identify(text)[0][1]
        confusion_matrix[gold_label][model_label] += 1

    if not confusion_matrix:
        # without any result there is no class to average over
        raise ValueError('test_data is empty; cannot compute F1 scores')

    # row totals (how often each label was the gold label) and column totals
    # (how often each label was predicted)
    gold_totals = {gold: sum(row.values()) for gold, row in confusion_matrix.items()}
    predicted_totals = defaultdict(int)
    for row in confusion_matrix.values():
        for predicted, count in row.items():
            predicted_totals[predicted] += count
    num_results = sum(gold_totals.values())

    # gold labels first (in order of first appearance), then the labels that
    # were only ever predicted
    labels = list(gold_totals)
    labels.extend(label for label in predicted_totals if label not in gold_totals)

    # compute individual class matrices
    class_matrix = {}
    for label in labels:
        # .get() keeps the defaultdicts from growing zero-valued cells while
        # we are only reading from the matrix that is returned to the caller
        tp = confusion_matrix.get(label, {}).get(label, 0)
        fp = predicted_totals.get(label, 0) - tp
        fn = gold_totals.get(label, 0) - tp
        class_matrix[label] = {
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': num_results - tp - fp - fn,
        }

    # compute a pooled matrix
    pooled_matrix = {
        cell: sum(matrix[cell] for matrix in class_matrix.values())
        for cell in ('tp', 'fp', 'tn', 'fn')
    }

    # micro precision, recall, and F1
    _, _, micro_f1 = calculate_prf(pooled_matrix)

    # macro precision, recall, and F1
    class_precision = {}
    class_recall = {}

    for label, matrix in class_matrix.items():
        p, r, _ = calculate_prf(matrix)
        class_precision[label] = p
        class_recall[label] = r

    macro_precision = sum(class_precision.values()) / len(class_precision)
    macro_recall = sum(class_recall.values()) / len(class_recall)

    # both averages are 0 when no class has a single true positive; the
    # harmonic mean is defined as 0 there instead of dividing by zero
    denominator = macro_precision + macro_recall
    if denominator:
        macro_f1 = (2 * macro_precision * macro_recall) / denominator
    else:
        macro_f1 = 0.0

    # return F1 scores and confusion matrix
    return micro_f1, macro_f1, confusion_matrix

In [None]:
from models.ensemble_model import Ensemble
from models.compression_model import CompressionModel
from utils.data_reader import read_data

# initialize the model under evaluation (currently the compression model)
model = CompressionModel()

# read in the data; it is used below as a mapping of author -> list of texts
data = read_data()
# training data is everything but the iliad (assumed to be the first text
# of every author)
# NOTE: should almost certainly change this, this is just to get us started
training_data = {auth: texts[1:] for auth, texts in data.items()}

# make test data the iliad split on different sections
test_data = []
for auth, texts in data.items():
    iliad = texts[0]
    for section in iliad.split('\n\n'):
        # splitting on blank lines yields empty pieces for trailing or
        # repeated blank lines; an empty text cannot be attributed to anyone
        if section.strip():
            test_data.append((section, auth))

test_model(model, training_data, test_data)