# This notebook creates confusion matrices and computes F1 scores for the FedNLP Hawk Dove project

In [38]:
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score

In [39]:
def confusion_matrix_f1_score(data, reference):
    """Binarize two series of continuous scores at zero and compare them.

    Both inputs are split with ``pd.cut`` into class 0 (score <= 0) and
    class 1 (score > 0). The bins are right-closed (``pd.cut`` default), so a
    score of exactly 0 is assigned to class 0.

    Parameters
    ----------
    data : array-like of numbers
        Predicted scores.
    reference : array-like of numbers
        Ground-truth scores, paired element-by-element with ``data``.

    Returns
    -------
    tuple
        ``(cm, f1)``: ``cm`` is the 2x2 confusion matrix with rows indexed by
        the reference class and columns by the predicted class; ``f1`` is the
        F1 score of class 1.
    """
    bins = [-np.inf, 0, np.inf]
    class_labels = [0, 1]

    # Convert continuous data to categorical data
    data = pd.cut(data, bins=bins, labels=class_labels)
    reference = pd.cut(reference, bins=bins, labels=class_labels)

    # Pin the label set: without it the matrix shrinks to 1x1 whenever both
    # inputs happen to contain a single class, which breaks callers that
    # index the result as a 2x2 array.
    cm = confusion_matrix(reference, data, labels=class_labels)
    f1 = f1_score(reference, data)

    return cm, f1

In [40]:
# Gold labels; the `date` and `manual_label` columns are used below.
gold_label = pd.read_csv('../results/Manual_Labels.csv')
# Per-statement scores; the `date` and `result` columns are used below.
statement = pd.read_csv('../results/statement_full_score.csv')
# Few-shot mean scores; the `year` and `mean_score` columns are used below.
# NOTE(review): this file is read from ../data while the other two come from
# ../results -- confirm the path is intended.
statement_fewshot = pd.read_csv('../data/statement_fewshot_mean_score.csv')

In [41]:
# Preview the few-shot scores; note that `year` holds yyyymmdd values, not calendar years.
statement_fewshot

Unnamed: 0.1,Unnamed: 0,year,mean_score
0,0,19941115,0.500000
1,1,19950201,0.500000
2,2,19950706,-0.500000
3,3,19951219,-0.833333
4,4,19960131,0.000000
...,...,...,...
155,155,20160615,-0.420000
156,156,20160727,-0.275862
157,157,20160921,-0.296296
158,158,20161102,-0.296296


In [42]:
# Put the statement dates, the manual labels and the `result` scores side by
# side in one frame.
# NOTE(review): the columns are paired purely by row position, so this assumes
# `statement` and `gold_label` list the same statements in the same order --
# confirm, or join on the date key instead.
result = pd.DataFrame(dict(
    year=list(statement['date']),
    manual=list(gold_label['manual_label']),
    gpt4=list(statement['result']),
))

In [43]:
# Despite the name, this holds the (confusion matrix, F1 score) tuple returned
# by confusion_matrix_f1_score, scoring the `gpt4` column against `manual`.
confusion_mat = confusion_matrix_f1_score(
    data=result['gpt4'],
    reference=result['manual'],
)
confusion_mat

(array([[115,  11],
        [ 19,  20]]),
 0.5714285714285714)

In [46]:
def convert_date(input_date):
    """Reformat an "m/d/yyyy" date string as "yyyymmdd".

    Parameters
    ----------
    input_date : str
        Date text parseable with the ``%m/%d/%Y`` format.

    Returns
    -------
    str
        The date formatted as ``%Y%m%d``. If a ``ValueError`` is raised while
        converting, an explanatory message string is returned instead of the
        exception propagating.
    """
    try:
        # Parse and re-format in one step; both calls stay inside the try.
        return datetime.strptime(input_date, "%m/%d/%Y").strftime("%Y%m%d")
    except ValueError:
        return "Invalid date format. Please use m/d/yyyy format."

In [47]:
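# Conversion left disabled: the `date` values displayed for gold_label below
# already appear to be in yyyymmdd form, so convert_date is not applied.
# gold_label['date'] = gold_label['date'].apply(lambda x: convert_date(x))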

In [53]:
# Inspect the gold labels: `date` is shown in yyyymmdd form alongside `manual_label`.
gold_label

Unnamed: 0,date,manual_label
0,19940204,0.5
1,19940322,0.5
2,19940418,0.5
3,19940517,0.5
4,19940816,1.0
...,...,...
160,20160615,-0.5
161,20160727,-0.5
162,20160921,-0.5
163,20161102,-0.5


In [62]:
# Give gold_label the same key name (`year`) that statement_fewshot uses and
# make that key numeric, ready for the merge on `year` below.
gold_label = (
    gold_label
    .rename(columns={"date": "year"})
    .assign(year=lambda frame: pd.to_numeric(frame['year']))
)

In [64]:
# Inner join on `year`: only dates present in both frames are kept.
fewshot = pd.merge(statement_fewshot, gold_label, how='inner', on='year')

In [68]:
# (confusion matrix, F1 score) tuple for the few-shot `mean_score` column
# scored against the manual labels.
confusion_mat_fewshot = confusion_matrix_f1_score(
    data=fewshot['mean_score'],
    reference=fewshot['manual_label'],
)
confusion_mat_fewshot

(array([[123,   3],
        [ 19,  15]]),
 0.576923076923077)

In [69]:
# Show the (confusion matrix, F1) pair for the `gpt4` column again, for comparison with the few-shot result.
confusion_mat

(array([[115,  11],
        [ 19,  20]]),
 0.5714285714285714)