In [None]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import vjp.preprocess as preprocess
from vjp.zero_shot import OpenAiClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Obtain the connected-components dataframe: reuse the parquet file when it
# is already on disk, otherwise build it via the preprocessing module.
DF_FILENAME = 'connected_components.parquet'

if not os.path.exists(DF_FILENAME):
    # No cached file available: generate the dataframe on the fly.
    print('File not found, generating dataframe...')
    namespace = preprocess.Namespace()
    namespace.connected_component_tags = ('req', 'arg', 'claim', 'mot', 'dec')
    namespace.use_child_text_tag_names = ('mot', 'dec')
    namespace.level = preprocess.PreprocessingLevels.CONNECTED_COMPONENTS
    df = preprocess.preprocess(namespace)
else:
    print(f'Reading from {DF_FILENAME}...')
    df = pd.read_parquet(DF_FILENAME)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Feature subsets to evaluate: each entry lists the tags handed to the
# zero-shot classifier for one run.
inputs = [
    ['fact', 'req', 'arg', 'claim'],
    ['fact', 'req', 'arg', 'claim', 'mot'],
    ['fact', 'req', 'arg', 'claim', 'dec'],
    ['fact', 'req', 'arg', 'claim', 'mot', 'dec']
]

# Read the key from the environment (raises KeyError when it is unset).
# It is deliberately NOT printed: cell outputs are stored with the notebook,
# so echoing the key would leak the secret to anyone who sees the file.
api_key = os.environ["OPENAI_API_KEY"]

# The ground-truth labels are the same for every run, so extract them once.
true_labels = df['label'].tolist()

# One dict of confusion-matrix counts per feature subset, in `inputs` order.
results = []
for features in inputs:
    zero_shot_classifier = OpenAiClassifier(api_key, features=features)
    predicted = zero_shot_classifier.predict(df.to_dict('records'))
    cm = confusion_matrix(true_labels, predicted)

    # The tp/fp/tn/fn breakdown only makes sense for a binary problem; fail
    # loudly instead of silently reading the wrong cells of a larger matrix.
    if cm.shape != (2, 2):
        raise ValueError(
            f'Expected a 2x2 confusion matrix for features {features}, '
            f'got shape {cm.shape}'
        )

    # sklearn lays the matrix out as cm[true][predicted] with classes in
    # sorted order, so the flattened binary matrix reads (tn, fp, fn, tp).
    # NOTE(review): assumes the negative class sorts before the positive one
    # (e.g. 0/1 labels) -- confirm against the dataset's label encoding.
    tn, fp, fn, tp = (int(count) for count in cm.ravel())

    result = {'tp': tp, 'fp': fp, 'tn': tn, 'fn': fn}
    results.append(result)

    print(result)

In [None]:
# Show the confusion-matrix counts gathered for each feature subset.
results

In [None]:
# Toy example: build a confusion matrix from four hand-written labels, print
# the raw array and plot it, to check how the cells are laid out.
y_true = [1, 0, 0, 1]
y_pred = [1, 0, 0, 0]

cm = confusion_matrix(y_true, y_pred)
print(cm)

disp = ConfusionMatrixDisplay(confusion_matrix=cm).plot()