In [11]:
import pandas as pd, os, numpy as np
from sklearn.metrics import f1_score, accuracy_score
from transformers import pipeline

In [2]:
# Load the trained model into Hugging Face pipeline
model_path = 'bias results/supervised_stance_model'
classifier = pipeline('text-classification', model=model_path, tokenizer="bert-base-cased", device_map=0,
                     truncation=True, max_length=512)

In [29]:
# Read the dataframe from the pickle file
DATASET = 'basil'

df = pd.read_csv(os.path.join(DATASET, "data_merged.csv"))

if DATASET != 'basil':
    df = df[df['train_stance'].isin(['against', 'for', 'neutral', 'denies', 'supports'])]
    df['train_stance'] = df['train_stance'].replace({'for': 'supports', 'denies': 'against'})
else:
    df['train_stance'] = df['stance']

In [30]:
# Modify the data based on the 'dataset' column
df['formatted_text'] = df.apply(lambda row: f"target: {row['event']} [SEP] statement: {row['full_text']}", axis=1)

In [31]:
# Run each example through the pipeline and track the results
results = classifier(df['formatted_text'].tolist())

In [32]:
results[0:10]

[{'label': 'against', 'score': 0.3994285762310028},
 {'label': 'against', 'score': 0.39923912286758423},
 {'label': 'against', 'score': 0.4041491448879242},
 {'label': 'against', 'score': 0.40471965074539185},
 {'label': 'against', 'score': 0.4045639634132385},
 {'label': 'against', 'score': 0.4003814458847046},
 {'label': 'against', 'score': 0.39940518140792847},
 {'label': 'against', 'score': 0.39904874563217163},
 {'label': 'against', 'score': 0.3990795910358429},
 {'label': 'against', 'score': 0.399034321308136}]

In [33]:
df['preds'] = [result['label'] for result in results]
df['pred_scores'] = [result['score'] for result in results]

In [34]:
np.unique(df['preds'], return_counts=True)

(array(['against', 'for'], dtype=object), array([609,   3]))

In [35]:
df[['full_text', 'event','train_stance','preds','pred_scores']].sample(20)

Unnamed: 0,full_text,event,train_stance,preds,pred_scores
529,House Panel Seeks Private Talk With Hillary Cl...,Hillary Clinton,neutral,against,0.398989
210,"Nebraska, Oklahoma File Federal Suit Against C...",Nebraska officials,neutral,against,0.398943
253,"Facing Congress, Clinton Defends Her Actions B...",Republican Lawmakers,neutral,against,0.400303
604,Defense Secretary James Mattis to leave as of ...,Donald Trump,agree,against,0.399271
132,Obama and Boehner Talk Deficits at the White H...,Barack Obama,neutral,against,0.398997
12,Supreme Court Hears Argument In Wal-Mart Sex B...,Walmart,neutral,against,0.402297
517,Susan Rice calls Netanyahu's Capitol Hill spee...,Susan Rice,neutral,against,0.399278
161,Senate Republicans Vow to Block Dem Legislatio...,Republicans,neutral,against,0.399114
332,"GOP Whip Steve Scalise, Others Shot At Early M...",Democratic Lawmakers,neutral,for,0.388322
27,Perry TV Ad: I Can Defend Faith from Obama’s “...,Rick Perry,neutral,against,0.399879


In [36]:
# Get the unweighted F1 score
f1 = f1_score(df['train_stance'], df['preds'], average='macro')

# Display the F1 score
print(f"Unweighted F1 Score: {f1}")

Unweighted F1 Score: 0.0


In [None]:
df.to_csv(os.path.join("bias results",DATASET+"_supervised.csv"))