In [1]:
import pandas as pd
from datasets import load_dataset
from transformers import pipeline
from tqdm import tqdm

In [2]:
test = pd.read_csv('../data/polnli_test_results.csv')
alts = pd.read_csv('../data/hypothesis_variants.csv')

In [3]:
# merge with alternative hypotheses
test = pd.merge(test, alts, how = 'left', left_on = 'hypothesis', right_on = 'original')

In [4]:
# drop observations that don't have AI generated alternatives
test = test[~test['original'].isna()].reset_index(drop = True)

In [5]:

# drop columns I don't need
test.drop(['augmented_hypothesis', 'base_nli', 'large_nli', 'base_polnli', 'llama', 'large_polnli', 'sonnet', 'original'], axis = 1, inplace = True)

# DEBATE Base

In [6]:
model = "mlburnham/Political_DEBATE_base_v1.0"
pipe = pipeline("zero-shot-classification", model = model, device = 'mps', batch_size = 64)

In [7]:
colnames = ['hypothesis', 'alt1', 'alt2', 'alt3', 'alt4']
for col in tqdm(colnames, desc = 'Classifying columns'):
    colname = col + '_label'
    test[colname] = 0
    for i in test.index:
        hypothesis = test.loc[i, col]
        sample = test.loc[i, 'premise']
        res = pipe(sample, hypothesis, hypothesis_template = '{}', multi_label = False)
        test.loc[i, colname] = round(res['scores'][0])
    test[colname].replace({0:1, 1:0}, inplace = True)

    test[colname] = test[colname].astype(int)

Classifying columns:   0%|          | 0/5 [00:00<?, ?it/s]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


Classifying columns:  20%|██        | 1/5 [07:33<30:15, 453.88s/it]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


Classifying columns:  40%|████      | 2/5 [11:14<15:50, 316.75s/it]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


Classifying columns:  60%|██████    | 3/5 [14:52<09:03, 271.79s/it]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


Classifying columns:  80%|████████  | 4/5 [17:55<03:56, 236.52s/it]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


Classifying columns: 100%|██████████| 5/5 [20:51<00:00, 214.70s/it]

Classifying columns: 100%|██████████| 5/5 [20:51<00:00, 250.28s/it]




In [8]:
alt_columns = ['alt1_label', 'alt2_label', 'alt3_label', 'alt4_label']

# Initialize a counter for differences
total_differences = 0

# Iterate through each alternative label column
for col in alt_columns:
    # Compare the values with 'hypothesis_label' and sum the differences
    total_differences += (test[col] != test['hypothesis_label']).sum()

# Print the result
print(f"Total number of differences: {total_differences}")

Total number of differences: 894
Total number of differences: 894


In [11]:
total_differences/(8980*4)

0.024888641425389754

In [12]:
test.to_csv('hypothesis_stability.csv', index = False)

# DEBATE Large

In [23]:
model = "mlburnham/Political_DEBATE_large_v1.0"
pipe = pipeline("zero-shot-classification", model = model, device = 'mps', batch_size = 64)

In [24]:
%%time
colname = 'original_label'
test[colname] = 0

for i in test.index:
    hypothesis = test.loc[i, 'hypothesis']
    sample = test.loc[i, 'premise']
    res = pipe(sample, hypothesis, hypothesis_template = '{}', multi_label = False)
    test.loc[i, colname] = round(res['scores'][0])
test[colname].replace({0:1, 1:0}, inplace = True)

test[colname] = test[colname].astype(int)

CPU times: user 9min 19s, sys: 54.9 s, total: 10min 14s
Wall time: 24min 4s
CPU times: user 9min 19s, sys: 54.9 s, total: 10min 14s
Wall time: 24min 4s


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




In [25]:
colnames = ['alt1', 'alt2', 'alt3', 'alt4']
for col in colnames:
    colname = col + '_label'
    test[colname] = 0
    for i in test.index:
        hypothesis = test.loc[i, col]
        sample = test.loc[i, 'premise']
        res = pipe(sample, hypothesis, hypothesis_template = '{}', multi_label = False)
        test.loc[i, colname] = round(res['scores'][0])
    test[colname].replace({0:1, 1:0}, inplace = True)

    test[colname] = test[colname].astype(int)

Unnamed: 0,premise,hypothesis,entailment,dataset,task,alt1,alt2,alt3,alt4,original_label
0,Transport workers strike to protest rising fue...,The event described in this text is a strike.,0,mlburnham/scad_event_entailment,event extraction,The occurrence detailed in this passage is a s...,The situation outlined in this document is a s...,The incident mentioned in this text is a strike.,The event described in this text is a strike.,0
1,Municipal workers strike over pay.,The event described in this text is a strike.,0,mlburnham/scad_event_entailment,event extraction,The occurrence detailed in this passage is a s...,The situation outlined in this document is a s...,The incident mentioned in this text is a strike.,The event described in this text is a strike.,0
2,Niger's mining sector strikes.,The event described in this text is a strike.,0,mlburnham/scad_event_entailment,event extraction,The occurrence detailed in this passage is a s...,The situation outlined in this document is a s...,The incident mentioned in this text is a strike.,The event described in this text is a strike.,0
3,Separatist movement protests detention of lead...,The event described in this text is a strike.,0,mlburnham/scad_event_entailment,event extraction,The occurrence detailed in this passage is a s...,The situation outlined in this document is a s...,The incident mentioned in this text is a strike.,The event described in this text is a strike.,0
4,Janitors and hospital support staff staged a s...,The event described in this text is a strike.,0,mlburnham/scad_event_entailment,event extraction,The occurrence detailed in this passage is a s...,The situation outlined in this document is a s...,The incident mentioned in this text is a strike.,The event described in this text is a strike.,0
...,...,...,...,...,...,...,...,...,...,...
8975,Encouraging Public Offerings Act of 2019\nThis...,This text is about finance and financial sector.,0,mlburnham/bill_summary_entailment,topic classification,This document discusses finance and the financ...,The content here pertains to finance and the f...,This writing covers topics related to finance ...,This text is about finance and financial sector.,0
8976,Cesar Chavez Commemorative Coin Act\nThis bill...,This text is about finance and financial sector.,0,mlburnham/bill_summary_entailment,topic classification,This document discusses finance and the financ...,The content here pertains to finance and the f...,This writing covers topics related to finance ...,This text is about finance and financial sector.,1
8977,Outsourcing Accountability Act of 2017\nThis b...,This text is about finance and financial sector.,0,mlburnham/bill_summary_entailment,topic classification,This document discusses finance and the financ...,The content here pertains to finance and the f...,This writing covers topics related to finance ...,This text is about finance and financial sector.,0
8978,Prioritizing Our Workers Act\nThis bill includ...,This text is about finance and financial sector.,0,mlburnham/bill_summary_entailment,topic classification,This document discusses finance and the financ...,The content here pertains to finance and the f...,This writing covers topics related to finance ...,This text is about finance and financial sector.,0
