In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# by later cells. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the annotated sentence-pair table from the mounted Drive folder.
annotated_csv_path = "/content/drive/MyDrive/contradiction/Sdata_annotated.csv"
df = pd.read_csv(annotated_csv_path)

In [None]:
# Tokenizer and classifier must come from the same checkpoint, so the
# identifier is written once and shared by both loaders.
checkpoint_name = "ynie/roberta-large_conv_contradiction_detector_v0"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)

Some weights of the model checkpoint at ynie/roberta-large_conv_contradiction_detector_v0 were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def process_row(row, max_length=512):
    """Classify one hypothesis/premise pair with the module-level model.

    The pair is tokenized as (hypothesis, premise), run through ``model``
    and the index of the largest logit is returned.

    Args:
        row: A DataFrame row (``pd.Series``) with ``'hypothesis'`` and
            ``'premise'`` text fields; ``row.name`` is used only for the
            progress message.
        max_length: Upper bound on the tokenized pair length. Longer pairs
            are truncated instead of overflowing the encoder's position
            embeddings. The default of 512 assumes a RoBERTa-style encoder
            (TODO confirm against the checkpoint's config).

    Returns:
        int: The argmax class index of the logits. What each index means is
        defined by the checkpoint, not by this function.
    """
    # Local import keeps the cell self-contained; torch is already a
    # dependency of the "pt" tensors requested from the tokenizer.
    import torch

    encoded = tokenizer(
        row['hypothesis'],
        row['premise'],
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**encoded).logits

    prediction = logits.argmax().item()
    print(f"Row {row.name} done after prediction with result: {prediction}")
    return prediction

In [None]:
# Sanity check: (rows, columns) of the table read from the annotated CSV.
df.shape

(47975, 11)

In [None]:
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,paper_id,pair_id,hypothesis,premise,aspect,s1,s2,line_pair,label
0,0,0,ICLR_2019_1401,3,further the paper makes several misleading cla...,the paper is rather well written but it strong...,clarity,positive,negative,"(6, 2)",n
1,1,1,NIPS_2016_89,3,4 .i like the key idea and the speedup is very...,review scores reflect this reviewers impressio...,originality,negative,positive,"(5, 20)",n
2,2,2,NIPS_2016_89,4,the idea to use sampling is nice but the analy...,review scores reflect this reviewers impressio...,originality,negative,positive,"(5, 18)",n
3,3,3,NIPS_2016_89,5,to summarize i think this paper give some empi...,in my opinion the overall quality of the paper...,soundness,positive,negative,"(4, 10)",n
4,4,4,NIPS_2016_89,5,to summarize i think this paper give some empi...,the context and relevance as well as the contr...,soundness,positive,negative,"(5, 10)",n


In [None]:
# Evaluate on a small random subset: the model is called once per row, so the
# full table would be slow. The seed is fixed so that a fresh
# "Restart & Run All" scores the same rows and yields the same metrics.
SAMPLE_SIZE = 100
RANDOM_SEED = 42

# min() keeps this working when the table has fewer rows than SAMPLE_SIZE.
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED)

# One model prediction (argmax class index) per sampled row.
df['contradiction'] = df.apply(process_row, axis=1)

Row 33786 done after prediction with result: 0
Row 27168 done after prediction with result: 0
Row 9190 done after prediction with result: 0
Row 34217 done after prediction with result: 0
Row 46968 done after prediction with result: 0
Row 16854 done after prediction with result: 0
Row 18768 done after prediction with result: 1
Row 32961 done after prediction with result: 0
Row 24457 done after prediction with result: 0
Row 42842 done after prediction with result: 0
Row 16181 done after prediction with result: 0
Row 2522 done after prediction with result: 0
Row 21024 done after prediction with result: 0
Row 14512 done after prediction with result: 0
Row 31346 done after prediction with result: 0
Row 47859 done after prediction with result: 0
Row 45590 done after prediction with result: 0
Row 3717 done after prediction with result: 0
Row 40230 done after prediction with result: 0
Row 2146 done after prediction with result: 0
Row 37143 done after prediction with result: 0
Row 10720 done af

In [None]:
# Encode the gold label as an integer: "c" -> 0, every other value -> 1.
# The dtype guard makes the cell safe to re-run: once the column is numeric,
# comparing it to "c" again would silently turn every label into 1.
if not pd.api.types.is_numeric_dtype(df['label']):
    df['label'] = df['label'].ne("c").astype(int)


In [None]:
# Preview the sampled rows: numeric `label` next to the model's `contradiction` output.
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,paper_id,pair_id,hypothesis,premise,aspect,s1,s2,line_pair,label,contradiction
33786,33786,33786,ICLR_2020_165,1,2 for the locomotion task examples i am confu...,overall the paper is well written real-world e...,soundness,positive,negative,"(38, 7)",1,0
27168,27168,27168,NIPS_2019_772,2,however i still think that the paper requires ...,clarity the paper is clear for the most part e...,clarity,positive,negative,"(5, 55)",1,0
9190,9190,9190,NIPS_2017_115,2,the experiments have been limited to stereo ma...,the paper is really well written and curated ...,substance,positive,negative,"(4, 6)",1,0
34217,34217,34217,ICLR_2019_1051,2,numerical results for the link prediction tas...,can you explain why this is the case,soundness,negative,positive,"(16, 2)",1,0
46968,46968,46968,ICLR_2020_332,3,3 as for the model itself i donot find very s...,significance - this paper proposes an interes...,originality,positive,negative,"(20, 7)",1,0


In [None]:
# prompt: confusion matrix between label and contradiction

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Rows are gold labels, columns are model predictions.
# `labels=[0, 1]` pins the matrix to 2x2 even if one class is absent from the
# evaluated sample; otherwise the tick labels below would not line up.
cm = confusion_matrix(df['label'], df['contradiction'], labels=[0, 1])

# The label-encoding cell maps "c" (contradiction) to 0 and everything else
# to 1, so class 0 is "Contradiction" on the true-label axis.
# NOTE(review): the predicted axis assumes the model's class index 0 also means
# contradiction - confirm against the checkpoint's model card.
class_names = ['Contradiction', 'No Contradiction']

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts; the default format renders 100 as "1e+02"
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)

ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')

plt.show()
