In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import json
import pandas as pd
import numpy as np 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained("RicoBorra/DREAM-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("RicoBorra/DREAM-t5-small")

In [65]:
i = "A man is running after a thief. love"
t = tokenizer(i, return_tensors='pt').input_ids
o = model.generate(t, max_new_tokens = 100)
d = tokenizer.decode(o[0])
d

'<pad> A man is running after a thief.</s>'

## Tests on FLUTE

The idea here was to see if it is possible to type a figurative (here sarcastic) sentence by comparing their BERTscore similarity with serious sentences and their sarcastic equivalent, drawn from the FLUTE dataset. We might expect the similiarity tends to be higher between sarcastic sentences and inversely.
--> After testing this idea, it clearly doesn't work (On 100 random samples, the classifier detected sarcasm with only 36% accuracy)

In [3]:
dataset = []
path = "D:\Documents\PoliTo\Deep NLP\Project\FLUTE-Dataset\model-in-the-loop-fig-lang-main\\"
for type, file in {'Sarcasm' : 'SarcasmNLI\sarcasm_train.jsonl',
                   'Simile' : 'simileNLI\simile_train.jsonl', 
                   'Idiom' : 'idiomNLI\idiom_train.jsonl',
                   'Metaphor' : 'metaphorNLI\metaphor_train.jsonl'}.items() :

    with open(path + file, 'r', encoding='utf8') as f :
        typed_dataset = []
        for line in f :
            typed_dataset.append(json.loads(line))
    for row in typed_dataset :
        dataset.append([row['premise'], row['hypothesis'], row['label'], row['explanation'], type])

dataset = pd.DataFrame(dataset, columns=['Sentence1', 'Sentence2', 'Label', 'Explanation', 'Type'])

In [4]:
dataset.sample(frac=1)

Unnamed: 0,Sentence1,Sentence2,Label,Explanation,Type
3388,I get why they would stay away from her if she...,i'd understand if they stayed away from her be...,Entailment,A coyote is a wild animal that is known to be ...,Simile
1939,I was upset to find emails between my ex wife ...,I was absolutely destroyed to find emails betw...,Entailment,Being cheated on is one of the most painful th...,Sarcasm
1142,I crashed my favorite classic car which I've h...,I totally wrecked my favorite classic car that...,Entailment,To crash a car is to cause significant damage ...,Sarcasm
985,I'm waiting on some test results from the Dr. ...,I'm anxiously waiting for my test results from...,Entailment,It is common to feel anxious while waiting for...,Sarcasm
3137,I had to have some tests run due to an unclear...,Having to get some tests done because of an un...,Contradiction,Having to get some tests done because of an un...,Sarcasm
...,...,...,...,...,...
1334,I ate a box of donuts yesterday when I promise...,I feel terrible about eating a box of donuts w...,Entailment,Cheating on a diet often leads to feelings of ...,Sarcasm
795,I heard a knock on my door at 2 am in the morn...,I heard a knock on my door at 2 am in the morn...,Contradiction,Unknown people knocking on someone's door at 2...,Sarcasm
830,Once I went into a store only to realize that ...,The time I went into a store wearing two diffe...,Entailment,It is considered very sloppy and unprofessiona...,Sarcasm
5387,"I know its not really an excuse, but that was ...","I know its not really an excuse, but that was ...",Contradiction,To lead to believe means to cause one to belie...,Idiom


In [6]:
from evaluate import load
bertscore = load("bertscore")


In [17]:
sarcasm_dataset = dataset[np.logical_and(dataset['Type'] == 'Sarcasm', dataset['Label'] == 'Contradiction')]

In [36]:
pred = ['I truly love being crushed by a bunch of elephants' for _ in range(50)]
ref = np.array(sarcasm_dataset['Sentence1'][50:100])
np.mean(bertscore.compute(predictions = pred, references = ref, lang="en")['precision'])

0.8625892686843872

In [48]:
from tqdm import tqdm 

def test_sarcasm_similarity(sarcasm_train_dataset, sarcasm_test_dataset) :
    true_sarcasm_dataset = sarcasm_test_dataset[sarcasm_test_dataset['Label'] == 'Contradiction']
    good_predictions = 0
    bad_predictions = 0
    # First, test every serious sentence
    for serious_sentence in tqdm(true_sarcasm_dataset.sample(frac=1)['Sentence1'][:50]) :
        # Shuffle dataset at each iteration
        sarcasm_train_dataset.sample(frac=1)
        pred = np.array([serious_sentence for _ in range(50)])
        ref1 = np.array(sarcasm_train_dataset['Sentence1'][:50])
        mean1 = np.mean(bertscore.compute(predictions = pred, references = ref1, lang="en")['precision'])
        ref2 = np.array(sarcasm_train_dataset['Sentence2'][:50])
        mean2 = np.mean(bertscore.compute(predictions=pred, references=ref2, lang='en')['precision'])
        # If mean1 > mean2, the model would consider the sentence to be serious as it is more similar to serious sentences than sarcastic ones
        if mean1 > mean2 :
            good_predictions += 1
        else :
            bad_predictions += 1
    # Then, test every sarcastic sentence
    for sarcastic_sentence in tqdm(true_sarcasm_dataset.sample(frac=1)['Sentence2'][:50]) :
        # Shuffle dataset at each iteration
        sarcasm_train_dataset.sample(frac=1)
        pred = np.array([serious_sentence for _ in range(50)])
        ref1 = np.array(sarcasm_train_dataset['Sentence1'][:50])
        mean1 = np.mean(bertscore.compute(predictions = pred, references = ref1, lang="en")['precision'])
        ref2 = np.array(sarcasm_train_dataset['Sentence2'][:50])
        mean2 = np.mean(bertscore.compute(predictions=pred, references=ref2, lang='en')['precision'])
        if mean2 > mean1 :
            good_predictions += 1
        else :
            bad_predictions += 1
    
    final_precision = good_predictions / (good_predictions + bad_predictions)
    return good_predictions, bad_predictions, final_precision

with open(path + 'testgolddata\sarcasm_test.jsonl') as f :
    test_dataset = []
    for line in f :
        row = json.loads(line)
        test_dataset.append([row['premise'], row['hypothesis'], row['label']])
test_dataset = pd.DataFrame(test_dataset, columns=['Sentence1', 'Sentence2', 'Label'])

good, bad, precision = test_sarcasm_similarity(sarcasm_dataset, test_dataset)
print(good, bad, precision)

100%|██████████| 50/50 [05:51<00:00,  7.03s/it]
100%|██████████| 50/50 [05:52<00:00,  7.05s/it]

36 64 0.36



