In [None]:
import gc
import os
import re
from copy import deepcopy

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from datasets import load_dataset
from torch.optim import AdamW
from tqdm.notebook import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, optimization, Pipeline

In [None]:
# Base checkpoint from which every system of this notebook is fine-tuned.
checkpoint = "t5-small"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
# The collator expects the model object, not the checkpoint name: it looks for
# `prepare_decoder_input_ids_from_labels` on it to build the decoder inputs.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
# Checkpoint of the DREAM model used to generate the scene elaborations.
dream_checkpoint = "RicoBorra/DREAM-t5-small"

dream_tokenizer = AutoTokenizer.from_pretrained(dream_checkpoint)
dream_model = AutoModelForSeq2SeqLM.from_pretrained(dream_checkpoint)
# The collator expects the model object, not the checkpoint name: it looks for
# `prepare_decoder_input_ids_from_labels` on it to build the decoder inputs.
dream_data_collator = DataCollatorForSeq2Seq(tokenizer=dream_tokenizer, model=dream_model)

## FLUTE Data extraction and processing
Following the few instructions given in the Git README, so that we start from the same initial data.

In [None]:
# HuggingFace only hosts the train split of FLUTE.
dataset = load_dataset(path="ColumbiaNLP/FLUTE")

In [None]:
def compute_dream_elaborations(dataset, dream_model, path_to_save, batch_size=32):
    """Generate (or reload from cache) the DREAM elaborations of all sentences.

    Every sentence of the 'premise' and 'hypothesis' columns is sent to the DREAM
    model once per dimension (emotion, motivation, consequence, rot). The result
    is cached as a CSV file so that generation only has to run once.

    Args:
        dataset: object indexable by column name ('premise', 'hypothesis'); each
            column is a sequence of strings.
        dream_model: seq2seq model used through `generate` (its `device`
            attribute decides where the inputs are placed).
        path_to_save: path of the CSV cache, read when it exists and written
            otherwise.
        batch_size: number of sentences sent to the model at once.

    Returns:
        A DataFrame with one '<sentence_type>_<dimension>' column per
        combination. Freshly generated cells always end with a period.

    Note:
        Tokenization relies on the notebook-level `tokenizer`.
    """
    if os.path.exists(path_to_save):
        # keep_default_na=False: an empty elaboration must come back as an empty
        # string, not as NaN (later code calls str methods on every cell).
        cached = pd.read_csv(path_to_save, keep_default_na=False)
        # Caches written by earlier versions of this function stored the index as
        # an unnamed first column; drop it so both code paths return the same columns.
        kept_columns = [name for name in cached.columns if not str(name).startswith('Unnamed:')]
        return cached.loc[:, kept_columns]

    generated = {}
    for sentence_type in tqdm(['premise', 'hypothesis'], desc="Processing Sentence Type"):
        # Materialize the column once instead of once per batch.
        sentences = list(dataset[sentence_type])
        for dream_dimension in tqdm(['emotion', 'motivation', 'consequence', 'rot'], desc="Processing Dream Dimension"):
            output_sentences = []

            # Generate batch by batch to keep the memory footprint bounded.
            batch_starts = range(0, len(sentences), batch_size)
            for start in tqdm(batch_starts, desc="Processing Batches", leave=False):
                inputs = ['[SITUATION] ' + sentence + ' [QUERY] ' + dream_dimension
                          for sentence in sentences[start:start + batch_size]]
                encoded = tokenizer(inputs, padding='longest', return_tensors='pt').to(dream_model.device)
                # Pass the attention mask explicitly so padding is ignored.
                output_tokens = dream_model.generate(encoded.input_ids,
                                                     attention_mask=encoded.attention_mask,
                                                     max_new_tokens=100)
                output_sentences.extend(tokenizer.batch_decode(output_tokens, skip_special_tokens=True))

            generated[sentence_type + '_' + dream_dimension] = output_sentences

    # Make sure each elaboration ends with a period.
    elaborations = pd.DataFrame({
        name: [text if text.endswith('.') else text + '.' for text in texts]
        for name, texts in generated.items()
    })

    # index=False keeps the cached file identical to the returned frame.
    elaborations.to_csv(path_to_save, index=False)

    return elaborations

In [None]:
# Elaborations of the train split, cached next to the notebook.
dreams = compute_dream_elaborations(
    dataset=dataset['train'],
    dream_model=dream_model,
    path_to_save="dream_elaborations.csv",
)

In [None]:
# Put the FLUTE columns and the DREAM elaborations side by side (column-wise),
# then go back to a HuggingFace Dataset.
flute_frame = pd.DataFrame(dataset['train'])
complete_dataset = Dataset.from_pandas(pd.concat([flute_frame, dreams], axis=1))

In [None]:
# Display the dataset summary to check the merged columns.
complete_dataset

In [None]:
# Save the merged dataset (FLUTE columns + DREAM elaborations) as CSV.
complete_dataset.to_csv("complete_dataset.csv")

In [None]:
def add_combined_cols(entry):
    """Add the prompt and target columns needed by every system to one example.

    Args:
        entry: mapping with the keys 'premise', 'hypothesis', 'label',
            'explanation', 'type' and the eight '<premise|hypothesis>_<dimension>'
            DREAM elaborations.

    Returns:
        The same mapping, extended with the input/target text of Systems 1 to 4.
    """
    def with_final_period(sentence):
        # Surrounding whitespace is dropped and a final period is guaranteed.
        sentence = sentence.strip()
        return sentence if sentence.endswith(".") else sentence + "."

    question = ' Is there a contradiction or entailment between the premise and hypothesis ?'
    premise = with_final_period(entry["premise"])
    hypothesis = with_final_period(entry["hypothesis"])
    premise_part = 'Premise: ' + premise
    hypothesis_part = ' Hypothesis: ' + hypothesis

    # Columns for System 1
    entry["premise_hypothesis"] = premise_part + hypothesis_part + question
    entry["label_explanation"] = 'Label: ' + entry["label"] + '. Explanation: ' + entry["explanation"]

    # Columns for System 2
    entry["premise_hypothesis_system_2"] = premise_part + hypothesis_part + ' What is the type of figurative language involved?' + question
    entry["type_label_explanation"] = 'Type: ' + entry["type"] + '. Label: ' + entry["label"] + '. Explanation: ' + entry["explanation"]

    # Columns for Systems 3: one prompt per DREAM dimension, then all of them together.
    # Premise and hypothesis elaborations are both stripped, so every prompt uses
    # the same spacing.
    premise_elaborations = ''
    hypothesis_elaborations = ''
    for dream_dimension in ['emotion', 'motivation', 'consequence', 'rot']:
        tag = ' [' + dream_dimension.capitalize() + '] '
        premise_elaboration = tag + entry['premise_' + dream_dimension].strip()
        hypothesis_elaboration = tag + entry['hypothesis_' + dream_dimension].strip()
        entry["premise_hypothesis_" + dream_dimension] = (
            premise_part + premise_elaboration + hypothesis_part + hypothesis_elaboration + question
        )
        premise_elaborations += premise_elaboration
        hypothesis_elaborations += hypothesis_elaboration
    # NOTE(review): unlike the other prompts, this one does not end with the
    # question -- kept as is, confirm it is intended.
    entry["premise_hypothesis_all_dims"] = premise_part + premise_elaborations + hypothesis_part + hypothesis_elaborations

    # Column for System 4 (explanation step). As the specific input isn't indicated
    # in the paper, the question tries to formalize at best what is expected.
    entry["premise_hypothesis_label"] = premise_part + hypothesis_part + ' Label : ' + entry['label'] + '. What is the explanation of the label associated to the premise and the hypothesis ?'

    return entry
# Derive the prompt/target columns of every system for each example.
combined_cols_dataset = complete_dataset.map(function=add_combined_cols)

# Only train data is provided, so a test split is carved out of it.
# The split is shuffled by default; the seed keeps it reproducible.
dataset_train_test = combined_cols_dataset.train_test_split(seed=42, test_size=0.2)

In [None]:
import evaluate
# ROUGE metric object, read by compute_metrics.
rouge = evaluate.load("rouge")

import numpy as np


def compute_metrics(eval_pred):
    """Compute the ROUGE scores and the mean prediction length of an evaluation.

    Args:
        eval_pred: pair `(predictions, labels)`. `predictions` is either a tuple
            whose first item holds the model output, or that output directly.
            A 3-D output is treated as logits and reduced with an argmax over the
            last axis -- presumably (batch, sequence, vocabulary), confirm against
            the trainer configuration. Anything else is treated as token ids.

    Returns:
        Dict of the ROUGE values plus 'gen_len' (mean number of non-padding
        predicted tokens), all rounded to 4 decimals.

    Note:
        Relies on the notebook-level `tokenizer` and `rouge` objects.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)

    if predictions.ndim == 3:
        predicted_token_ids = predictions.argmax(axis=-1)
    else:
        # -100 cannot be decoded; replace it with the padding token.
        predicted_token_ids = np.where(predictions != -100, predictions, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(predicted_token_ids, skip_special_tokens=True)
    # -100 marks label positions ignored by the loss; it cannot be decoded either.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Lengths are measured on the predicted token ids, not on the raw model output.
    prediction_lens = [np.count_nonzero(token_ids != tokenizer.pad_token_id) for token_ids in predicted_token_ids]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

## System 1 : Normal classifier

In [None]:
# Independent copy of the base model, so fine-tuning System 1 leaves `model` untouched.
model_s1 = deepcopy(model)

In [None]:
def preprocess_dataset_s1(examples):
    """Tokenize the System 1 prompts and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns exactly once
# (removing them a second time fails because they no longer exist).
tokenized_ds_s1 = (
    dataset_train_test
    .map(preprocess_dataset_s1, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S1Model_more_accurate",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=3,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s1,
    args=training_args,
    train_dataset=tokenized_ds_s1["train"],
    eval_dataset=tokenized_ds_s1["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Delete variables after training to prevent memory overflow for the next trainings.
# The trainer is deleted too: it still references the model and the datasets.
del trainer
del tokenized_ds_s1
del model_s1
gc.collect()

In [None]:
# Quick sanity check of the fine-tuned System 1 model on a hand-written example.
# Raw string: the backslashes of the Windows path are not escape sequences.
model_s1 = AutoModelForSeq2SeqLM.from_pretrained(r"D:\Documents\PoliTo\Deep NLP\Project\S1Model_more_accurate\checkpoint-24112")
# Same layout as the 'premise_hypothesis' training column, question included.
sample_input = (
    "Premise: Today I crashed my car. Hypothesis: I felt like a champion when I crashed my car."
    " Is there a contradiction or entailment between the premise and hypothesis ?"
)
sample_ids = tokenizer(sample_input, return_tensors='pt').input_ids.to(model_s1.device)
sample_output = model_s1.generate(sample_ids, max_new_tokens=100)
sample_prediction = tokenizer.decode(sample_output[0])
sample_prediction

## System 2 : Predict type of figurative language

In [None]:
# Independent copy of the base model, so fine-tuning System 2 leaves `model` untouched.
model_s2 = deepcopy(model)

In [None]:
def preprocess_dataset_s2(examples):
    """Tokenize the System 2 prompts and use 'type_label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_system_2'])
    encoded_inputs['labels'] = tokenizer(examples['type_label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s2 = (
    dataset_train_test
    .map(preprocess_dataset_s2, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S2Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s2,
    args=training_args,
    train_dataset=tokenized_ds_s2["train"],
    eval_dataset=tokenized_ds_s2["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s2
del model_s2
gc.collect()

## System 3 : Include DREAM elaborations

### 3.1 : Emotion

In [None]:
# Independent copy of the base model, so fine-tuning System 3.1 leaves `model` untouched.
model_s31 = deepcopy(model)

In [None]:
def preprocess_dataset_s31(examples):
    """Tokenize the emotion-augmented prompts and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_emotion'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s31 = (
    dataset_train_test
    .map(preprocess_dataset_s31, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S3-1Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s31,
    args=training_args,
    train_dataset=tokenized_ds_s31["train"],
    eval_dataset=tokenized_ds_s31["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s31
del model_s31
gc.collect()

### 3.2 : Motivation

In [None]:
# Independent copy of the base model, so fine-tuning System 3.2 leaves `model` untouched.
model_s32 = deepcopy(model)

In [None]:
def preprocess_dataset_s32(examples):
    """Tokenize the motivation-augmented prompts and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_motivation'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s32 = (
    dataset_train_test
    .map(preprocess_dataset_s32, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S3-2Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s32,
    args=training_args,
    train_dataset=tokenized_ds_s32["train"],
    eval_dataset=tokenized_ds_s32["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s32
del model_s32
gc.collect()

### 3.3 : Consequence

In [None]:
# Independent copy of the base model, so fine-tuning System 3.3 leaves `model` untouched.
model_s33 = deepcopy(model)

In [None]:
def preprocess_dataset_s33(examples):
    """Tokenize the consequence-augmented prompts and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_consequence'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s33 = (
    dataset_train_test
    .map(preprocess_dataset_s33, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S3-3Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s33,
    args=training_args,
    train_dataset=tokenized_ds_s33["train"],
    eval_dataset=tokenized_ds_s33["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s33
del model_s33
gc.collect()

### 3.4 : Rule of Thumb (Social norm)

In [None]:
# Independent copy of the base model, so fine-tuning System 3.4 leaves `model` untouched.
model_s34 = deepcopy(model)

In [None]:
def preprocess_dataset_s34(examples):
    """Tokenize the rule-of-thumb-augmented prompts and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_rot'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s34 = (
    dataset_train_test
    .map(preprocess_dataset_s34, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S3-4Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s34,
    args=training_args,
    train_dataset=tokenized_ds_s34["train"],
    eval_dataset=tokenized_ds_s34["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s34
del model_s34
gc.collect()

### 3.5 : All Dimensions

In [None]:
# Independent copy of the base model, so fine-tuning System 3.5 leaves `model` untouched.
model_s35 = deepcopy(model)

In [None]:
def preprocess_dataset_s35(examples):
    """Tokenize the prompts carrying all DREAM dimensions and use 'label_explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_all_dims'])
    encoded_inputs['labels'] = tokenizer(examples['label_explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s35 = (
    dataset_train_test
    .map(preprocess_dataset_s35, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S3-5Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s35,
    args=training_args,
    train_dataset=tokenized_ds_s35["train"],
    eval_dataset=tokenized_ds_s35["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s35
del model_s35
gc.collect()

## System 4 : Two-step Pipeline

### 4.1 : Classify

In [None]:
# Independent copy of the base model, so fine-tuning System 4.1 leaves `model` untouched.
model_s41 = deepcopy(model)

In [None]:
def preprocess_dataset_s41(examples):
    """Tokenize the classification prompts and use the bare 'label' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis'])
    encoded_inputs['labels'] = tokenizer(examples['label'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s41 = (
    dataset_train_test
    .map(preprocess_dataset_s41, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S4-1Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s41,
    args=training_args,
    train_dataset=tokenized_ds_s41["train"],
    eval_dataset=tokenized_ds_s41["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s41
del model_s41
gc.collect()

### 4.2 : Explain

In [None]:
# Independent copy of the base model, so fine-tuning System 4.2 leaves `model` untouched.
model_s42 = deepcopy(model)

In [None]:
def preprocess_dataset_s42(examples):
    """Tokenize the label-conditioned prompts and use 'explanation' as target ids."""
    encoded_inputs = tokenizer(examples['premise_hypothesis_label'])
    encoded_inputs['labels'] = tokenizer(examples['explanation'])['input_ids']
    return encoded_inputs

In [None]:
# Tokenize both splits, then drop the original text columns.
tokenized_ds_s42 = (
    dataset_train_test
    .map(preprocess_dataset_s42, batched=True)
    .remove_columns(dataset_train_test['train'].column_names)
)

In [None]:
# The following parameters were taken from the DREAM-FLUTE paper (only the number
# of epochs has been increased because the model is smaller).
training_args = Seq2SeqTrainingArguments(
    # Raw string: the backslashes of the Windows path are not escape sequences.
    output_dir=r"D:\Documents\PoliTo\Deep NLP\Project\S4-2Model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    save_total_limit=2,
    num_train_epochs=8,
    load_best_model_at_end=True,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type='linear'
)

trainer = Seq2SeqTrainer(
    model=model_s42,
    args=training_args,
    train_dataset=tokenized_ds_s42["train"],
    eval_dataset=tokenized_ds_s42["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Free memory for the next trainings. The trainer is deleted too: it still
# references the model and the datasets.
del trainer
del tokenized_ds_s42
del model_s42
gc.collect()

### System 4 Pipeline

In [None]:
# Raw strings: the backslashes of the Windows paths are not escape sequences.
model_s41_path = r"D:\Documents\PoliTo\Deep NLP\Project\S4-1Model\checkpoint-24112"
model_s41 = AutoModelForSeq2SeqLM.from_pretrained(model_s41_path)

model_s42_path = r"D:\Documents\PoliTo\Deep NLP\Project\S4-2Model\checkpoint-24112"
model_s42 = AutoModelForSeq2SeqLM.from_pretrained(model_s42_path)

In [None]:
class DREAM_FLUTE_System4 :
    """Class encapsulating the two steps of System 4 (Classify, then Explain)."""

    # Question closing the classification prompt; step two replaces it with the label.
    _CLASSIFY_QUESTION = "Is there a contradiction or entailment between the premise and hypothesis ?"
    # Question closing the explanation prompt.
    _EXPLAIN_QUESTION = ". What is the explanation of the label associated to the premise and the hypothesis ?"

    def __init__(self, tokenizer = None, model_s41_path = None, model_s42_path = None) -> None:
        """Load the tokenizer and both models.

        Args:
            tokenizer: tokenizer to use; the "t5-small" one is loaded when None.
            model_s41_path: checkpoint of the classification model; the
                "YoanBOUTE/DREAM-FLUTE-S4-Classify" checkpoint is used when None.
            model_s42_path: checkpoint of the explanation model; the
                "YoanBOUTE/DREAM-FLUTE-S4-Explain" checkpoint is used when None.
        """
        self.tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained("t5-small")
        self.model_s41 = AutoModelForSeq2SeqLM.from_pretrained(
            model_s41_path if model_s41_path is not None else "YoanBOUTE/DREAM-FLUTE-S4-Classify")
        self.model_s42 = AutoModelForSeq2SeqLM.from_pretrained(
            model_s42_path if model_s42_path is not None else "YoanBOUTE/DREAM-FLUTE-S4-Explain")

    def _generate(self, model, text):
        """Tokenize `text`, run `model.generate` on it and decode the first output."""
        token_ids = self.tokenizer(text, return_tensors='pt').input_ids
        generated = model.generate(token_ids, max_new_tokens=100)
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)

    def prediction_pipeline(self, inputs) :
        """Classify the input(s), then explain the predicted label.

        Args:
            inputs: a string in the format "Premise : ... . Hypothesis : ... . Is
                there a contradiction or entailment between the premise and
                hypothesis ?", or a list of strings in this format.

        Returns:
            "Label : <label>. Explanation : <explanation>" for a string, or the
            list of such strings for a list.

        Raises:
            TypeError: if `inputs` is neither a string nor a list of strings.
        """
        if isinstance(inputs, str) :
            label_text = "Label : " + self._generate(self.model_s41, inputs)

            question_start = inputs.find(self._CLASSIFY_QUESTION)
            if question_start == -1 :
                # No question to cut off: keep the whole prompt (slicing with the
                # -1 returned by find would silently drop its last character).
                context = inputs.rstrip() + " "
            else :
                context = inputs[:question_start]

            explanation = self._generate(self.model_s42, context + label_text + self._EXPLAIN_QUESTION)
            return label_text + ". Explanation : " + explanation

        if isinstance(inputs, list) and all(isinstance(item, str) for item in inputs) :
            return [self.prediction_pipeline(item) for item in inputs]

        raise TypeError('Inputs should be either a string or a list of strings')


## Ensembling algorithm

In [None]:
class DREAM_FLUTE_Ensemble :
    """Ensemble of all the DREAM-FLUTE systems.

    Loads every model from HuggingFace (or from the device if a path to the model
    is indicated) and implements the ensembling algorithm given in the
    DREAM-FLUTE paper.
    """

    _DREAM_DIMENSIONS = ('emotion', 'motivation', 'consequence', 'rot')
    _VALID_LABELS = ('Contradiction', 'Entailment')
    # Systems voting for the label: the 5 best models according to the paper.
    _VOTING_SYSTEMS = ('S1', 'S2', 'S3-mot', 'S3-all', 'S4')
    # Order, indicated in the paper, in which the explanations are considered.
    _EXPLANATION_PRIORITY = ('S3-cons', 'S3-emo', 'S2', 'S3-all', 'S3-mot', 'S4', 'S1')
    _QUESTION = "Is there a contradiction or entailment between the premise and hypothesis ?"

    def __init__(self, tokenizer_path = None, s1_path = None, s2_path = None,
                 s3_emo_path = None, s3_mot_path = None, s3_cons_path = None,
                 s3_rot_path = None, s3_alldims_path = None, s4_clas_path = None,
                 s4_exp_path = None, dream_path = None) -> None:
        """Load the tokenizer and every model; a path left to None selects the default checkpoint."""
        def load(path, default_checkpoint) :
            return AutoModelForSeq2SeqLM.from_pretrained(path if path is not None else default_checkpoint)

        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path if tokenizer_path is not None else "t5-small")
        self.model_s1 = load(s1_path, "YoanBOUTE/DREAM-FLUTE-S1")
        self.model_s2 = load(s2_path, "YoanBOUTE/DREAM-FLUTE-S2")
        self.model_s3_emo = load(s3_emo_path, "YoanBOUTE/DREAM-FLUTE-S3-Emotion")
        self.model_s3_mot = load(s3_mot_path, "YoanBOUTE/DREAM-FLUTE-S3-Motivation")
        self.model_s3_cons = load(s3_cons_path, "YoanBOUTE/DREAM-FLUTE-S3-Consequence")
        self.model_s3_rot = load(s3_rot_path, "YoanBOUTE/DREAM-FLUTE-S3-ROT")
        self.model_s3_alldims = load(s3_alldims_path, "YoanBOUTE/DREAM-FLUTE-S3-AllDims")
        self.model_s4 = DREAM_FLUTE_System4(self.tokenizer, s4_clas_path, s4_exp_path)
        self.model_dream = load(dream_path, "RicoBorra/DREAM-t5-small")

    @staticmethod
    def _with_final_period(text) :
        """Strip `text` and make sure it ends with a period."""
        text = text.strip()
        return text if text.endswith('.') else text + '.'

    @staticmethod
    def _parse_prediction(text) :
        """Split a generated answer into `(label, explanation)`.

        The label is the word following "Label:" (with or without a space before
        the colon, wherever it stands in the answer) and the explanation is
        everything following "Explanation:". A missing part is returned as None
        (label) or '' (explanation).
        """
        label_match = re.search(r'Label\s*:\s*([A-Za-z]+)', text)
        explanation_match = re.search(r'Explanation\s*:\s*(.*)', text, flags=re.DOTALL)
        label = label_match.group(1).capitalize() if label_match else None
        explanation = explanation_match.group(1).strip() if explanation_match else ''
        return label, explanation

    def _prediction_pipeline(self, input : str, model) -> str :
        """Tokenize the input, feed it to the given model and decode the output to a string.

        This method is callable for all models except System 4 (use the method
        defined in the class of System 4).
        """
        tokenized_input = self.tokenizer(input, return_tensors='pt').input_ids
        model_output = model.generate(tokenized_input, max_new_tokens=100)
        return self.tokenizer.decode(model_output[0], skip_special_tokens=True)

    def _elaborate(self, sentence) :
        """Return the DREAM elaboration of `sentence` for each dimension, period-terminated."""
        return {
            dimension : self._with_final_period(
                self._prediction_pipeline(f"[SITUATION] {sentence} [QUERY] {dimension}", self.model_dream))
            for dimension in self._DREAM_DIMENSIONS
        }

    def _get_all_predictions(self, input : list) :
        """Build the prompt of each system from `[premise, hypothesis]` and run it.

        Returns a dictionary of all models' predictions, keyed 'S1', 'S2',
        'S3-emo', 'S3-mot', 'S3-cons', 'S3-rot', 'S3-all' and 'S4'.
        """
        # NOTE(review): these prompts use "Premise :" / "Hypothesis :" with a space
        # before the colon -- confirm it matches the format the loaded checkpoints
        # were trained with.
        prem, hyp = (self._with_final_period(sentence) for sentence in input)
        question = self._QUESTION

        predictions = dict()

        input_1 = f"Premise : {prem} Hypothesis : {hyp} {question}"
        predictions['S1'] = self._prediction_pipeline(input_1, self.model_s1)

        input_2 = f"Premise : {prem} Hypothesis : {hyp} What is the type of figurative language involved? {question}"
        predictions['S2'] = self._prediction_pipeline(input_2, self.model_s2)

        # DREAM elaborations for system 3
        prem_elaborations = self._elaborate(prem)
        hyp_elaborations = self._elaborate(hyp)

        single_dimension_systems = [
            ('emotion', 'S3-emo', self.model_s3_emo),
            ('motivation', 'S3-mot', self.model_s3_mot),
            ('consequence', 'S3-cons', self.model_s3_cons),
            ('rot', 'S3-rot', self.model_s3_rot),
        ]
        for dimension, system, model in single_dimension_systems :
            tag = f"[{dimension.capitalize()}]"
            prompt = (f"Premise : {prem} {tag} {prem_elaborations[dimension]} "
                      f"Hypothesis : {hyp} {tag} {hyp_elaborations[dimension]} {question}")
            predictions[system] = self._prediction_pipeline(prompt, model)

        prem_all = " ".join(f"[{key.capitalize()}] {elab}" for key, elab in prem_elaborations.items())
        hyp_all = " ".join(f"[{key.capitalize()}] {elab}" for key, elab in hyp_elaborations.items())
        input_3_all = f"Premise : {prem} {prem_all} Hypothesis : {hyp} {hyp_all} {question}"
        predictions['S3-all'] = self._prediction_pipeline(input_3_all, self.model_s3_alldims)

        # The input for system 4 is in the same format as for system 1
        predictions['S4'] = self.model_s4.prediction_pipeline(input_1)

        return predictions

    def _ensemble_algorithm(self, model_outputs) :
        """Use the predictions from each model to compute the final prediction of the ensemble.

        Args:
            model_outputs: dict mapping a system key to its generated answer.

        Returns:
            "Label : <label>. Explanation : <explanation>".

        Raises:
            ValueError: if none of the voting systems produced a valid label.
        """
        parsed = {key : self._parse_prediction(text) for key, text in model_outputs.items()}

        # Firstly, the label is selected by majority among the voting systems.
        # With the small models the generated label is sometimes a mix of words,
        # like 'Contratailment' or 'Endiction': such votes are ignored.
        votes = [parsed[key][0] for key in self._VOTING_SYSTEMS]
        valid_votes = [label for label in votes if label in self._VALID_LABELS]
        if not valid_votes :
            raise ValueError('None of the voting systems produced a valid label')
        # Labels are tallied in alphabetical order and max() keeps the first
        # maximum, so a tie goes to the alphabetically first label.
        tally = {label : valid_votes.count(label) for label in sorted(set(valid_votes))}
        major_label = max(tally, key=tally.get)

        # Then, pick the explanation of the first system agreeing with the majority label.
        explanation = ''
        for key in self._EXPLANATION_PRIORITY :
            label, candidate = parsed[key]
            if label == major_label and candidate :
                explanation = self._with_final_period(candidate)
                break

        return f"Label : {major_label}. Explanation : {explanation}"

    def predict(self, inputs) :
        """Predict the label and explanation of one pair or of a list of pairs.

        Args:
            inputs: [premise_sentence, hypothesis_sentence], or a list of such pairs.

        Returns:
            The final prediction string, or the list of them for a list of pairs.

        Raises:
            TypeError: if `inputs` has neither of the two accepted shapes.
        """
        if isinstance(inputs, list) and len(inputs) == 2 and all(isinstance(item, str) for item in inputs) :
            return self._ensemble_algorithm(self._get_all_predictions(inputs))

        if isinstance(inputs, list) and all(isinstance(item, list) for item in inputs) :
            return [self.predict(item) for item in inputs]

        raise TypeError('Inputs should be either a list of two strings or a list of lists of two strings')