In [1]:
import os
import shutil

import numpy as np
import pandas as pd
import sklearn
from imblearn.over_sampling import RandomOverSampler, SMOTE
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from simpletransformers.classification import ClassificationModel
from sklearn.model_selection import KFold

In [2]:
# read the dev set
df = pd.read_json("snli_1.0/snli_1.0_dev.jsonl", lines=True)

# Select the rows that can be scored: a non-empty gold label and at least one
# annotator label.
usable = [
    gold_label != '' and bool(labels)
    for labels, gold_label in zip(df['annotator_labels'], df['gold_label'])
]
scored = df.loc[usable]

# calculate the agreement: how many annotator labels equal the gold label
agreement = [
    labels.count(gold_label)
    for labels, gold_label in zip(scored['annotator_labels'], scored['gold_label'])
]

# Index the counts by the row they were computed from. A fresh 0..k-1 index
# would shift every later value onto the wrong sentence pair as soon as one
# row is skipped, because the frame is combined with `df` by index afterwards.
agr = pd.DataFrame({'agreement': agreement}, index=scored.index)

# Original author's note: after dividing by 5, only 0.0, 0.6, 0.8 and 1.0 were
# observed as agreement values.

In [3]:
# create the dataframe
# Divisor that turns the vote count into a fraction.
# NOTE(review): assumes every pair has exactly five annotator labels - confirm.
N_ANNOTATORS = 5

df = pd.DataFrame({
    'text_a': df['sentence1'],
    'text_b': df['sentence2'],
    'labels': agr['agreement'] / N_ANNOTATORS
})

# The columns above are aligned by index, so a sentence pair that has no entry
# in `agr` ends up with a NaN label. Drop those rows so no NaN is used as a
# regression target, and renumber the rows afterwards.
df = df.dropna(subset=['labels']).reset_index(drop=True)


In [4]:
# Hyper-parameter grid: every combination below is evaluated with 10-fold
# cross-validation and the per-fold correlations are appended to a text file.
models = ['roberta-large', 'roberta-base']
epochs = [1,2,3,4]
lrs = [1e-3,1e-4,1e-5,1e-6,5e-3,5e-4,5e-5,5e-6]
bss = [8,16,32]
# shuffle / random_state must be passed by keyword: current scikit-learn
# rejects them as positional arguments.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)


for model_name in models:
    for epoch in epochs:
        for lr in lrs:
            for bs in bss:
                # The arguments depend only on the grid point, so build them
                # once instead of once per fold.
                train_args = {
                    "output_dir": "outputs_regression/",
                    "best_model_dir": "outputs_regression/best_model/",
                    'reprocess_input_data': True,
                    'overwrite_output_dir': True,
                    'num_train_epochs': epoch,
                    'train_batch_size': bs,
                    'eval_batch_size': bs,
                    'learning_rate': lr,
                    'regression': True
                }

                # The context manager closes the results file even when a fold
                # raises, so the handle is never leaked.
                with open("nn_regression_snli.txt", "a") as file:
                    header = f'Model: {model_name} Epoch: {epoch} LR: {lr} Batch: {bs} \n'
                    file.write(header)

                    for train, test in kfold.split(df):
                        train_df = df.iloc[train]
                        test_df = df.iloc[test]

                        try:
                            # Create a ClassificationModel
                            model = ClassificationModel('roberta', model_name, num_labels=1, use_cuda=True, cuda_device=2, args=train_args)

                            # Train the model
                            model.train_model(train_df, eval_df=test_df)
                            result, model_outputs, wrong_predictions = model.eval_model(test_df)
                        finally:
                            # Remove the output directory whether or not the
                            # fold succeeded; ignore_errors covers the case
                            # where it was never created.
                            shutil.rmtree("outputs_regression/", ignore_errors=True)

                        gold = test_df['labels'].values
                        file.write("  " + str(pearsonr(model_outputs, gold)))
                        file.write('\n')
                        file.write("  " + str(spearmanr(model_outputs, gold)))
                        file.write('\n')
                        # Push each fold's scores to disk right away so a later
                        # crash does not lose them.
                        file.flush()

                    file.write('\n')

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.o

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=13.0, style=ProgressStyle(desc…











HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=125.0, style=ProgressStyle(descr…


(-0.0245600605610014, 0.4378653425951808)
SpearmanrResult(correlation=-0.03632682395304889, pvalue=0.2510936362646193)


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.o

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=13.0, style=ProgressStyle(desc…





HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=125.0, style=ProgressStyle(descr…


(-0.047103502201703326, 0.1366172948111847)
SpearmanrResult(correlation=-0.03282610353392554, pvalue=0.2997201507706112)


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.o

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=13.0, style=ProgressStyle(desc…





RuntimeError: [enforce fail at inline_container.cc:262] . unexpected pos 340245504 vs 340245392