In [1]:
import pickle
import os
import re
import unicodedata
import itertools

import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, f1_score, classification_report
import numpy as np

import keras
from keras.models import Model
import keras.backend as K
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

# from transformers import *
from transformers import BertTokenizer, TFBertModel, BertConfig, TFBertForSequenceClassification

# import emoji
# import matplotlib.pyplot as plt
# import torch
from tqdm.notebook import tqdm
import nltk
from nltk.corpus import stopwords

import spacy
nlp = spacy.load('en_core_web_sm')      

tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [2]:
def preprocess_text(text):
    if text == '':
        return ''
    else:
        text = text.lower()
        text_cleaned = re.sub(r'@[A-Za-z0-9_]+', '', text)
        text_cleaned = re.sub(r'#[A-Za-z0-9_]+', '', text_cleaned)
        text_cleaned = re.sub(r'https?:\/\/\S*', '', text_cleaned)
        text_cleaned = text_cleaned.replace(',', '')
        
        tokenized = nlp(text_cleaned)
        output_list = []
        for token in tokenized:
            if not token.is_stop:
                output_list.append(token.lemma_)
        
        output = ' '.join([x for x in output_list if x != ''])
        return output

In [3]:
def create_bert_tokenizer_model(num_classes):
    bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    bert_model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_classes)
    return bert_tokenizer, bert_model

In [4]:
def create_sentence_embeddings(sentences, bert_tokenizer):
    input_ids=[]
    attention_masks=[]

    for sent in sentences:
        bert_inp = bert_tokenizer.encode_plus(sent, add_special_tokens=True, max_length=64, pad_to_max_length=True,
                                            return_attention_mask = True)
        input_ids.append(bert_inp['input_ids'])
        attention_masks.append(bert_inp['attention_mask'])
        
    input_ids=np.asarray(input_ids)
    attention_masks=np.array(attention_masks)
    return input_ids, attention_masks

In [24]:
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='../models/output',save_weights_only=True,
                                                monitor='val_loss', mode='min',save_best_only=True),
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1)
            ]
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-7, epsilon=5e-9)

In [25]:
def compile_fit_bert_model(bert_model, input_ids, attention_masks, labels, input_ids_val, attention_masks_val, labels_val):
    bert_model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
    # First fit for a minimum number of epochs
    # bert_model.fit([input_ids, attention_masks], labels, batch_size=32,
    #                    epochs=10, validation_data=([input_ids_val, attention_masks_val], labels_val))
    # Then do early stopping
    bert_model.fit([input_ids, attention_masks], labels, batch_size=32,
                       epochs=100, callbacks=callbacks, validation_data=([input_ids_val, attention_masks_val], labels_val))
    return bert_model

# Kennedy+2020 data

In [7]:
import os

datasets = [
    'kennedy2020'
]
splits = {dataset: {'hegsplits': {}, 'controlsplits': {}} for dataset in datasets} 
hate_ratio = 0.3

def load_heg_control(splits):                                                                                  
    """ Load heg and control dataset splits """                                                                                                                                                                                                  
    for dataset_name, s in splits.items():                                                                                                                                                                                                 
        # Load csv                                                                                                                                                                                                                                   
        dataset_path = f'../data/{dataset_name}/processed'                                                                                                                                                               
        for splits_name in s:                                                                                                                                                                                                                       
            for split_name in ['with_special', 'no_special']:                                                                                                                                                                                                
                csvpath = os.path.join(dataset_path, f'{dataset_name}_{hate_ratio}hate_{splits_name}_{split_name}.csv')                                                                                                                                 
                splits[dataset_name][splits_name][split_name] = pd.read_csv(csvpath, index_col=0) 
    return splits

splits = load_heg_control(splits)
print(splits.keys())
splits['kennedy2020'].keys()

dict_keys(['kennedy2020'])


dict_keys(['hegsplits', 'controlsplits'])

## hegsplits

In [8]:
# Split into train, dev, test (50/10/40)
# May have duplicates, so should split on indices
import numpy as np

df = splits[datasets[0]]['hegsplits']['with_special'].sample(frac=1, random_state=9)
inds = {}
inds['train'], inds['dev'], inds['test'] = np.split(list(df.index.unique()), [int(0.5*len(df)), int(0.6*len(df))])

folds = {}
for fold in inds:
    folds[fold] = df[df.index.isin(inds[fold])]
train, dev, test = folds['train'], folds['dev'], folds['test']
print(len(train))
print(len(dev))
print(len(test))

8251
1650
6602


In [9]:
from tqdm.notebook import tqdm
tqdm.pandas()
# Process data
# df_contextual_train = pd.read_csv('contextual_abuse_dataset/cad_v1_1_train.tsv', sep='\t')
# df_contextual_train = pd.read_csv('/storage2/mamille3/data/hate_speech/contextual_abuse_dataset/cad_v1_1_train.tsv', sep='\t')

# df_contextual_train = df_contextual_train.dropna(subset=['text'])
# df_contextual_train['label_bin'] = df_contextual_train['labels'].apply(cad_off_or_not) # did assign it to df_contextual_test (bug?)
# train = kennedy_data['no_heg']['train']
# train['preprocess_text'] = train['text'].apply(preprocess_text) # takes a long time
train['preprocess_text'] = train['text'].progress_apply(preprocess_text) # takes a long time

# df_contextual_test = pd.read_csv('contextual_abuse_dataset/cad_v1_1_test.tsv', sep='\t')
# df_contextual_test = pd.read_csv('/storage2/mamille3/data/hate_speech/contextual_abuse_dataset/cad_v1_1_test.tsv', sep='\t')
# def cad_off_or_not(label):
#     if label == 'Neutral':
#         return 0
#     else:
#         return 1

dev['preprocess_text'] = dev['text'].progress_apply(preprocess_text) # takes a long time

# df_contextual_test = df_contextual_test.dropna(subset=['text'])
# df_contextual_test['label_bin'] = df_contextual_test['labels'].apply(cad_off_or_not)
# test = kennedy_data['with_heg']['test']
# test = kennedy_data['no_heg']['test']
test['preprocess_text'] = test['text'].progress_apply(preprocess_text)

  0%|          | 0/8251 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['preprocess_text'] = train['text'].progress_apply(preprocess_text) # takes a long time


  0%|          | 0/1650 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dev['preprocess_text'] = dev['text'].progress_apply(preprocess_text) # takes a long time


  0%|          | 0/6602 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['preprocess_text'] = test['text'].progress_apply(preprocess_text)


In [26]:
num_classes=len(train['hate'].unique())
kennedy_bert_tokenizer, kennedy_bert_model = create_bert_tokenizer_model(num_classes)

# sentences_cad_train = df_contextual_train['preprocess_text']
# labels_cad_train = df_contextual_train['label_bin']

input_ids_train, attention_masks_train = create_sentence_embeddings(train['preprocess_text'], kennedy_bert_tokenizer)
input_ids_dev, attention_masks_dev = create_sentence_embeddings(dev['preprocess_text'], kennedy_bert_tokenizer)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [27]:
# kennedy_model_with_heg = compile_fit_bert_model(kennedy_bert_model, input_ids_train, attention_masks_train, train['hate'], epochs=5)
kennedy_model_with_heg = compile_fit_bert_model(kennedy_bert_model, input_ids_train, attention_masks_train, train['hate'], input_ids_dev, attention_masks_dev, dev['hate'])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 00014: early stopping


In [28]:
input_ids_test, attention_masks_test = create_sentence_embeddings(test['preprocess_text'], kennedy_bert_tokenizer)
new_preds = kennedy_model_with_heg.predict([input_ids_test, attention_masks_test],batch_size=32)
new_pred_labels = new_preds['logits'].argmax(axis=1)
test['label_pred'] = new_pred_labels
test_classification = classification_report(test['hate'].tolist(), test['label_pred'].tolist(), output_dict=True)

scores = pd.DataFrame(test_classification)
scores

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['label_pred'] = new_pred_labels


Unnamed: 0,False,True,accuracy,macro avg,weighted avg
precision,0.815278,0.677337,0.782642,0.746307,0.77372
recall,0.890744,0.531926,0.782642,0.711335,0.782642
f1-score,0.851342,0.595888,0.782642,0.723615,0.774381
support,4613.0,1989.0,0.782642,6602.0,6602.0


In [30]:
# Save out classification report
model_settings = 'bert_5epochs_no_heg'
outpath = f'{model_settings}_scores.csv'
scores.to_csv(outpath)

# Contextual Abuse Dataset

In [7]:
df_contextual_test = pd.read_csv('contextual_abuse_dataset/cad_v1_1_test.tsv', sep='\t')
# df_contextual_test = pd.read_csv('/storage2/mamille3/data/hate_speech/contextual_abuse_dataset/cad_v1_1_test.tsv', sep='\t')
def cad_off_or_not(label):
    if label == 'Neutral':
        return 0
    else:
        return 1

df_contextual_test = df_contextual_test.dropna(subset=['text'])
df_contextual_test['label_bin'] = df_contextual_test['labels'].apply(cad_off_or_not)
df_contextual_test['preprocess_text'] = df_contextual_test['text'].apply(preprocess_text)

In [12]:
# Prediction using SemEval model
sentences_cad = df_contextual_test['preprocess_text']
input_ids_cad, attention_masks_cad = create_sentence_embeddings(sentences_cad, civ_bert_tokenizer)

NameError: name 'civ_bert_tokenizer' is not defined

In [None]:
cad_preds = civ_bert_model.predict([input_ids_cad, attention_masks_cad],batch_size=32)
cad_pred_labels = cad_preds['logits'].argmax(axis=1)
df_contextual_test['label_pred'] = cad_pred_labels
df_dev_classification = classification_report(df_contextual_test['label_bin'].tolist(), df_contextual_test['label_pred'].tolist(), output_dict=True)
pd.DataFrame(df_dev_classification)

In [8]:
# CAD own model
df_contextual_train = pd.read_csv('contextual_abuse_dataset/cad_v1_1_train.tsv', sep='\t')
# df_contextual_train = pd.read_csv('/storage2/mamille3/data/hate_speech/contextual_abuse_dataset/cad_v1_1_train.tsv', sep='\t')

df_contextual_train = df_contextual_train.dropna(subset=['text'])
df_contextual_train['label_bin'] = df_contextual_train['labels'].apply(cad_off_or_not) # did assign it to df_contextual_test (bug?)
df_contextual_train['preprocess_text'] = df_contextual_train['text'].apply(preprocess_text)

In [9]:
num_classes=len(df_contextual_train['label_bin'].unique()) # originally df_cad_train
cad_bert_tokenizer, cad_bert_model = create_bert_tokenizer_model(num_classes)

sentences_cad_train = df_contextual_train['preprocess_text']
labels_cad_train = df_contextual_train['label_bin']

input_ids_train_cad, attention_masks_train_cad = create_sentence_embeddings(sentences_cad_train, cad_bert_tokenizer)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [10]:
cad_model = compile_fit_bert_model(cad_bert_model, input_ids_train_cad, attention_masks_train_cad, labels_cad_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
input_ids_cad, attention_masks_cad = create_sentence_embeddings(sentences_cad, cad_bert_tokenizer)
cad_new_preds = cad_model.predict([input_ids_cad, attention_masks_cad],batch_size=32)
cad_new_pred_labels = cad_new_preds['logits'].argmax(axis=1)
df_contextual_test['label_pred_new'] = cad_new_pred_labels
df_dev_classification = classification_report(df_contextual_test['label_bin'].tolist(), df_contextual_test['label_pred_new'].tolist(), output_dict=True)
pd.DataFrame(df_dev_classification)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.864842,0.505988,0.819672,0.685415,0.799589
recall,0.923998,0.350259,0.819672,0.637129,0.819672
f1-score,0.893442,0.413962,0.819672,0.653702,0.806255
support,4342.0,965.0,0.819672,5307.0,5307.0


# Lynnette's data

### Civility Data

In [27]:
df_train = pd.read_csv('./civility_data/civility_data/train.tsv', sep='\t', encoding='utf-8')
df_train['preprocess_text'] = df_train['text'].apply(preprocess_text)
df_train['label_bin'] = df_train['label'].apply(lambda x: 0 if x=='OFF' else 1)
df_train.head()

Unnamed: 0,text,label,category,preprocess_text,label_bin
0,@USER @USER You are an embarrassing citizen!!,OFF,TIN,embarrassing citizen ! !,0
1,@USER Seems hard to believe that you stood nex...,OFF,TIN,hard believe stand guy wear short masturbate...,0
2,@USER @USER @USER Wow !!! no wonder the Libera...,OFF,TIN,wow ! ! ! wonder liberal get bad party bul...,0
3,@USER @USER And not all idiots grandstands lik...,OFF,TIN,idiot grandstand like,0
4,@USER Bring on the hypocrite gungrabber. MAGA,OFF,TIN,bring hypocrite gungrabber . maga,0


In [28]:
num_classes=len(df_train['label_bin'].unique())
civ_bert_tokenizer, civ_bert_model = create_bert_tokenizer_model(num_classes)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [43]:
sentences = df_train['preprocess_text']
civ_labels = df_train['label_bin']
civ_labels = np.array(civ_labels)
len(sentences), len(civ_labels)

(10592, 10592)

In [30]:
civ_input_ids, civ_attention_masks = create_sentence_embeddings(sentences, civ_bert_tokenizer)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [44]:
civ_bert_model = compile_fit_bert_model(civ_bert_model, civ_input_ids, civ_attention_masks, civ_labels, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [32]:
# Get civ dev data
df_dev = pd.read_csv('./civility_data/civility_data/dev.tsv', sep='\t', encoding='utf-8')
df_dev['preprocess_text'] = df_dev['text'].apply(preprocess_text)
df_dev['label_bin'] = df_dev['label'].apply(lambda x: 0 if x=='OFF' else 1)
df_dev.head()

Unnamed: 0,text,label,category,perspective_score,preprocess_text,label_bin
0,@USER She should ask a few native Americans wh...,OFF,UNT,0.311852,ask native americans .,0
1,@USER @USER Go home you’re drunk!!! @USER #MAG...,OFF,TIN,0.566334,home drunk ! ! ! 👊 🇺 🇸 👊 url,0
2,Amazon is investigating Chinese employees who ...,NOT,,0.110361,amazon investigate chinese employee sell inter...,1
3,"@USER Someone should'veTaken"" this piece of sh...",OFF,UNT,0.927032,"should'vetaken "" piece shit volcano . 😂 """,0
4,@USER @USER Obama wanted liberals &amp; illega...,NOT,,0.319764,obama want liberal & amp ; illegal red state,1


In [33]:
sentences_dev = df_dev['preprocess_text']
labels_dev = np.array(df_dev['label_bin'])

dev_input_ids, dev_attention_masks = create_sentence_embeddings(sentences_dev, civ_bert_tokenizer)
preds = civ_bert_model.predict([dev_input_ids, dev_attention_masks], batch_size=32)
pred_labels = preds['logits'].argmax(axis=1)
df_dev['pred'] = pred_labels
df_dev_classification = classification_report(df_dev['label_bin'].tolist(), df_dev['pred'].tolist(), output_dict=True)
pd.DataFrame(df_dev_classification)



Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.671642,0.815618,0.771903,0.74363,0.767771
recall,0.613636,0.850679,0.771903,0.732158,0.771903
f1-score,0.64133,0.83278,0.771903,0.737055,0.769156
support,440.0,884.0,0.771903,1324.0,1324.0


In [None]:
# civ test data
df_test = pd.read_csv('./civility_data/civility_data/test.tsv', sep='\t', encoding='utf-8')
df_test['preprocess_text'] = df_test['text'].apply(preprocess_text)
sentences_test = df_test['preprocess_text']

test_input_ids, test_attention_masks = create_sentence_embeddings(sentences_test, civ_bert_tokenizer)

test_preds = bert_model.predict([input_ids_test, attention_masks_test],batch_size=32)
test_pred_labels = test_preds['logits'].argmax(axis=1)
df_test['label'] = test_pred_labels
df_test.to_csv('./civility_data/civility_data/lynnette_ng_test_final.tsv', sep='\t', index=False)

## Gab data 

In [34]:
df_gab_test = pd.read_csv('/storage2/mamille3/data/hate_speech/gab_hate_corpus//gab_test.tsv', sep='\t')
df_gab_test['label_bin'] = df_gab_test['vo'].apply(lambda x: 1 if x==1 else 0)
df_gab_test['preprocess_text'] = df_gab_test['text'].apply(preprocess_text)
df_gab_test.head()

Unnamed: 0,text_id,text,purity,harm,im,cv,ex,degradation,fairness,hd,...,rel,sxo,rae,nat,pol,authority,vo,idl,label_bin,preprocess_text
0,29574085,People think bones are made of calcium They a...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,people think bone calcium bone high calciu...
1,37547580,Why hasn't this disgusting illegal Obama polic...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,disgusting illegal obama policy completely rem...
2,21542473,"Persecution for righteousness' sake, part 2: ...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,persecution righteousness ' sake 2 :
3,26888094,Blasphemy will get you Everywhere.......,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,blasphemy .......
4,23041627,"I don't see protecting our borders, as ALL oth...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,protect border nation ! have resemble martial ...


In [35]:
sentences_gab = df_gab_test['preprocess_text']
input_ids_gab, attention_masks_gab = create_sentence_embeddings(sentences_gab, civ_bert_tokenizer)



In [37]:
# Predict with SemEval classifier 
gab_preds = civ_bert_model.predict([input_ids_gab, attention_masks_gab],batch_size=32)
gab_pred_labels = gab_preds['logits'].argmax(axis=1)
df_gab_test['label_pred'] = gab_pred_labels
df_dev_classification = classification_report(df_gab_test['label_bin'].tolist(), df_gab_test['label_pred'].tolist(), output_dict=True)
pd.DataFrame(df_dev_classification)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.794562,0.015104,0.21495,0.404833,0.744752
recall,0.217625,0.175758,0.21495,0.196691,0.21495
f1-score,0.341669,0.027818,0.21495,0.184744,0.321613
support,2417.0,165.0,0.21495,2582.0,2582.0


In [38]:
# Gab's own classifier
df_gab_train = pd.read_csv('./gab_data/gab_data/gab_train.tsv', sep='\t')
df_gab_train['label_bin'] = df_gab_train['vo'].apply(lambda x: 1 if x==1 else 0)
df_gab_train['preprocess_text'] = df_gab_train['text'].apply(preprocess_text)

num_gab_classes=len(df_gab_train['label_bin'].unique())
gab_bert_tokenizer, gab_bert_model = create_bert_tokenizer_model(num_classes)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [39]:
sentences_gab_train = df_gab_train['preprocess_text']
labels_gab_train = np.array(df_gab_train['label_bin'])

input_ids_train_gab, attention_masks_train_gab = create_sentence_embeddings(sentences_gab_train, gab_bert_tokenizer)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
gab_model = compile_fit_bert_model(gab_bert_model, input_ids_train_gab, attention_masks_train_gab, labels_gab_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5

In [None]:
gab_new_preds = gab_model.predict([input_ids_gab, attention_masks_gab],batch_size=32)
gab_new_pred_labels = gab_new_preds['logits'].argmax(axis=1)
df_gab_test['label_pred_new'] = gab_new_pred_labels
df_dev_classification = classification_report(df_gab_test['label_bin'].tolist(), df_gab_test['label_pred_new'].tolist(), output_dict=True)
pd.DataFrame(df_dev_classification)