In [2]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
# Read the ';'-separated CSV that will be run through the classifier.
df_test = pd.read_csv("./go_emotion_dataset/goemotion_train_i2.csv", sep=';')

# Every column except the identifier and the raw text is treated as a label column.
not_chosen_columns = ['ID', 'Tweet']
label_columns = df_test.columns[~df_test.columns.isin(not_chosen_columns)].tolist()

# Identifiers and texts as plain Python lists (same row order as the CSV).
test_id = df_test['ID'].tolist()
test_texts = df_test['Tweet'].tolist()

# Label values: once as a DataFrame, once as a list of per-row lists.
df_labels_test = df_test[label_columns]
list_labels_test = df_labels_test.to_numpy().tolist()
test_labels = list_labels_test  # alias of the same list object

# Tokenizer and classifier are both restored from the same checkpoint identifier.
tokenizer = BertTokenizer.from_pretrained("saved_model_try_6_i2")
model = BertForSequenceClassification.from_pretrained("saved_model_try_6_i2")

In [4]:
# Score every text with the loaded classifier and collect, per text, the list of
# per-label sigmoid probabilities in `all_predictions`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model has to live on the same device as its inputs. Previously only the
# inputs were moved, so the forward pass failed with a device-mismatch error
# whenever CUDA was available.
model.to(device)
model.eval()  # inference mode: disables dropout

all_predictions = []

# Process each text
for text in test_texts:
    # One text per forward pass, so `padding=True` has nothing to pad against;
    # `truncation=True` clips over-long inputs to the tokenizer's maximum length.
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    inputs = inputs.to(device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    probabilities = torch.sigmoid(logits)  # Apply sigmoid to convert logits to probabilities
    probabilities = probabilities.squeeze(0)  # drop the leading batch dimension (batch of one)
    probabilities = probabilities.tolist()

    all_predictions.append(probabilities)

In [5]:
# Stack the per-text probability lists into a 2-D array: one row per text.
a = np.array(all_predictions)

# Split the first eleven columns into one plain list per emotion.
# NOTE(review): the column -> emotion order below is taken as given; confirm it
# matches the label order the model was trained with.
(anger, anticipation, disgust, fear, joy, love,
 optimism, pessimism, sadness, surprise, trust) = (
    a[:, column_index].tolist() for column_index in range(11)
)

# Emotion columns in the order they appear in the output file.
emotion_scores = {
    'anger': anger,
    'anticipation': anticipation,
    'disgust': disgust,
    'fear': fear,
    'joy': joy,
    'love': love,
    'optimism': optimism,
    'pessimism': pessimism,
    'sadness': sadness,
    'surprise': surprise,
    'trust': trust,
}

# Identifier and text first, followed by the eleven probability columns.
pseudo_labeled_dataset = pd.DataFrame({'ID': test_id, 'Tweet': test_texts, **emotion_scores})

# Persist as a ';'-separated UTF-8 CSV without the positional index.
pseudo_labeled_dataset.to_csv('pseudo_labeled_dataset_i1.csv', encoding='utf_8', index=False, sep=';')


In [6]:
# Rows whose predicted 'trust' probability exceeds 0.65, highest first.
# The mask is evaluated on the already-sorted frame via a callable `.loc`, so it
# is aligned with the rows it filters. Indexing the sorted frame with a mask built
# from the unsorted one made pandas emit "Boolean Series key will be reindexed to
# match DataFrame index".
pseudo_labeled_dataset.sort_values(by='trust', ascending=False).loc[lambda frame: frame['trust'] > 0.65]

  pseudo_labeled_dataset.sort_values(by='trust', ascending=False)[pseudo_labeled_dataset['trust'] > 0.65]


Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
11708,ed19t6a,Anything is a dildo of you're brave enough,0.077933,0.122461,0.046029,0.173902,0.108056,0.110631,0.501548,0.072085,0.054894,0.046368,0.852423
18457,ee2flql,Alway protect yourself,0.090108,0.139424,0.051258,0.156635,0.099426,0.103484,0.469578,0.075968,0.064776,0.046545,0.838971
16187,ed8ma5b,Thank you for providing an unbiased & sensible...,0.078899,0.140619,0.052820,0.121172,0.099130,0.104772,0.321970,0.105711,0.081425,0.068242,0.838406
14876,eefvvg0,Respect for the pedes,0.107745,0.111379,0.058363,0.107954,0.075680,0.135611,0.324404,0.083293,0.068631,0.070732,0.826014
12620,efcqm9e,Thank you for your advice!,0.071449,0.109628,0.045809,0.110659,0.094731,0.127368,0.294822,0.108275,0.082804,0.063430,0.823668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4284,ed5vgdb,Thank you for helping me realize that p&r bloo...,0.041191,0.101370,0.027307,0.037667,0.133593,0.112044,0.326325,0.048431,0.041619,0.039853,0.650492
9400,edbbiz0,"Find yourself another pretty, nice girl that d...",0.084577,0.125696,0.046439,0.032907,0.150147,0.105333,0.690024,0.047982,0.046906,0.021315,0.650340
7405,ef6oldj,You've violated rules 4 and 5. Please adhere t...,0.203712,0.118281,0.101460,0.139305,0.037536,0.047997,0.339312,0.076705,0.063593,0.034201,0.650183
17183,eevq1jv,"Don't worry, neither have I",0.035832,0.125915,0.030481,0.192093,0.067923,0.084309,0.368459,0.072853,0.044483,0.025330,0.650166
