In [2]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
# Read the semicolon-separated input file, assigning the three column names
# explicitly.
# NOTE(review): the file name says "train" while the variables say "test" --
# confirm this is the split that is meant to be pseudo-labelled.
df_test = pd.read_csv(
    "./go_emotion_dataset/goemotion_train_i0.csv",
    sep=';',
    names=["Tweet", "Label", "ID"],
)

# Every column that is neither the identifier nor the text counts as a label.
not_chosen_columns = ['ID', 'Tweet']
label_columns = [name for name in df_test.columns if name not in not_chosen_columns]

# Plain Python lists of the ids and the raw texts, in file order.
test_id = df_test['ID'].tolist()
test_texts = df_test['Tweet'].tolist()

# Label values as a list of rows (one inner list per input row).
df_labels_test = df_test[label_columns]
list_labels_test = df_labels_test.values.tolist()
test_labels = list_labels_test

# Tokenizer and classifier are both restored from the same local directory.
model_dir = "saved_model"
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)

In [4]:
# Score every text with the fine-tuned classifier and collect, per text, one
# list of sigmoid probabilities (one value per model output).

# Use the GPU when one is available. The model must be moved to the same
# device as the tokenized inputs; without this the forward pass fails with a
# device-mismatch error on any machine where CUDA is present.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # switch the module to evaluation mode before inference

all_predictions = []

# Process each text on its own (batch of size 1)
for text in test_texts:
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    inputs = inputs.to(device)

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    # Independent sigmoid per output; squeeze(0) drops the leading batch
    # dimension of size 1 before converting to a plain Python list.
    probabilities = torch.sigmoid(logits).squeeze(0).tolist()

    all_predictions.append(probabilities)

In [5]:
# Stack the per-text probability lists into a 2-D array (rows = texts).
a = np.array(all_predictions)

# Column k of the array is stored under the k-th name below.
# NOTE(review): assumes the model's outputs follow exactly this order --
# confirm against the label order used when the model was trained.
emotion_names = (
    'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love',
    'optimism', 'pessimism', 'sadness', 'surprise', 'trust',
)
emotion_scores = {
    name: a[:, position].tolist()
    for position, name in enumerate(emotion_names)
}

# Keep the individual per-emotion lists available under their own names.
(anger, anticipation, disgust, fear, joy, love,
 optimism, pessimism, sadness, surprise, trust) = (
    emotion_scores[name] for name in emotion_names
)

# One row per text: identifier, raw text, then one probability column per
# emotion in the order listed above.
pseudo_labeled_dataset = pd.DataFrame({
    'ID': test_id,
    'Tweet': test_texts,
    **emotion_scores,
})

# Persist as a semicolon-separated file without the index column.
pseudo_labeled_dataset.to_csv(
    'pseudo_labeled_dataset_i1.csv',
    encoding='utf_8',
    index=False,
    sep=';',
)


In [6]:
# Show the rows whose 'trust' probability exceeds 0.65, highest first.
# The mask is computed on the already-sorted frame (callable passed to .loc),
# so it is aligned with the rows it selects. Building the mask from the
# unsorted frame forces pandas to reindex it and triggers the
# "Boolean Series key will be reindexed to match DataFrame index" warning.
pseudo_labeled_dataset.sort_values(by='trust', ascending=False).loc[lambda frame: frame['trust'] > 0.65]

  pseudo_labeled_dataset.sort_values(by='trust', ascending=False)[pseudo_labeled_dataset['trust'] > 0.65]


Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
16576,eefol3s,Thank you very much! I will get some courage a...,0.058643,0.609365,0.038845,0.257820,0.745301,0.370875,0.947640,0.107150,0.065455,0.149945,0.880466
13700,eemuo16,"You are in the right here, and I support your ...",0.178232,0.608967,0.107609,0.244979,0.524449,0.327627,0.935036,0.250255,0.125072,0.121978,0.875115
12749,edgmw36,I have faith that she will realise she deserve...,0.079014,0.356259,0.056929,0.154618,0.778449,0.596587,0.955365,0.147550,0.118148,0.110304,0.870399
21019,edk8w8o,"In such a situation, I would save the mother. ...",0.074318,0.601026,0.048922,0.212808,0.719007,0.535755,0.946936,0.193453,0.110098,0.149231,0.860793
1163,edjcm7b,You are the expert.,0.132078,0.726475,0.085330,0.190831,0.523937,0.223086,0.909723,0.175530,0.071775,0.158331,0.854071
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40341,eedc5i8,"Being a woman is not an excuse, you do not rep...",0.181335,0.614432,0.108581,0.116716,0.318613,0.112834,0.856997,0.189490,0.131754,0.114092,0.650330
18370,edin1tm,Thank you brother,0.052580,0.198246,0.041112,0.029715,0.921881,0.622709,0.914690,0.068242,0.117199,0.096216,0.650246
22045,ee0no3h,Just subscribed! Keep strong bro focus on the ...,0.062262,0.416136,0.041791,0.060347,0.854602,0.218283,0.902340,0.037940,0.045123,0.093324,0.650230
2088,eekskp9,My second doc! I’m a student from Cleveland an...,0.046301,0.664109,0.042986,0.067009,0.966748,0.688298,0.890951,0.038431,0.027850,0.325048,0.650152
