In [1]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the semicolon-separated CSV that will be scored by the model.
df_test = pd.read_csv("./go_emotion_dataset/goemotion_train_i1.csv", sep=';')

# 'ID' and 'Tweet' are metadata; every remaining column is treated as a label.
not_chosen_columns = ['ID', 'Tweet']
label_columns = [name for name in df_test.columns if name not in not_chosen_columns]

# Identifier and raw text of each row, as plain Python lists.
test_id = df_test['ID'].tolist()
test_texts = df_test['Tweet'].tolist()

# Label values of each row as a list of lists (one inner list per row).
df_labels_test = df_test.loc[:, label_columns]
list_labels_test = df_labels_test.to_numpy().tolist()
test_labels = list_labels_test

# Tokenizer and classifier are both restored from the same saved directory.
model_dir = "saved_model_try_6_i1"
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)

In [3]:
# Score every text with the loaded classifier and collect one list of
# per-label probabilities per text in `all_predictions`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The inputs below are moved to `device`, so the model must live there too;
# without this a CUDA machine fails with a device-mismatch error because the
# freshly loaded model stays on the CPU.
model.to(device)
model.eval()  # inference mode (e.g. dropout disabled)

all_predictions = []

# Process each text on its own (batch of one).
for text in test_texts:
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    inputs = inputs.to(device)

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    # Element-wise sigmoid maps each logit to a value in (0, 1);
    # squeeze(0) drops the leading batch dimension of size one.
    probabilities = torch.sigmoid(logits).squeeze(0).tolist()

    all_predictions.append(probabilities)

In [4]:
# Stack the per-text probability lists into one 2-D array:
# one row per text, one column per label position.
a = np.array(all_predictions)

# Label names in the order of the column positions (0..10) they are read from.
emotion_names = [
    'anger',
    'anticipation',
    'disgust',
    'fear',
    'joy',
    'love',
    'optimism',
    'pessimism',
    'sadness',
    'surprise',
    'trust',
]

# One probability column per emotion, preceded by the row ID and the text.
emotion_columns = {
    emotion: a[:, position].tolist()
    for position, emotion in enumerate(emotion_names)
}

pseudo_labeled_dataset = pd.DataFrame({
    'ID': test_id,
    'Tweet': test_texts,
    **emotion_columns,
})

# Persist the scored rows as a semicolon-separated CSV without the index column.
pseudo_labeled_dataset.to_csv('pseudo_labeled_dataset_i1.csv', encoding='utf_8', index=False, sep=';')


In [7]:
# Show the rows whose 'trust' probability exceeds 0.8, highest first.
# Filter before sorting: the mask is built on the unsorted frame, and applying
# it to the already-sorted frame makes pandas emit the
# "Boolean Series key will be reindexed to match DataFrame index" UserWarning.
pseudo_labeled_dataset[pseudo_labeled_dataset['trust'] > 0.8].sort_values(by='trust', ascending=False)

  pseudo_labeled_dataset.sort_values(by='trust', ascending=False)[pseudo_labeled_dataset['trust'] > 0.8]


Unnamed: 0,ID,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
10040,eezt0pn,Good advice right here and good things to thin...,0.066324,0.111311,0.039576,0.068549,0.083272,0.085656,0.465097,0.068230,0.038447,0.044471,0.925164
5593,eczwaho,Keep it simple. Do what makes the money so you...,0.074127,0.128603,0.044822,0.063565,0.069112,0.066058,0.453204,0.079745,0.041813,0.043595,0.912670
13889,eeh9yrs,"Just be GOOD at what you do, and fade the chase",0.063635,0.142689,0.037850,0.066879,0.108172,0.071422,0.560705,0.069646,0.043537,0.037941,0.911696
28267,ee57yyc,You don't hand your baby to perfect strangers ...,0.085825,0.132648,0.053225,0.088124,0.061347,0.058942,0.416417,0.073257,0.041171,0.044883,0.908177
10544,ed0x9ao,"Choose wisely, you ....",0.072521,0.108473,0.046730,0.079624,0.055185,0.075861,0.346473,0.071350,0.037466,0.044514,0.906057
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14465,ee6zh74,"Meta. If you feel otherwise, please speak now,...",0.074955,0.269901,0.041623,0.057371,0.067633,0.039318,0.403106,0.056035,0.041194,0.039992,0.800263
35492,eda8yr3,Also thank you for the link.,0.043216,0.102398,0.033006,0.042573,0.040754,0.089835,0.129747,0.055728,0.028020,0.066281,0.800154
5435,eelal46,You're a big guy.,0.045103,0.064932,0.030731,0.042700,0.053281,0.122771,0.198026,0.056332,0.026111,0.049600,0.800110
10826,ee8cwq6,It took 8 years of healing before I could open...,0.029905,0.139895,0.018373,0.047767,0.212665,0.092412,0.729715,0.085610,0.056780,0.034466,0.800088
