In [None]:
# !pip install transformers torch scikit-learn unidecode datasets

In [None]:
# Importar las librerías
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from torch.optim import SGD, Adam, AdamW
from transformers import RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer,  AutoModelForSequenceClassification


In [None]:
# Select the compute device: the CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Language code of the split being scored; it is interpolated into the input path,
# the output file name and the model folder path.
lang = "ukr"
# Sequence length (in tokens) passed as `max_length` to every tokenizer call in this
# notebook; inputs are padded/truncated to exactly this length.
global_max_token_len = 71

# CSV read by the inference loop (its "id" and "text" columns are used) and the CSV
# the final predictions are written to.
# NOTE(review): both paths sit under "MyDrive/Proyectos/semeval", whereas the model
# folder used further down is under "MyDrive/Servicio social/Proyectos/semeval" —
# confirm that the two different roots are intentional.
input_file = f'/content/drive/MyDrive/Proyectos/semeval/data/newest/test/{lang}.csv'
output_file = f'/content/drive/MyDrive/Proyectos/semeval/final_predictions/pred_{lang}.csv'

In [None]:
class MultilabelModel:
    """Multi-label emotion detector wrapping a sequence-classification checkpoint.

    Each output logit is passed through a sigmoid and thresholded independently,
    producing a 0/1 flag per emotion.
    """

    # Output index i of the classifier is reported under EMOTIONS[i].
    # NOTE(review): assumes the checkpoint was trained with this label order — confirm.
    EMOTIONS = ('anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise')

    def __init__(self, model_path):
        """Load the tokenizer and model from ``model_path`` and switch to eval mode.

        Args:
            model_path: Location passed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        # Remember where the weights were placed so predict() always sends its
        # inputs to the same device, even if the notebook-level `device` is
        # rebound after this object was created.
        self.device = device
        self.model.to(self.device)
        self.model.eval()  # inference only: disables dropout etc.

    def predict(self, text, threshold=0.5, max_length=None):
        """Return a ``{emotion: 0 or 1}`` dict for ``text``.

        Args:
            text: Input handed to the tokenizer. Only the first row of the
                model output is reported, so pass a single text.
            threshold: A label is set to 1 when its sigmoid probability is
                strictly greater than this value. Defaults to 0.5.
            max_length: Length the input is padded/truncated to. Defaults to
                the notebook-level ``global_max_token_len``.

        Returns:
            dict mapping every name in ``EMOTIONS`` to ``0`` or ``1``.
        """
        if max_length is None:
            max_length = global_max_token_len

        encoded = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        ).to(self.device)

        # No gradients are needed for inference
        with torch.no_grad():
            logits = self.model(**encoded).logits
            # Independent per-label probabilities
            probs = torch.sigmoid(logits).cpu().numpy()

        # Binarise and keep the first (only) row of the batch
        flags = (probs > threshold).astype(int)[0]
        return {emotion: int(flags[i]) for i, emotion in enumerate(self.EMOTIONS)}



In [None]:
class LevelModel:
    """Single-label classifier wrapper that reports the winning class as a 1-based level."""

    def __init__(self, model_path):
        """Load the tokenizer and model from ``model_path`` and switch to eval mode.

        Args:
            model_path: Location passed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        # Remember where the weights were placed so predict() always sends its
        # inputs to the same device, even if the notebook-level `device` is
        # rebound after this object was created.
        self.device = device
        self.model.to(self.device)
        self.model.eval()  # inference only: disables dropout etc.

    def predict(self, text, max_length=None):
        """Return the predicted level for ``text`` as an ``int``.

        Args:
            text: Input handed to the tokenizer. Must yield a single-row batch,
                because the winning index is extracted with ``.item()``.
            max_length: Length the input is padded/truncated to. Defaults to
                the notebook-level ``global_max_token_len``.

        Returns:
            The arg-max class index plus one.
        """
        if max_length is None:
            max_length = global_max_token_len

        encoded = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        ).to(self.device)

        # No gradients are needed for inference
        with torch.no_grad():
            logits = self.model(**encoded).logits
            # Index of the highest-scoring class
            pred_class = torch.argmax(logits, dim=1).item()

        # Shift to 1-based levels.
        # NOTE(review): presumably 0 is reserved for "emotion absent" — confirm.
        return int(pred_class) + 1




In [None]:

# Folder holding every fine-tuned checkpoint for the selected language.
# NOTE(review): this root ("MyDrive/Servicio social/Proyectos/semeval") differs from
# the root of input_file/output_file ("MyDrive/Proyectos/semeval") — confirm.
model_folder = f'/content/drive/MyDrive/Servicio social/Proyectos/semeval/models/{lang}/'

# Stage 1: one multi-label model that flags which emotions are present
multilabel_model = MultilabelModel(f"{model_folder}multilabel")

# Stage 2: one level classifier per emotion
anger_model = LevelModel(f"{model_folder}anger_3_level")
disgust_model = LevelModel(f"{model_folder}disgust_3_level")
fear_model = LevelModel(f"{model_folder}fear_3_level")
joy_model = LevelModel(f"{model_folder}joy_3_level")
sadness_model = LevelModel(f"{model_folder}sadness_3_level")
surprise_model = LevelModel(f"{model_folder}surprise_3_level")

In [None]:



def get_complete_prediction(text):
    """Run the two-stage prediction for a single text.

    The multi-label model is queried first. Every emotion it reports as
    non-zero is then overwritten with the output of that emotion's level
    model; emotions reported as 0 are left at 0.

    Args:
        text: The text handed to each model's ``predict``.

    Returns:
        The dict returned by ``multilabel_model.predict``, with the flagged
        entries replaced by level-model outputs.
    """
    predictions = multilabel_model.predict(text)

    # Emotion name -> its level classifier, in the order they are consulted
    level_models = (
        ("anger", anger_model),
        ("disgust", disgust_model),
        ("fear", fear_model),
        ("joy", joy_model),
        ("sadness", sadness_model),
        ("surprise", surprise_model),
    )

    for emotion, level_model in level_models:
        if predictions[emotion] == 0:
            # Not detected by the first stage: keep the 0
            continue
        predictions[emotion] = level_model.predict(text)

    return predictions


# Score every row of the input CSV and collect one record per row:
# [id, anger, disgust, fear, joy, sadness, surprise].
df = pd.read_csv(input_file)
results = []
for index, row in df.iterrows():
    print(f'\nEntering index {index}')
    # Named row_id rather than `id` so the builtin id() is not shadowed for the
    # rest of the kernel session.
    row_id = row["id"]
    text = row['text']
    if pd.isna(text):
        # An empty CSV cell is read as float NaN, which the tokenizer cannot
        # take as text; score an empty string instead so the row still gets an
        # output line and the run is not aborted before anything is saved.
        print(f'Missing text at index {index}; predicting on an empty string')
        text = ""
    else:
        text = str(text)
    predictions = get_complete_prediction(text)
    # Value order must match the column names assigned when results are exported
    results.append(
        [row_id]
        + [predictions[emotion] for emotion in ("anger", "disgust", "fear", "joy", "sadness", "surprise")]
    )




In [None]:
# Assemble the collected records into a table and write it out without the index column.
# The column names are passed to the constructor instead of being assigned afterwards:
# with an empty `results` list, pd.DataFrame(results) has zero columns and assigning
# seven names raises a length-mismatch ValueError, whereas this form yields a
# header-only CSV.
df_res = pd.DataFrame(
    results,
    columns=["id", "anger", "disgust", "fear", "joy", "sadness", "surprise"],
)
df_res.to_csv(output_file, index=False)