In [None]:
import numpy as np
import pandas as pd
import pathlib
import swifter
import tweetnlp

from sklearn.metrics import accuracy_score, f1_score


# Folder (relative to the working directory) that holds the evaluation CSV files.
DATA_DIR = pathlib.Path("data")

# Emoji label -> integer class id. Ids are assigned from the position of each
# emoji in the tuple below (0 for the first entry, 19 for the last), so the
# order of this tuple is significant and must not be changed.
EMOJI_MAPPING = {
    emoji: class_id
    for class_id, emoji in enumerate(
        (
            "❤",   # 0
            "😍",  # 1
            "😂",  # 2
            "💕",  # 3
            "🔥",  # 4
            "😊",  # 5
            "😎",  # 6
            "✨",  # 7
            "💙",  # 8
            "😘",  # 9
            "📷",  # 10
            "🇺🇸",  # 11
            "☀",   # 12
            "💜",  # 13
            "😉",  # 14
            "💯",  # 15
            "😁",  # 16
            "🎄",  # 17
            "📸",  # 18
            "😜",  # 19
        )
    )
}

In [None]:
def get_sota_results(
    df: pd.DataFrame,
    model_name: str = "cardiffnlp/twitter-roberta-base-2021-124m-emoji",
    max_length: int = 128,
) -> pd.Series:
    """Predict an emoji class id for every entry of ``df["text"]``.

    A ``tweetnlp.Classifier`` is created on each call, applied to the "text"
    column one value at a time through swifter, and each predicted label is
    translated to its integer id with ``EMOJI_MAPPING``.

    Args:
        df: Frame with a "text" column holding the inputs to classify.
        model_name: Checkpoint identifier passed to ``tweetnlp.Classifier``.
            The default is the checkpoint this notebook was written for.
        max_length: Forwarded to ``tweetnlp.Classifier`` as ``max_length``.

    Returns:
        A Series (not a DataFrame) with one integer class id per entry of
        ``df["text"]``.

    Raises:
        KeyError: If ``df`` has no "text" column, or if the classifier emits a
            label that is not a key of ``EMOJI_MAPPING``.
    """
    model = tweetnlp.Classifier(model_name, max_length=max_length)

    def predict_class_id(text: str) -> int:
        # predict() returns a mapping whose "label" entry is looked up
        # directly in EMOJI_MAPPING.
        # NOTE(review): assumes the label is the emoji character itself, in
        # exactly the form used for the EMOJI_MAPPING keys — confirm.
        return EMOJI_MAPPING[model.predict(text)["label"]]

    return df["text"].swifter.apply(predict_class_id)


def get_results(df: pd.DataFrame, sota_answers: pd.Series) -> pd.DataFrame:
    """Score predicted class ids against the labels in ``df["annotation"]``.

    Args:
        df: Frame with an "annotation" column holding the reference labels.
        sota_answers: Predicted labels, one per row of ``df`` and in the same
            row order (e.g. the Series returned by ``get_sota_results``).

    Returns:
        A three-row DataFrame with columns "metric" and "value", holding the
        macro-averaged F1, the micro-averaged F1 and the accuracy, in that
        order.
    """
    # Both sides are converted to plain arrays, so rows are compared by
    # position; the pandas index of either input is ignored.
    y_true = df["annotation"].to_numpy()
    y_pred = sota_answers.to_numpy()

    scores = {
        "F1_macro": f1_score(y_true=y_true, y_pred=y_pred, average="macro"),
        "F1_micro": f1_score(y_true=y_true, y_pred=y_pred, average="micro"),
        "acc": accuracy_score(y_true=y_true, y_pred=y_pred),
    }

    # Dict insertion order fixes the row order of the report.
    return pd.DataFrame(
        {"metric": list(scores.keys()), "value": list(scores.values())}
    )

In [None]:
# Load the evaluation sheet and keep every row whose "chatgpt_final" value is
# not the literal string "[-1]"; then run the reference classifier on the
# remaining rows and display its scores as the cell output.
df = pd.read_csv(DATA_DIR / "Ewaluacja ChatGPT - zadania - TweetEval - emojis.csv")
df = df.loc[df["chatgpt_final"] != "[-1]"]
res = get_sota_results(df)
get_results(df=df, sota_answers=res)

Pandas Apply:   0%|          | 0/1664 [00:00<?, ?it/s]

Unnamed: 0,metric,value
0,F1_macro,0.321951
1,F1_micro,0.442909
2,acc,0.442909
