In [24]:
import os
import pprint

import pandas as pd
from openai import OpenAI
from sklearn.metrics import precision_recall_fscore_support, classification_report
from tqdm import tqdm

# Init DeepSeek API client.
# The API key is read from the DEEPSEEK_API_KEY environment variable so that no
# secret is stored in the notebook; a missing variable raises KeyError here
# instead of failing later on the first request.
client = OpenAI(
    api_key=os.environ["DEEPSEEK_API_KEY"],
    base_url="https://api.deepseek.com"
)

In [2]:
def predict_sentiment_deepseek(text):
    """Ask the DeepSeek chat model for a sentiment label for ``text``.

    The text is embedded into a fixed prompt (label definitions plus worked
    examples) and sent through the module-level ``client`` to the
    ``deepseek-chat`` model.

    Returns:
        The model's reply with surrounding whitespace stripped. If the request
        or the handling of the response raises, the exception is not
        propagated; the string ``"ERROR: <exception>"`` is returned instead.
    """
    prompt = f"""
You are an expert in determining the tone of a text. Our task is to determine the emotion (sentiment) that a person puts into a written text as accurately as possible. To do this, I will show you texts from Ukrainian social networks, and you will choose the correct answer regarding the sentiment. The answer options will be as follows:

1. Positive -> expressions used that reflect positive emotions (joy, support, admiration, etc.);
2. Negative -> expressions used that reflect negative emotions (criticism, sarcasm, condemnation, aggression, doubt, fear, etc.);
3. Neutral -> the author does not use either positive or negative expressions (neutral emotion);
4. Mixed -> the text contains expressions from both the positive and negative spectrum of emotions (mixed case).

It is important that you do not indicate your own guess about the author's sentiment, but find indications of it in specific expressions. I will give a few examples. Examples:

" Аварії " -> this short text has a neutral sentiment. Despite the fact that the Ukrainian word "Аварії" often has a negative context, in this case there is no additional information reflecting the sentiment of the author.
" Так я ж тебе задал вопрос. Киев, май, первое применение пэтриотов - когда все небо осветили этим - были там и х22, и кинжалы - так были прилеты тогда? Не было. Вопрос залу - почему так произошло?  Пэтриоты сбивают всю эту срань " -> this text has a negative sentiment. The author uses expressions that characterize aggression and criticism of the interlocutor.
" Зникло світло у Святошинському районі. " -> this text has a neutral sentiment. The fact of the lack of electricity itself is perceived negatively, but the author of the text does not use either positive or negative words / expressions.
" Проблеми зі світлом в Києві та області після вибухів!  " -> in turn, the following news item has a negative connotation. The author demonstrates his attitude through the word "Проблеми" and the exclamation mark "!", emphasizing the expression.
" :cry: Внаслідок ракетної атаки зафіксовано падіння уламків в Печерському районі на дах багатоповерхового житлового будинку, – КМВА " -> text with a negative sentiment, which the author demonstrates through the use of the ":cry:" emoji.
" Ну норм " -> this is an example of a positive sentiment. The text itself is not very expressive, but the author clearly demonstrates the emotion of "approval" of something, which belongs to the positive spectrum.
" :exclamation:В бік Києва пуски ще декількох ‘Кинджалів’. Ворог намагається пробити наші ППО. Поки відбиваємося, але є падіння уламків, тож перебуваємо в укриттях або хоча б за парою стін." -> this news item is an example of a negative sentiment. The author demonstrates his attitude to the event through the expressions "Ворог намагається пробити наші ППО. Поки відбиваємося, але". 

Your answer should be only one word. THIS IS IMPORTANT! You must answer exclusively with only one word from the list: [positive, negative, neutral, mixed].

Text to classify: "{text}"

Label:"""

    chat_messages = [
        {"role": "system", "content": "You are a sentiment analysis expert."},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=chat_messages,
        )
        # Kept inside the try: a reply without text content ends up in the
        # ERROR branch below rather than raising out of the function.
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        return f"ERROR: {err}"

In [3]:
# Load the dataset from the parquet file; later cells select rows via its `df_set` column.
df=pd.read_parquet("./data_provided/final_dataset/final_17042025.parquet")

In [4]:
# Texts of the rows marked as the test split, as a plain Python list.
is_test_row = df["df_set"] == "test"
texts = df.loc[is_test_row, "document_content"].values.tolist()

In [6]:
# Number of test-split texts that will be sent to the model.
len(texts)

1223

In [5]:
# Accumulator for the raw model replies, one entry per classified text.
sentiments = list()

In [7]:
# Start from an empty list inside this cell so that re-running the cell does
# not append a second copy of the predictions to an already-filled list
# (which would make `sentiments` longer than `texts`).
# An explicit loop with append is kept so that partial results survive a
# manual interrupt of this long-running cell.
sentiments = []
for text in tqdm(texts):
    label = predict_sentiment_deepseek(text)
    sentiments.append(label)

100%|██████████| 1223/1223 [1:38:17<00:00,  4.82s/it]


In [8]:
# Normalise the raw model replies to one of the four allowed labels.
#
# Replies that are not exactly a label (e.g. a label followed by an
# explanation) are printed for inspection and then resolved as follows:
#   1. the first word of the reply, lower-cased and stripped of punctuation,
#      is used if it is a valid label -- the explanation that follows may
#      mention other labels ("starts with a neutral/positive tone ...") and
#      must not override the actual answer;
#   2. otherwise the label that appears earliest in the reply is used;
#   3. if no label is mentioned at all (including "ERROR: ..." replies),
#      the entry falls back to 'neutral', as before.
sents = ['positive', 'negative', 'neutral', 'mixed']
sentiment_cleaned = []
for raw in sentiments:
    if raw in sents:
        sentiment_cleaned.append(raw)
        continue

    print('-', raw)
    lowered = raw.lower()
    words = lowered.split()
    first_word = words[0].strip(".,:;!?()[]*\"'") if words else ''

    if first_word in sents:
        cleaned = first_word
    else:
        # (position, label) for every label mentioned anywhere in the reply.
        mentions = [(lowered.find(lab), lab) for lab in sents if lab in lowered]
        cleaned = min(mentions)[1] if mentions else 'neutral'

    sentiment_cleaned.append(cleaned)

- positive  

(The word "компенсації" (compensations) and the hashtag "#shrots" suggest a positive sentiment, as it implies support or relief for those affected.)
- negative  

(The text conveys a sense of danger or threat through "Загроза атаки," which reflects a negative sentiment.)
- positive  

(The use of the "👹👹👹" emojis, which in this context likely represent intensity or admiration for the special forces, reflects a positive sentiment.)
- negative  

(The text starts with a neutral/positive tone regarding financial aid and recovery efforts, but ends with the highly negative and vulgar expression "Хуй," which drastically shifts the sentiment to negative.)


In [9]:
# Sanity check: the distinct labels left after cleaning.
set(sentiment_cleaned)

{'mixed', 'negative', 'neutral', 'positive'}

In [10]:
# df=pd.read_csv("ua_sentiment_dataset_labeled_lang.csv")

In [12]:
# Independent copy of the test-split rows, so adding columns does not touch `df`.
df_test = df[df["df_set"] == "test"].copy()

In [13]:
# Attach the cleaned predictions positionally; relies on `sentiment_cleaned`
# being in the same order as the test-split rows of `df`.
df_test["sentiment_deepseek"] = sentiment_cleaned

In [20]:
# Share of test rows where the model label equals the annotator label.
is_match = df_test["annotator_sentiment"] == df_test["sentiment_deepseek"]
int(is_match.sum()) / len(df_test)

0.6369582992641046

In [None]:
# df["sentiment_deepseek"] = sentiment_cleaned
# df["annotator_response"] = df["annotator_response"].str.strip().str.lower()
# df["sentiment_deepseek"] = df["sentiment_deepseek"].str.strip().str.lower()
# df_filtered = df[df["annotator_response"].str.lower() != "idk"].copy()


In [21]:
# Persist the test split together with the added `sentiment_deepseek` column.
df_test.to_parquet('./data_provided/final_dataset/df_test_set_deepseek.parquet')

In [52]:
# [count, language] pairs for every distinct value of the `language` column.
# NOTE(review): `df_filtered` is not assigned in any active cell visible here;
# this cell presumably relies on state from an earlier session -- confirm.
languages = df_filtered['language'].values.tolist()
[[languages.count(lang), lang] for lang in list(set(languages))]

[[4312, 'Code-mixed'], [1799, 'Russian'], [2924, 'Ukrainian']]

In [23]:
def evaluate_sentiment(df, y_true_col, y_pred_col, group_col="language"):
    """
    Compute macro- and micro-averaged precision / recall / F1, both over the
    whole frame and separately for each distinct value of ``group_col``.

    Params:
    - df: pd.DataFrame holding the true labels, the predictions and the group column
    - y_true_col: name of the column with the true labels
    - y_pred_col: name of the column with the predicted labels
    - group_col: name of the column whose values define the groups

    Returns:
    - dict of the form
      {"overall": {"macro": {...}, "micro": {...}},
       "by_group": {group_value: {"macro": {...}, "micro": {...}}}}
      where every innermost dict has the keys "precision", "recall", "f1".
    """
    def _averaged_scores(true_labels, predicted_labels):
        # One {"precision", "recall", "f1"} dict per averaging mode.
        scores = {}
        for averaging in ("macro", "micro"):
            precision, recall, f1, _ = precision_recall_fscore_support(
                true_labels, predicted_labels, average=averaging, zero_division=0
            )
            scores[averaging] = {
                "precision": precision,
                "recall": recall,
                "f1": f1,
            }
        return scores

    report = {
        "overall": _averaged_scores(df[y_true_col], df[y_pred_col]),
        "by_group": {},
    }

    for group_value in df[group_col].unique():
        rows = df[df[group_col] == group_value]
        # A value that matches no rows under `==` is skipped.
        if rows.empty:
            continue
        report["by_group"][group_value] = _averaged_scores(
            rows[y_true_col], rows[y_pred_col]
        )

    return report

# Score the DeepSeek labels against the annotator labels (grouped by the default `language` column).
metrics = evaluate_sentiment(
    df_test,
    y_true_col="annotator_sentiment",
    y_pred_col="sentiment_deepseek",
)

In [25]:
# Pretty-print the nested metrics dict (overall and per group).
pprint.pprint(metrics)

{'by_group': {'mixed': {'macro': {'f1': 0.5913036902601331,
                                  'precision': 0.7347222222222223,
                                  'recall': 0.5654761904761905},
                        'micro': {'f1': 0.6825396825396826,
                                  'precision': 0.6825396825396826,
                                  'recall': 0.6825396825396826}},
              'ru': {'macro': {'f1': 0.48077995989690503,
                               'precision': 0.5893144872138629,
                               'recall': 0.48601928374655645},
                     'micro': {'f1': 0.6498599439775911,
                               'precision': 0.6498599439775911,
                               'recall': 0.6498599439775911}},
              'ua': {'macro': {'f1': 0.5378355276471065,
                               'precision': 0.5876203944898655,
                               'recall': 0.5704492277790206},
                     'micro': {'f1': 0.6276463262764632,
      