# Baseline solution based on GPT-4 prompting

## Imports and data read

In [116]:
import pandas as pd
import numpy as np
import pprint

from tqdm import tqdm
from openai import OpenAI
from sklearn.metrics import accuracy_score, f1_score

## Pipeline

https://platform.openai.com/docs/api-reference/authentication

In [2]:
api_key = 'sk-proj-7yyg4KC696T7WK6CmLeWT3BlbkFJfjiC6bs5r8R7yoGLCIUP'

In [3]:
client = OpenAI(api_key=api_key)

In [5]:
def analyze_sentiment(comment, system_prompt):
    """
    Sends a request to OpenAI's GPT model to analyze sentiment.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": comment}
        ]
    )   
    return response.choices[0].message.content

In [11]:
df = pd.read_parquet('./data_provided/final_dataset/final_17042025.parquet')

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12224 entries, 0 to 12223
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   response_id           12224 non-null  int64 
 1   document_id           12224 non-null  int64 
 2   user_id               12224 non-null  int64 
 3   annotator_sentiment   12224 non-null  object
 4   is_ck_annotation      12224 non-null  int64 
 5   response_timestamp    12224 non-null  object
 6   document_content      12224 non-null  object
 7   annotation_date       12224 non-null  object
 8   username              12224 non-null  object
 9   unique_document_id    12224 non-null  object
 10  language_wc           12224 non-null  object
 11  document_length       12224 non-null  int64 
 12  gpt_labels_v1         12224 non-null  object
 13  language_gpt          12224 non-null  object
 14  language_manual       12224 non-null  object
 15  language              12224 non-null

In [13]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels_v1,language_gpt,language_manual,language,stratification_label,df_set
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral,Ukrainian,ukrainian,ua,neutral_ua,train
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative,Ukrainian,ukrainian,ua,neutral_ua,validation
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative,Code-mixed,russian,ru,negative_ru,test
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,negative,Code-mixed,russian,ru,negative_ru,train
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral,Ukrainian,russian,ua,neutral_ua,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive,Code-mixed,ukrainian,ua,negative_ua,train
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,positive,Ukrainian,ukrainian,ua,mixed_ua,train
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral,Ukrainian,ukrainian,ua,neutral_ua,train
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative,Ukrainian,ukrainian,ua,negative_ua,train


In [14]:
df.annotator_sentiment.unique()

array(['neutral', 'negative', 'positive', 'mixed'], dtype=object)

## Metrics

In [15]:
def evaluate_gpt_results(df):
    """
    Computes accuracy and F1-score for GPT sentiment analysis results.
    
    Parameters:
    df (pd.DataFrame): A DataFrame containing 'true_label' and 'gpt_label'.
    
    Returns:
    dict: A dictionary with accuracy and F1-score.
    """
    accuracy = accuracy_score(df['y'], df['y_hat'])
    f1 = f1_score(df['y'], df['y_hat'], average='weighted')
    
    return {"accuracy": round(accuracy, 4), "f1_score": round(f1, 4)}

## System prompt creation - zero shot approach

In [16]:
system_prompt = '''

You are a sentiment analysis expert. You need to analyze peoples emotions and define the comments sentiment from the Ukrainian social media. The input you receive is a text from Telegram messenger. The text can be Ukrainian or Russian. 

Your output must be a single word from the following list: ['positive', 'neutral', 'negative', 'mixed']. 

'''

In [17]:
print(len(system_prompt)/4)

86.75


In [None]:
gpt_answers = []

for id in tqdm(df.index[:1000]):
    gpt_answers.append(analyze_sentiment(df.loc[id, 'document_content'], system_prompt=system_prompt))

In [None]:
df['gpt_labels'] = gpt_answers

In [None]:
df['gpt_labels'].unique()

array(['positive', 'negative', 'mixed', 'neutral', 'negative '],
      dtype=object)

In [None]:
df.loc[df['gpt_labels'] == 'negative ', 'gpt_labels'] = 'negative'

In [None]:
df['gpt_labels'].unique()

array(['positive', 'negative', 'mixed', 'neutral'], dtype=object)

In [None]:
df.gpt_labels.value_counts()

gpt_labels
negative    494
positive    289
neutral     151
mixed        66
Name: count, dtype: int64

In [None]:
df.annotator_sentiment.value_counts()

annotator_sentiment
negative    364
neutral     360
positive    212
mixed        64
Name: count, dtype: int64

In [None]:
df['y'] = df['annotator_sentiment'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [None]:
df['y_hat'] = df['gpt_labels'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [41]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels_v1,language_gpt,language_manual,language,stratification_label,df_set
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral,Ukrainian,ukrainian,ua,neutral_ua,train
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative,Ukrainian,ukrainian,ua,neutral_ua,validation
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative,Code-mixed,russian,ru,negative_ru,test
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,negative,Code-mixed,russian,ru,negative_ru,train
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral,Ukrainian,russian,ua,neutral_ua,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive,Code-mixed,ukrainian,ua,negative_ua,train
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,positive,Ukrainian,ukrainian,ua,mixed_ua,train
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral,Ukrainian,ukrainian,ua,neutral_ua,train
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative,Ukrainian,ukrainian,ua,negative_ua,train


In [None]:
metrics = evaluate_gpt_results(df)

In [45]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.585, 'f1_score': 0.5609}


## System prompt creation - few shots approach - Experiment 1

In [9]:
system_prompt = '''

You are a sentiment analysis expert. You need to analyze peoples emotions and define the comments sentiment from the Ukrainian social media. The input you receive is a text from Telegram messenger. The text can be Ukrainian or Russian. 

Your output must be a single word from the following list: ['positive', 'neutral', 'negative', 'mixed']. 
Here are some examples of inputs to you and your outputs expected:

```
Input: '–ö–∏–µ–≤–µ –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å –∑–≤—É—á–∞—Ç—å –≤–∑—Ä—ã–≤—ã. –°–ú–ò —Å–æ–æ–±—â–∞—é—Ç, —á—Ç–æ –≤ —Ä—è–¥–µ —Ä–∞–π–æ–Ω–æ–≤ –ø—Ä–æ–ø–∞–ª —Å–≤–µ—Ç.'
Output: 'negative'

Input: '–ß–µ—Ä–µ–∑ –æ–±—Å—Ç—Ä—ñ–ª–∏ —É –ö–∏—î–≤—ñ –∑–Ω–µ—Å—Ç—Ä—É–º–ª–µ–Ω–æ –º–∞–π–∂–µ 260 —Ç–∏—Å—è—á —Å–ø–æ–∂–∏–≤–∞—á—ñ–≤, –≤—ñ–¥–∫–ª—é—á–∞–ª–æ—Å—è –æ–±–ª–∞–¥–Ω–∞–Ω–Ω—è –ø—ñ–¥—Å—Ç–∞–Ω—Ü—ñ–π —É –ö–∏—ó–≤—Å—å–∫—ñ–π –æ–±–ª–∞—Å—Ç—ñ, ‚Äî –ú—ñ–Ω–µ–Ω–µ—Ä–≥–æ   –£ –ö–∏—î–≤—ñ –ø–æ—à–∫–æ–¥–∂–µ–Ω–æ –ø–æ–≤—ñ—Ç—Ä—è–Ω—É –ª—ñ–Ω—ñ—é, –±–µ–∑ –Ω–∞–ø—Ä—É–≥–∏ –Ω–∏–∑–∫–∞ –ø—ñ–¥—Å—Ç–∞–Ω—Ü—ñ–π ‚Äì –∑–Ω–µ—Å—Ç—Ä—É–º–ª–µ–Ω—ñ 259 —Ç–∏—Å—è—á —Ç–æ—á–æ–∫ –æ–±–ª—ñ–∫—É.'
Output: 'negative'

Input: '–ü–æ–ø–µ—Ä–µ–¥–Ω—ñ–π –≤–∏—Å–Ω–æ–≤–æ–∫üßê  –ü–µ—Ä—à–µ. –£ –Ω–∞—Å –Ω–∞–π–∫—Ä–∞—â–µ –≤ —Å–≤—ñ—Ç—ñ –ü–ü–û! –ñ–æ–¥–Ω–∞ –∫—Ä–∞—ó–Ω–∞ –Ω–µ –≤–∏—Ç—Ä–∏–º–∞–ª–∞ –± —Ç–∞–∫–æ—ó –º–∞—Å–æ–≤–∞–Ω–æ—ó –∞—Ç–∞–∫–∏. –î–æ—Å–≤—ñ–¥ —ñ –Ω–∞—è–≤–Ω—ñ—Å—Ç—å –∑–∞—Å–æ–±—ñ–≤ –≤–±–µ—Ä—ñ–≥–∞—é—Ç—å 99% –Ω–∞—Å–µ–ª–µ–Ω–Ω—è  –î—Ä—É–≥–µ. –†—É—Å–Ω—è –ø—Ä–æ—Å—Ç–æ –≤—á–µ—Ä–≥–æ–≤–µ —Ö–æ—á–µ –Ω–∞—Å –∑–∞–ª—è–∫–∞—Ç–∏, –∞–ª–µ –º–∏–Ω—É–ª–æ—ó –∑–∏–º–∏ —ñ –≤ —Ç—Ä–∞–≤–Ω—ñ –º–∏ —á—ñ—Ç–∫–æ –≤–∏–≤—á–∏–ª–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∏ –±–µ–∑–ø–µ–∫–∏ —ñ —Ö—î—Ä –∫–ª–∞–¥–µ–º —Ä—É—Å–∞–∫–∞–º –Ω–∞ –≤–æ—Ä–æ—Ç–Ω—ñ–∫  –¢—Ä–µ—Ç—î. –ù–µ –±—ñ–π—Ç–µ—Å—è, —Å–∫–æ—Ä–æ —ñ –≤–æ–¥–∞ —ñ –µ–ª–µ–∫—Ç—Ä–∏–∫–∞ –ø–æ–≤–µ—Ä–Ω—É—Ç—å—Å—è, –∞ –≤–æ–ª—è –Ω–∞—à–∞ –Ω–µ –∑–ª–∞–º–∞—î—Ç—å—Å—è!  –Ü–Ω—Ñ–æ—Ä–º—É—î –ö–∏—ó–≤ | –ù–∞–¥—ñ—Å–ª–∞—Ç–∏ –Ω–æ–≤–∏–Ω—É'
Output: 'positive'

Input: '–ì–æ–ª–æ—Å–µ–µ–≤—Å–∫–∏–π —Ä-–Ω –µ—Å—Ç—å —Å–≤–µ—Ç üòâ‚úåüèª'
Output: 'positive'

Input: '–ü—ñ–¥–∫–∞–∂—ñ—Ç—å, –±—É–¥—å –ª–∞—Å–∫–∞, —á–∏ –±—É–≤–∞—é—Ç—å –Ω–∞ –í–∏–¥—É–±–∏—á–∞—Ö –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º–∞—Ö –º—ñ—Å—å–∫–æ—ó —ñ –ø—Ä–∏–º—ñ—Å—å–∫–∏—Ö –µ–ª–µ–∫—Ç—Ä–∏—á–æ–∫?'
Output: 'neutral'

Input: '–ó —Ü—å–æ–≥–æ —Ä–µ—Å—Ç–æ—Ä–∞–Ω—É –¥–æ—Å—Ç–∞–≤–ª—è—é—Ç—å –¥—É–∂–µ —à–≤–∏–¥–∫–æ! –ê–ª–µ —ó–∂–∞ —É –Ω–∏—Ö –º–æ–∂–µ –±—É—Ç–∏ –Ω–µ—Å–º–∞—á–Ω–∞, —Ç–æ–º—É —Ä–µ–∫–æ–º–µ–Ω–¥—É–≤–∞—Ç–∏ —Å–∫–ª–∞–¥–Ω–æ...'
Output: 'mixed'
```

Remember that the 'sentiment' values can be only and only from the following list: ['positive', 'neutral', 'negative', 'mixed']. 

'''

In [None]:
print(len(system_prompt)/4)

In [None]:
gpt_answers = []

for id in tqdm(df.index):
    gpt_answers.append(analyze_sentiment(df.loc[id, 'document_content']))

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12224/12224 [1:52:13<00:00,  1.82it/s]  


In [None]:
gpt_answers

['–Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ù–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ù–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–∑–º—ñ—à–∞–Ω–∏–π',
 '–ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ó–º—ñ—à–∞–Ω–∏–π',
 '–Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–∑–º—ñ—à–∞–Ω–∏–π',
 '–ù–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ø–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–ø–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–∑–º—ñ—à–∞–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ó–º—ñ—à–∞–Ω–∏–π',
 '–∑–º—ñ—à–∞–Ω–∏–π',
 '–Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π',
 '–ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–∑–º—ñ—à–∞–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π',
 '–ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π',
 '–ü–æ–∑–∏—Ç

In [None]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11
...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114


In [None]:
df['gpt_labels'] = gpt_answers

In [102]:
df.to_parquet('./data_provided/final_dataset/gpt_labeled_with_language.parquet')

In [234]:
df = pd.read_parquet('./data_provided/final_dataset/gpt_labeled_with_language.parquet')

In [235]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels,language_gpt,language_explanation_gpt
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral,Ukrainian,
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative,Ukrainian,
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative,Code-mixed,
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,negative,Code-mixed,
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral,Ukrainian,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive,Code-mixed,
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,positive,Ukrainian,
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral,Ukrainian,
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative,Ukrainian,


In [236]:
df.gpt_labels.unique()

array(['neutral', 'negative', 'positive', 'mixed'], dtype=object)

In [81]:
df.loc[(df.gpt_labels == """Input: "–ö–∏—î–≤–µ –ø—Ä–æ–¥–æ–≤–∂—É—é—Ç—å—Å—è –≤–∏–±—É—Ö–∏ —Ç–∞ –≤—ñ–¥–∫–ª—é—á–µ–Ω–Ω—è —Å–≤—ñ—Ç–ª–∞ –≤ —Ä—ñ–∑–Ω–∏—Ö —Ä–∞–π–æ–Ω–∞—Ö –º—ñ—Å—Ç–∞. –ñ–∏—Ç–µ–ª—ñ —Å–∫–∞—Ä–∂–∞—Ç—å—Å—è –Ω–∞ –Ω–µ–≤–ø–µ–≤–Ω–µ–Ω—ñ—Å—Ç—å —É –º–∞–π–±—É—Ç–Ω—å–æ–º—É."\nOutput: \'negative\'"""), 'gpt_labels'] = 'negative'

In [95]:
df.loc[(df.gpt_labels == """–ó–≤—ñ—Å–Ω–æ, –Ω–∞–ø–∏—à—ñ—Ç—å —Ç–µ–∫—Å—Ç, —è–∫–∏–π –≤–∏ –±–∞–∂–∞—î—Ç–µ –ø—Ä–æ–∞–Ω–∞–ª—ñ–∑—É–≤–∞—Ç–∏ –Ω–∞ –Ω–∞—Å—Ç—Ä—ñ–π!"""), 'gpt_labels'] = 'negative'

In [96]:
df.loc[(df.gpt_labels == """–ú–æ—è –º–æ–≤–∞ –∑–∞—Ä–∞–∑ —É–∫—Ä–∞—ó–Ω—Å—å–∫–∞. –Ø –∑–∞–≤–∂–¥–∏ –≥–æ—Ç–æ–≤–∞ –¥–æ–ø–æ–º–æ–≥—Ç–∏! –Ø–∫ —è –º–æ–∂—É –≤–∞–º —Å—å–æ–≥–æ–¥–Ω—ñ –¥–æ–ø–æ–º–æ–≥—Ç–∏?"""), 'gpt_labels'] = 'negative'

In [98]:
df.loc[(df.gpt_labels == """1. 'negative'\n2. 'negative'\n3. 'neutral'"""), 'gpt_labels'] = 'mixed'

In [237]:
df['y'] = df['annotator_sentiment'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [238]:
df['y_hat'] = df['gpt_labels'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [239]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels,language_gpt,language_explanation_gpt,y,y_hat
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral,Ukrainian,,0,0
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative,Ukrainian,,0,-1
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative,Code-mixed,,-1,-1
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,negative,Code-mixed,,-1,-1
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral,Ukrainian,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive,Code-mixed,,-1,1
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,positive,Ukrainian,,2,1
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral,Ukrainian,,0,0
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative,Ukrainian,,-1,-1


In [240]:
metrics = evaluate_gpt_results(df)

In [241]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.6185, 'f1_score': 0.6044}


By language

In [251]:
df.language_wc.unique()

array(['uk', 'ru'], dtype=object)

In [252]:
metrics = evaluate_gpt_results(df.loc[df.language_wc == 'uk'])

In [253]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.6008, 'f1_score': 0.5829}


In [254]:
metrics = evaluate_gpt_results(df.loc[df.language_wc == 'ru'])

In [255]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.6578, 'f1_score': 0.6544}


By sentiment

In [242]:
df.annotator_sentiment.unique()

array(['neutral', 'negative', 'positive', 'mixed'], dtype=object)

In [243]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'neutral'])

In [244]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.43, 'f1_score': 0.6014}


In [245]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'negative'])

In [246]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.8128, 'f1_score': 0.8967}


In [247]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'positive'])

In [248]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.7417, 'f1_score': 0.8517}


In [249]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'mixed'])

In [250]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.1447, 'f1_score': 0.2529}


## System prompt creation - few shots approach - Experiment 2

In [6]:
system_prompt = '''

–¢–∏ - –µ–∫—Å–ø–µ—Ä—Ç —ñ–∑ –≤–∏–∑–Ω–∞—á–µ–Ω–Ω—è —Ç–æ–Ω–∞–ª—å–Ω–æ—Å—Ç—ñ —Ç–µ–∫—Å—Ç—É. –ù–∞—à–µ –∑–∞–≤–¥–∞–Ω–Ω—è - –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ —Ç–æ—á–Ω–æ –≤–∏–∑–Ω–∞—á–∞—Ç–∏ –µ–º–æ—Ü—ñ—é (—Å–µ–Ω—Ç–∏–º–µ–Ω—Ç), —è–∫—É –ª—é–¥–∏–Ω–∞ –∑–∞–∫–ª–∞–¥–∞—î —É –Ω–∞–ø–∏—Å–∞–Ω–∏–π —Ç–µ–∫—Å—Ç. –î–ª—è —Ü—å–æ–≥–æ —è –ø–æ–∫–∞–∑—É–≤–∞—Ç–∏–º—É —Ç–æ–±—ñ —Ç–µ–∫—Å—Ç–∏ –∑ —É–∫—Ä–∞—ó–Ω—Å—å–∫–∏—Ö —Å–æ—Ü—ñ–∞–ª—å–Ω–∏—Ö –º–µ—Ä–µ–∂, –∞ —Ç–∏ - –æ–±–∏—Ä–∞—Ç–∏–º–µ—à –≤—ñ—Ä–Ω–∏–π –≤–∞—Ä—ñ–∞–Ω—Ç –≤—ñ–¥–ø–æ–≤—ñ–¥—ñ —â–æ–¥–æ —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç—É. –í–∞—Ä—ñ–∞–Ω—Ç–∏ –≤—ñ–¥–ø–æ–≤—ñ–¥–µ–π –±—É–¥—É—Ç—å –Ω–∞—Å—Ç—É–ø–Ω—ñ:

1. –ü–æ–∑–∏—Ç–∏–≤–Ω–∏–π -> –í–∏–∫–æ—Ä–∏—Å—Ç–∞–Ω—ñ –≤–∏—Ä–∞–∑–∏, —â–æ –≤—ñ–¥–æ–±—Ä–∞–∂–∞—é—Ç—å –ø–æ–∑–∏—Ç–∏–≤–Ω—ñ –µ–º–æ—Ü—ñ—ó (—Ä–∞–¥—ñ—Å—Ç—å, –ø—ñ–¥—Ç—Ä–∏–º–∫—É, –∑–∞—Ö–æ–ø–ª–µ–Ω–Ω—è —Ç–æ—â–æ);
2. –ù–µ–≥–∞—Ç–∏–≤–Ω–∏–π -> –í–∏–∫–æ—Ä–∏—Å—Ç–∞–Ω—ñ –≤–∏—Ä–∞–∑–∏, —â–æ –≤—ñ–¥–æ–±—Ä–∞–∂–∞—é—Ç—å –Ω–µ–≥–∞—Ç–∏–≤–Ω—ñ –µ–º–æ—Ü—ñ—ó (–∫—Ä–∏—Ç–∏–∫–∞, —Å–∞—Ä–∫–∞–∑–º, –æ—Å—É–¥, –∞–≥—Ä–µ—Å—ñ—è, —Å—É–º–Ω—ñ–≤, —Å—Ç—Ä–∞—Ö —Ç–æ—â–æ);
3. –ù–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π -> –ê–≤—Ç–æ—Ä –Ω–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É—î –Ω—ñ –ø–æ–∑–∏—Ç–∏–≤–Ω–∏—Ö, –Ω—ñ –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏—Ö –≤–∏—Ä–∞–∑—ñ–≤ (–Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∞ –µ–º–æ—Ü—ñ—è —Ç–µ–∫—Å—Ç—É);
4. –ó–º—ñ—à–∞–Ω–∏–π -> –¢–µ–∫—Å—Ç –º—ñ—Å—Ç–∏—Ç—å –≤–∏—Ä–∞–∑–∏ —è–∫ –∑ –ø–æ–∑–∏—Ç–∏–≤–Ω–æ–≥–æ —Å–ø–µ–∫—Ç—Ä—É –µ–º–æ—Ü—ñ–π, —Ç–∞–∫ —ñ –∑ –Ω–µ–≥–∞—Ç–∏–≤–Ω–æ–≥–æ (–∑–º—ñ—à–∞–Ω–∏–π –≤–∏–ø–∞–¥–æ–∫);


–í–∞–∂–ª–∏–≤–æ, —â–æ –ø–æ—Ç—Ä—ñ–±–Ω–æ –≤–∫–∞–∑—É–≤–∞—Ç–∏ –Ω–µ –≤–ª–∞—Å–Ω—É –∑–¥–æ–≥–∞–¥–∫—É —â–æ–¥–æ —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç—É –∞–≤—Ç–æ—Ä–∞, –∞ –∑–Ω–∞—Ö–æ–¥–∏—Ç–∏ –≤–∫–∞–∑—ñ–≤–∫–∏ –Ω–∞ –Ω—å–æ–≥–æ —É –∫–æ–Ω–∫—Ä–µ—Ç–Ω–∏—Ö –≤–∏—Ä–∞–∑–∞—Ö. –ù–∞–¥–∞–º –¥–µ–∫—ñ–ª—å–∫–∞ –ø—Ä–∏–∫–ª–∞–¥—ñ–≤. –ü—Ä–∏–∫–ª–∞–¥–∏:
" –ê–≤–∞—Ä—ñ—ó " -> —Ü–µ–π –∫–æ—Ä–æ—Ç–∫–∏–π —Ç–µ–∫—Å—Ç –º–∞—î –Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç. –ü–æ–ø—Ä–∏ —Ç–µ, —â–æ —Å–ª–æ–≤–æ ‚Äú–∞–≤–∞—Ä—ñ—ó‚Äù —á–∞—Å—Ç–æ –º–∞—î –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π –∫–æ–Ω—Ç–µ–∫—Å—Ç, —É –¥–∞–Ω–æ–º—É –≤–∏–ø–∞–¥–∫—É –≤—ñ–¥—Å—É—Ç–Ω—è –±—É–¥—å-—è–∫–∞ –¥–æ–¥–∞—Ç–∫–æ–≤–∞ —ñ–Ω—Ñ–æ—Ä–º–∞—Ü—ñ—è, —â–æ –≤—ñ–¥–æ–±—Ä–∞–∂–∞—î —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç –∞–≤—Ç–æ—Ä–∞.
" –¢–∞–∫ —è –∂ —Ç–µ–±–µ –∑–∞–¥–∞–ª –≤–æ–ø—Ä–æ—Å. –ö–∏–µ–≤, –º–∞–π, –ø–µ—Ä–≤–æ–µ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ –ø—ç—Ç—Ä–∏–æ—Ç–æ–≤ - –∫–æ–≥–¥–∞ –≤—Å–µ –Ω–µ–±–æ –æ—Å–≤–µ—Ç–∏–ª–∏ —ç—Ç–∏–º - –±—ã–ª–∏ —Ç–∞–º –∏ —Ö22, –∏ –∫–∏–Ω–∂–∞–ª—ã - —Ç–∞–∫ –±—ã–ª–∏ –ø—Ä–∏–ª–µ—Ç—ã —Ç–æ–≥–¥–∞? –ù–µ –±—ã–ª–æ. –í–æ–ø—Ä–æ—Å –∑–∞–ª—É - –ø–æ—á–µ–º—É —Ç–∞–∫ –ø—Ä–æ–∏–∑–æ—à–ª–æ?  –ü—ç—Ç—Ä–∏–æ—Ç—ã —Å–±–∏–≤–∞—é—Ç –≤—Å—é —ç—Ç—É —Å—Ä–∞–Ω—å " -> —Ü–µ–π —Ç–µ–∫—Å—Ç –º–∞—î –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç. –ü–æ–ø—Ä–∏ —Ç–µ, —â–æ —Ñ–∞–∫—Ç ‚Äú–ü–µ—Ç—Ä—ñ–æ—Ç–∏ –∑–±–∏–≤–∞—é—Ç—å —Ä–∞–∫–µ—Ç–∏‚Äù –º–æ–∂–µ –≤—ñ–¥—á—É–≤–∞—Ç–∏—Å—å –ø–æ–∑–∏—Ç–∏–≤–Ω–æ, –∞–≤—Ç–æ—Ä –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É—î –≤–∏—Ä–∞–∑–∏, —â–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏–∑—É—é—Ç—å –∞–≥—Ä–µ—Å—ñ—é —Ç–∞ –∫—Ä–∏—Ç–∏–∫—É –¥–æ —Å–ø—ñ–≤—Ä–æ–∑–º–æ–≤–Ω–∏–∫–∞.
" –ó–Ω–∏–∫–ª–æ —Å–≤—ñ—Ç–ª–æ —É –°–≤—è—Ç–æ—à–∏–Ω—Å—å–∫–æ–º—É —Ä–∞–π–æ–Ω—ñ. " -> –¥–∞–Ω–∏–π —Ç–µ–∫—Å—Ç –º–∞—î –Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç. –°–∞–º —Ñ–∞–∫—Ç –≤—ñ–¥—Å—É—Ç–Ω–æ—Å—Ç—ñ –µ–ª–µ–∫—Ç—Ä–æ–µ–Ω–µ—Ä–≥—ñ—ó —Å–ø—Ä–∏–π–º–∞—î—Ç—å—Å—è –Ω–µ–≥–∞—Ç–∏–≤–Ω–æ, –∞–ª–µ –∞–≤—Ç–æ—Ä —Ç–µ–∫—Å—Ç—É –Ω–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É—î –Ω—ñ –ø–æ–∑–∏—Ç–∏–≤–Ω–∏—Ö, –Ω—ñ –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏—Ö —Å–ª—ñ–≤ / –≤–∏—Ä–∞–∑—ñ–≤.
" –ü—Ä–æ–±–ª–µ–º–∏ –∑—ñ —Å–≤—ñ—Ç–ª–æ–º –≤ –ö–∏—î–≤—ñ —Ç–∞ –æ–±–ª–∞—Å—Ç—ñ –ø—ñ—Å–ª—è –≤–∏–±—É—Ö—ñ–≤! " -> —É —Å–≤–æ—é —á–µ—Ä–≥—É –Ω–∞—Å—Ç—É–ø–Ω–∞ –Ω–æ–≤–∏–Ω–∞ –º–∞—î –Ω–µ–≥–∞—Ç–∏–≤–Ω–µ –∑–∞–±–∞—Ä–≤–ª–µ–Ω–Ω—è. –ê–≤—Ç–æ—Ä –¥–µ–º–æ–Ω—Å—Ç—Ä—É—î —Å–≤–æ—î –≤—ñ–¥–Ω–æ—à–µ–Ω–Ω—è —á–µ—Ä–µ–∑ —Å–ª–æ–≤–æ ‚Äú–ü—Ä–æ–±–ª–µ–º–∏‚Äù —Ç–∞ –∑–Ω–∞–∫ –æ–∫–ª–∏–∫—É ‚Äú!‚Äù, –ø—ñ–¥–∫—Ä–µ—Å–ª—é—é—á–∏ –µ–∫—Å–ø—Ä–µ—Å—ñ—é.
" :cry: –í–Ω–∞—Å–ª—ñ–¥–æ–∫ —Ä–∞–∫–µ—Ç–Ω–æ—ó –∞—Ç–∞–∫–∏ –∑–∞—Ñ—ñ–∫—Å–æ–≤–∞–Ω–æ –ø–∞–¥—ñ–Ω–Ω—è —É–ª–∞–º–∫—ñ–≤ –≤ –ü–µ—á–µ—Ä—Å—å–∫–æ–º—É —Ä–∞–π–æ–Ω—ñ –Ω–∞ –¥–∞—Ö –±–∞–≥–∞—Ç–æ–ø–æ–≤–µ—Ä—Ö–æ–≤–æ–≥–æ –∂–∏—Ç–ª–æ–≤–æ–≥–æ –±—É–¥–∏–Ω–∫—É, ‚Äì –ö–ú–í–ê " -> —Ç–µ–∫—Å—Ç —ñ–∑ –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–º —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç–æ–º, —â–æ –∞–≤—Ç–æ—Ä –¥–µ–º–æ–Ω—Å—Ç—Ä—É—î —á–µ—Ä–µ–∑ –≤–∏–∫–æ—Ä–∏—Å—Ç–∞–Ω–Ω—è ‚Äú:cry:‚Äù –µ–º–æ–¥–∑—ñ.
" –ù—É –Ω–æ—Ä–º " -> —Ü–µ –ø—Ä–∏–∫–ª–∞–¥ –ø–æ–∑–∏—Ç–∏–≤–Ω–æ–≥–æ —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç—É. –°–∞–º —Ç–µ–∫—Å—Ç –Ω–µ —î —Å–∏–ª—å–Ω–æ –µ–∫—Å–ø—Ä–µ—Å–∏–≤–Ω–∏–º, –∞–ª–µ –∞–≤—Ç–æ—Ä —è–≤–Ω–æ –¥–µ–º–æ–Ω—Å—Ç—Ä—É—î –µ–º–æ—Ü—ñ—é ‚Äú—Å—Ö–≤–∞–ª–µ–Ω–Ω—è‚Äù —á–æ–≥–æ—Å—å, —è–∫–∞ –Ω–∞–ª–µ–∂–∏—Ç—å –¥–æ –ø–æ–∑–∏—Ç–∏–≤–Ω–æ–≥–æ —Å–ø–µ–∫—Ç—Ä—É.
" :exclamation:–í –±—ñ–∫ –ö–∏—î–≤–∞ –ø—É—Å–∫–∏ —â–µ –¥–µ–∫—ñ–ª—å–∫–æ—Ö ‚Äò–ö–∏–Ω–¥–∂–∞–ª—ñ–≤‚Äô. –í–æ—Ä–æ–≥ –Ω–∞–º–∞–≥–∞—î—Ç—å—Å—è –ø—Ä–æ–±–∏—Ç–∏ –Ω–∞—à—ñ –ü–ü–û. –ü–æ–∫–∏ –≤—ñ–¥–±–∏–≤–∞—î–º–æ—Å—è, –∞–ª–µ —î –ø–∞–¥—ñ–Ω–Ω—è —É–ª–∞–º–∫—ñ–≤, —Ç–æ–∂ –ø–µ—Ä–µ–±—É–≤–∞—î–º–æ –≤ —É–∫—Ä–∏—Ç—Ç—è—Ö –∞–±–æ —Ö–æ—á–∞ –± –∑–∞ –ø–∞—Ä–æ—é —Å—Ç—ñ–Ω. " -> –¥–∞–Ω–∞ –Ω–æ–≤–∏–Ω–∞ —î –ø—Ä–∏–∫–ª–∞–¥–æ–º –Ω–µ–≥–∞—Ç–∏–≤–Ω–æ–≥–æ —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç—É. –ê–≤—Ç–æ—Ä –¥–µ–º–æ–Ω—Å—Ç—Ä—É—î —Å–≤–æ—î –≤—ñ–¥–Ω–æ—à–µ–Ω–Ω—è –¥–æ –ø–æ–¥—ñ—ó —á–µ—Ä–µ–∑ –≤–∏—Ä–∞–∑–∏ ‚Äú–í–æ—Ä–æ–≥ –Ω–∞–º–∞–≥–∞—î—Ç—å—Å—è –ø—Ä–æ–±–∏—Ç–∏ –Ω–∞—à—ñ –ü–ü–û‚Äù, ‚Äú–ü–æ–∫–∏ –≤—ñ–¥–±–∏–≤–∞—î–º–æ—Å—è, –∞–ª–µ‚Ä¶‚Äù.
" –° —á–µ–≥–æ —Ç—ã –≤–∑—è–ª? –£ –º–µ–Ω—è –≤ –ò—Ä–ø–µ–Ω–µ –≤—Å–µ –æ–∫–Ω–∞ –ø–æ–≤—ã–±–∏–≤–∞–ª–æ —è —Å–æ—Ö—Ä–∞–Ω–∏–ª –∫–≤–∏—Ç–∞–Ω—Ü–∏—é —Ç–æ —á—Ç–æ —Å–∞–º –ø–æ—Å—Ç–∞–≤–∏–ª –∏ –≤–µ—Ä–Ω—É–ª–∏ 20.000‚Äç " -> –ø—Ä–∏–∫–ª–∞–¥ ‚Äú–∑–º—ñ—à–∞–Ω–æ–≥–æ‚Äù —Å–µ–Ω—Ç–∏–º–µ–Ω—Ç—É. –£ –ø–µ—Ä—à—ñ–π —á–∞—Å—Ç–∏–Ω—ñ –∞–≤—Ç–æ—Ä –¥–µ–º–æ–Ω—Å—Ç—Ä—É—î –∫—Ä–∏—Ç–∏–∫—É –ø–æ –≤—ñ–¥–Ω–æ—à–µ–Ω–Ω—é –¥–æ —ñ–Ω—à–æ—ó –ª—é–¥–∏–Ω–∏. –£ –¥—Ä—É–≥—ñ–π —á–∞—Å—Ç–∏–Ω—ñ —Ç–µ–∫—Å—Ç—É - –∞–≤—Ç–æ—Ä —Ä–∞–¥—ñ—î, —â–æ –π–æ–º—É –∫–æ–º–ø–µ–Ω—Å–æ–≤–∞–Ω–æ –≤–∏—Ç—Ä–∞—Ç–∏ –Ω–∞ –≤—ñ–¥–Ω–æ–≤–ª–µ–Ω–Ω—è –¥–æ–º—ñ–≤–∫–∏.


–¢–≤–æ—è –≤—ñ–¥–ø–æ–≤—ñ–¥—å –º–∞—î –±—É—Ç–∏ –ª–∏—à–µ –æ–¥–Ω–∏–º —Å–ª–æ–≤–æ–º. –¶–ï –í–ê–ñ–õ–ò–í–û! –¢–∏ –º–∞—î—à –≤—ñ–¥–ø–æ–≤—ñ–¥–∞—Ç–∏ –≤–∏–∫–ª—é—á–Ω–æ –ª–∏—à–µ –æ–¥–Ω–∏–º —Å–ª–æ–≤–æ–º: –ø–æ–∑–∏—Ç–∏–≤–Ω–∏–π, –Ω–µ–≥–∞—Ç–∏–≤–Ω–∏–π, –Ω–µ–π—Ç—Ä–∞–ª—å–Ω–∏–π, –∑–º—ñ—à–∞–Ω–∏–π. 

'''

In [7]:
print(len(system_prompt)/4)

810.0


In [None]:
gpt_answers = []

for id in tqdm(df.index):
    gpt_answers.append(analyze_sentiment(df.loc[id, 'document_content']))

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12224/12224 [1:52:13<00:00,  1.82it/s]  


In [None]:
df['gpt_labels'] = gpt_answers

In [206]:
df.to_parquet('./data_provided/final_dataset/gpt_labeled_2_1.parquet')

In [207]:
df = pd.read_parquet('./data_provided/final_dataset/gpt_labeled_2_1.parquet')

In [208]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,mixed
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,neutral
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative


In [209]:
df['gpt_labels'].value_counts()

gpt_labels
negative    5150
positive    3298
neutral     2378
mixed       1398
Name: count, dtype: int64

In [210]:
df['y'] = df['annotator_sentiment'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [211]:
df['y_hat'] = df['gpt_labels'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1, 'mixed': 2}[x])

In [212]:
df

Unnamed: 0,response_id,document_id,user_id,annotator_sentiment,is_ck_annotation,response_timestamp,document_content,annotation_date,username,unique_document_id,language_wc,document_length,gpt_labels,y,y_hat
0,1,1,277133851,neutral,1,2025-03-09T23:23:07.220881,‚ö°Ô∏è–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞ –¥–µ–ª–µ–≥–∞—Ü—ñ—è –≤—ñ–¥–ø—Ä–∞–≤–∏–ª–∞—Å—è –Ω–∞ –ø–µ—Ä–µ–º–æ–≤...,2025-03-09,O,1_1,uk,67,neutral,0,0
1,3,2,1065283664,neutral,1,2025-03-09T23:44:28.262307,"–í–∏–±—É—Ö–∏ –Ω–∞ –û–¥–µ—â–∏–Ω—ñ, –ø–æ–ø–µ—Ä–µ–¥–Ω—å–æ ‚Äî –ü–ü–û.",2025-03-09,A,2_1,uk,36,negative,0,-1
2,4,3,1065283664,negative,1,2025-03-09T23:45:00.503098,"–ê —á—Ç–æ –¥–µ–ª–∞—Ç—å —Ç–µ–º ,–∫—Ç–æ –ª–∏—à–∏–ª—Å—è —Å–≤–æ–µ–≥–æ –∂–∏–ª—å—è ,–ø–æ...",2025-03-09,A,3_1,ru,177,negative,-1,-1
3,5,4,1065283664,negative,1,2025-03-09T23:46:33.265766,–¢–æ–≥–¥–∞ —É—á–∏—Å—å –±—ã—Å—Ç—Ä–æ –±–µ–≥–∞—Ç—å. –î–ª—è –º–µ–Ω—è –≤–æ–ø—Ä–æ—Å —Å–ª–æ...,2025-03-09,A,4_1,ru,103,mixed,-1,2
4,6,5,1065283664,neutral,1,2025-03-09T23:46:38.993496,–î–æ–±—Ä–∏–π –¥–µ–Ω—å,2025-03-09,A,5_1,uk,11,neutral,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,13028,8948,467130971,negative,0,2025-04-04T08:02:37.362562,"–ö—Ä–∞—â–µ ""–ø–æ–≤–∏–Ω–Ω–∞ –±—É—Ç–∏ –∑—Ä—É—á–Ω—ñ—à–æ—é, –Ω—ñ–∂ Uber —á–∏ Boo...",2025-04-04,D,8948_0,uk,51,positive,-1,1
12220,13029,2094,467130971,mixed,0,2025-04-04T08:03:35.792932,–£–≤–∞–≥–∞! –ó –¥–µ—è–∫–∏—Ö —ñ–Ω—Ç–µ—Ä–Ω–µ—Ç –¥–∂–µ—Ä–µ–ª —à–∏—Ä–∏—Ç—å—Å—è —ñ–Ω—Ñ–æ—Ä...,2025-04-04,D,2094_0,uk,402,neutral,2,0
12221,13030,5013,467130971,neutral,0,2025-04-04T08:03:42.008533,"–ü–∏—Ç–∞–Ω–Ω—è, —Ü–µ–π —Å–µ—Ä—Ç–∏—Ñ—ñ–∫–∞—Ç –º–æ–∂–Ω–∞ –≤–∂–µ –≤–∏–∫–æ—Ä–∏—Å—Ç–æ–≤—É–≤...",2025-04-04,D,5013_0,uk,113,neutral,0,0
12222,13031,4572,467130971,negative,0,2025-04-04T08:03:48.251166,–ù–∞ –í—É–≥–ª–µ–¥–∞—Ä—Å—å–∫–æ–º—É –Ω–∞–ø—Ä—è–º–∫—É –∑–∞–≥–∏–Ω—É–≤ –†–æ–º–∞ –Ü–≤–∞–Ω–µ–Ω...,2025-04-04,D,4572_0,uk,114,negative,-1,-1


In [213]:
metrics = evaluate_gpt_results(df)

In [214]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.5722, 'f1_score': 0.5748}


By language

In [217]:
df.language_wc.unique()

array(['uk', 'ru'], dtype=object)

In [219]:
metrics = evaluate_gpt_results(df.loc[df.language_wc == 'uk'])

In [220]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.5765, 'f1_score': 0.5724}


In [221]:
metrics = evaluate_gpt_results(df.loc[df.language_wc == 'ru'])

In [222]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.5625, 'f1_score': 0.5835}


By sentiment

In [223]:
df.annotator_sentiment.unique()

array(['neutral', 'negative', 'positive', 'mixed'], dtype=object)

In [226]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'neutral'])

In [227]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.386, 'f1_score': 0.557}


In [228]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'negative'])

In [229]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.7058, 'f1_score': 0.8275}


In [230]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'positive'])

In [231]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.7581, 'f1_score': 0.8624}


In [232]:
metrics = evaluate_gpt_results(df.loc[df.annotator_sentiment == 'mixed'])

In [233]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.2878, 'f1_score': 0.447}


# Conslusions

* English promtps work better
* Best results are achieved
* <xml> tags in prompts don't fully resolve problems with possible prompt injections 
* The best approach: few-shot examples, English prompt,  similar to guidelines for people