# Baseline solution based on GPT-4 prompting

## Imports and data read

In [115]:
import pandas as pd
import numpy as np

from openai import OpenAI
from sklearn.metrics import accuracy_score, f1_score

In [72]:
with open('./data_provided/kyiv_digital_sentiment_annotation/kyiv_digital_sentiment_annotation_annotator_x.tsv','r', encoding='utf-8') as file_tsv:
    df=list(file_tsv)
    df=[e.replace('\n','').split('\t')[0:2] for e in df] #[1:652]

In [73]:
df = pd.DataFrame(df, columns=['content', 'label'])

In [74]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 564 entries, 0 to 563
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   content  564 non-null    object
 1   label    564 non-null    object
dtypes: object(2)
memory usage: 8.9+ KB


In [75]:
df.drop(0, inplace=True)

In [76]:
df.label.unique()

array(['Negative', 'Positive', 'Neutral'], dtype=object)

Examples of inconsistencies: 

![image.png](attachment:image.png)

![image.png](attachment:image.png)

## System prompt creation

In [59]:
system_prompt = '''

You are a Ukrainian professional social network analyst. You need to analyze peoples emotions and define the comments sentiment from the Ukrainian social media. The input you receive is a text from Telegram messenger. The text can be Ukrainian or Russian. 

Your output must be a single word from the following list: ['positive', 'neutral', 'negative']. 
Here are some examples of inputs to you and your outputs expected:

```
Input: '–ö–∏–µ–≤–µ –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å –∑–≤—É—á–∞—Ç—å –≤–∑—Ä—ã–≤—ã. –°–ú–ò —Å–æ–æ–±—â–∞—é—Ç, —á—Ç–æ –≤ —Ä—è–¥–µ —Ä–∞–π–æ–Ω–æ–≤ –ø—Ä–æ–ø–∞–ª —Å–≤–µ—Ç.'
Output: 'negative'

Input: '–ß–µ—Ä–µ–∑ –æ–±—Å—Ç—Ä—ñ–ª–∏ —É –ö–∏—î–≤—ñ –∑–Ω–µ—Å—Ç—Ä—É–º–ª–µ–Ω–æ –º–∞–π–∂–µ 260 —Ç–∏—Å—è—á —Å–ø–æ–∂–∏–≤–∞—á—ñ–≤, –≤—ñ–¥–∫–ª—é—á–∞–ª–æ—Å—è –æ–±–ª–∞–¥–Ω–∞–Ω–Ω—è –ø—ñ–¥—Å—Ç–∞–Ω—Ü—ñ–π —É –ö–∏—ó–≤—Å—å–∫—ñ–π –æ–±–ª–∞—Å—Ç—ñ, ‚Äî –ú—ñ–Ω–µ–Ω–µ—Ä–≥–æ   –£ –ö–∏—î–≤—ñ –ø–æ—à–∫–æ–¥–∂–µ–Ω–æ –ø–æ–≤—ñ—Ç—Ä—è–Ω—É –ª—ñ–Ω—ñ—é, –±–µ–∑ –Ω–∞–ø—Ä—É–≥–∏ –Ω–∏–∑–∫–∞ –ø—ñ–¥—Å—Ç–∞–Ω—Ü—ñ–π ‚Äì –∑–Ω–µ—Å—Ç—Ä—É–º–ª–µ–Ω—ñ 259 —Ç–∏—Å—è—á —Ç–æ—á–æ–∫ –æ–±–ª—ñ–∫—É.'
Output: 'negative'

Input: '–ü–æ–ø–µ—Ä–µ–¥–Ω—ñ–π –≤–∏—Å–Ω–æ–≤–æ–∫üßê  –ü–µ—Ä—à–µ. –£ –Ω–∞—Å –Ω–∞–π–∫—Ä–∞—â–µ –≤ —Å–≤—ñ—Ç—ñ –ü–ü–û! –ñ–æ–¥–Ω–∞ –∫—Ä–∞—ó–Ω–∞ –Ω–µ –≤–∏—Ç—Ä–∏–º–∞–ª–∞ –± —Ç–∞–∫–æ—ó –º–∞—Å–æ–≤–∞–Ω–æ—ó –∞—Ç–∞–∫–∏. –î–æ—Å–≤—ñ–¥ —ñ –Ω–∞—è–≤–Ω—ñ—Å—Ç—å –∑–∞—Å–æ–±—ñ–≤ –≤–±–µ—Ä—ñ–≥–∞—é—Ç—å 99% –Ω–∞—Å–µ–ª–µ–Ω–Ω—è  –î—Ä—É–≥–µ. –†—É—Å–Ω—è –ø—Ä–æ—Å—Ç–æ –≤—á–µ—Ä–≥–æ–≤–µ —Ö–æ—á–µ –Ω–∞—Å –∑–∞–ª—è–∫–∞—Ç–∏, –∞–ª–µ –º–∏–Ω—É–ª–æ—ó –∑–∏–º–∏ —ñ –≤ —Ç—Ä–∞–≤–Ω—ñ –º–∏ —á—ñ—Ç–∫–æ –≤–∏–≤—á–∏–ª–∏ –∞–ª–≥–æ—Ä–∏—Ç–º–∏ –±–µ–∑–ø–µ–∫–∏ —ñ —Ö—î—Ä –∫–ª–∞–¥–µ–º —Ä—É—Å–∞–∫–∞–º –Ω–∞ –≤–æ—Ä–æ—Ç–Ω—ñ–∫  –¢—Ä–µ—Ç—î. –ù–µ –±—ñ–π—Ç–µ—Å—è, —Å–∫–æ—Ä–æ —ñ –≤–æ–¥–∞ —ñ –µ–ª–µ–∫—Ç—Ä–∏–∫–∞ –ø–æ–≤–µ—Ä–Ω—É—Ç—å—Å—è, –∞ –≤–æ–ª—è –Ω–∞—à–∞ –Ω–µ –∑–ª–∞–º–∞—î—Ç—å—Å—è!  –Ü–Ω—Ñ–æ—Ä–º—É—î –ö–∏—ó–≤ | –ù–∞–¥—ñ—Å–ª–∞—Ç–∏ –Ω–æ–≤–∏–Ω—É'
Output: 'positive'

Input: '–ì–æ–ª–æ—Å–µ–µ–≤—Å–∫–∏–π —Ä-–Ω –µ—Å—Ç—å —Å–≤–µ—Ç üòâ‚úåüèª'
Output: 'positive'

Input: '–ü—ñ–¥–∫–∞–∂—ñ—Ç—å, –±—É–¥—å –ª–∞—Å–∫–∞, —á–∏ –±—É–≤–∞—é—Ç—å –Ω–∞ –í–∏–¥—É–±–∏—á–∞—Ö –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º–∞—Ö –º—ñ—Å—å–∫–æ—ó —ñ –ø—Ä–∏–º—ñ—Å—å–∫–∏—Ö –µ–ª–µ–∫—Ç—Ä–∏—á–æ–∫?'
Output: 'neutral'
```

Remember that the 'sentiment' values can be only and only from the following list: ['positive', 'neutral', 'negative']. 

'''

In [60]:
print(len(system_prompt)/4)

391.25


## Pipeline

https://platform.openai.com/docs/api-reference/authentication

In [None]:
api_key = 'sk-proj-7yyg4KC696T7WK6CmLeWT3BlbkFJfjiC6bs5r8R7yoGLCIUP'

In [96]:
client = OpenAI(api_key=api_key)

In [100]:
def analyze_sentiment(comment):
    """
    Sends a request to OpenAI's GPT model to analyze sentiment.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": comment}
        ]
    )   
    return response.choices[0].message.content

In [101]:
example_comment = '–£ –¥–µ—è–∫–∏—Ö —Ä–∞–π–æ–Ω–∞—Ö –ö–∏—î–≤–∞ –∑–Ω–∏–∫–ª–æ —Å–≤—ñ—Ç–ª–æ.'

In [102]:
sentiment_result = analyze_sentiment(example_comment)

In [103]:
print("Sentiment Analysis Result:", sentiment_result)

Sentiment Analysis Result: 'negative'


In [104]:
gpt_answers = []

for id in df.index:
    gpt_answers.append(analyze_sentiment(df.loc[id, 'content']))

In [105]:
df['gpt_labels'] = gpt_answers

In [107]:
df['gpt_labels'].value_counts()

gpt_labels
'negative'    253
'positive'    128
'neutral'     123
negative       44
neutral         8
positive        7
Name: count, dtype: int64

In [111]:
df.loc[df['gpt_labels'] == "'negative'", 'gpt_labels'] = 'negative'
df.loc[df['gpt_labels'] == "'positive'", 'gpt_labels'] = 'positive'
df.loc[df['gpt_labels'] == "'neutral'", 'gpt_labels'] = 'neutral'

In [112]:
df['gpt_labels'].value_counts()

gpt_labels
negative    297
positive    135
neutral     131
Name: count, dtype: int64

In [113]:
df['label'].value_counts()

label
Negative    323
Positive    169
Neutral      71
Name: count, dtype: int64

In [None]:
df['y'] = df['label'].apply(lambda x: {'Negative': -1, 'Neutral': 0, 'Positive': 1}[x])

In [None]:
df['y_hat'] = df['gpt_labels'].apply(lambda x: {'negative': -1, 'neutral': 0, 'positive': 1}[x])

In [None]:
df

Unnamed: 0,content,label,gpt_labels,y,y_hat
1,–®–æ –ø–æ –≤—ñ–¥–∫–ª—é—á–µ–Ω—å –µ–Ω–µ—Ä–≥—ñ—ó –≤ –ö–∏—î–≤—ñ,Negative,neutral,-1,0
2,–ó–±—É–¥—É–≤–∞–ª–∏ —Ñ—ñ–∑–∏—á–Ω–∏–π –∑–∞—Ö–∏—Å—Ç –¥–ª—è –µ–Ω–µ—Ä–≥–æ–æ–± º—î–∫—Ç—ñ–≤ –ö...,Positive,positive,1,1
3,"–¢–∞–∫ —è –∂ —Ç–µ–±–µ –∑–∞–¥–∞–ª –≤–æ–ø—Ä–æ—Å. –ö–∏–µ–≤, –º–∞–π, –ø–µ—Ä–≤–æ–µ –ø...",Positive,positive,1,1
4,–ê–≤–∞—Ä—ñ—ó,Negative,negative,-1,-1
5,"–£ –≤–∞—Å –¥–∞–≤–Ω–æ –≤–∂–µ —â–æ—Å—å –≤—ñ–¥–ø–∞–¥–∞—î,–∞ –≤–∏ —â–µ –¥–æ —Ü–∏—Ö –ø...",Negative,negative,-1,-1
...,...,...,...,...,...
559,‚ÄºÔ∏è–ö–∏—ó–≤. –ó–∞–ø—Ä–æ—à—É—î–º–æ –¥–æ —Å–ø—ñ–≤–ø—Ä–∞—Ü—ñ —É –≤–µ–ª–∏–∫—É –∫–æ–º–ø–∞...,Positive,positive,1,1
560,–Ø –Ω–µ —è —ñ —Ö–∞—Ç–∞ –º–µ—Ç—Ä–æ –Ω–µ –º–æ—î. –î–µ—Å—å —Ç–∞–∫–µ –∑–∞–≥–∞–ª—å–Ω–µ...,Negative,negative,-1,-1
561,–£ –º–µ–Ω—è —Å–æ—Å–µ–¥–∞ –≤ –Ω–∞—á–∞–ª–µ —è–Ω–≤–∞—Ä—è –ø—Ä–∏–Ω—è–ª–∏ –Ω–∞ –¢—Ä–æ–µ—â...,Neutral,neutral,0,0
562,–û–±“ë—Ä—É–Ω—Ç–æ–≤–∞–Ω–∏–π —Ç–∞—Ä–∏—Ñ –Ω–∞ –ø—Ä–æ—ó–∑–¥ —É –ö–∏—ó–≤—Å—å–∫–æ–º—É –º–µ—Ç...,Negative,negative,-1,-1


## Metrics

In [136]:
def evaluate_gpt_results(df):
    """
    Computes accuracy and F1-score for GPT sentiment analysis results.
    
    Parameters:
    df (pd.DataFrame): A DataFrame containing 'true_label' and 'gpt_label'.
    
    Returns:
    dict: A dictionary with accuracy and F1-score.
    """
    accuracy = accuracy_score(df['y'], df['y_hat'])
    f1 = f1_score(df['y'], df['y_hat'], average='weighted')
    
    return {"accuracy": round(accuracy, 4), "f1_score": round(f1, 4)}

In [137]:
metrics = evaluate_gpt_results(df)

In [138]:
print("Evaluation Metrics:", metrics)

Evaluation Metrics: {'accuracy': 0.7798, 'f1_score': 0.7985}
