In [1]:
import pandas as pd
from llms.OpenAIAssistantChatbot import OpenAIAssistantChatbot
import json

In [2]:
# Load the unlabeled training sample; the file is a semicolon-separated CSV.
sample_df = pd.read_csv(
    '../data/sample/train_sample_unlabeled.csv',
    sep=';',
)

In [None]:
# 3 Runs
run_titles = ['qwen2_0', 'qwen2_1', 'qwen2_2']

# (run_title, index) pairs whose model response could not be parsed or stored.
# Inspect this list after the cell finishes to see which rows still need a
# manual label instead of scrolling through the printed output.
failed_rows = []

total_rows = len(sample_df)

# Do Runs
for run_title in run_titles:
    label_col = run_title + '_label'
    reason_col = run_title + '_reason'
    # Add Columns (reset to None, so re-running the cell starts from a clean state)
    sample_df[label_col] = None
    sample_df[reason_col] = None
    # Do Run
    for index, row in sample_df.iterrows():
        print(f"({index}/{total_rows})")
        # Flatten the description onto a single line before embedding it in the prompt
        d = str(row['description']).replace('\n', ' ')
        message = f"Does it significantly help me achieve my goal -'{row['goal']}'- if I perform the volunteering activity titled '{str(row['title']).strip()}' with the following description: '{d}'?"
        # send message to Local Model
        response = OpenAIAssistantChatbot.ask_local_model(message)
        # Pull the candidate JSON text out once, *before* the try block, so the
        # error handler below can report it without touching `response` again
        # (re-reading response.content inside the handler would raise a second,
        # uncaught exception when the response has no usable string content).
        content = getattr(response, 'content', None)
        if not isinstance(content, str):
            content = ''
        # Candidate object: from the last '{' up to and including the last '}'
        json_text = content[content.rfind('{') : content.rfind('}') + 1]
        # get JSON Object
        try:
            json_response = json.loads(json_text)
            # Read both keys before writing, so a row is never left half-filled
            label = json_response['label']
            reason = json_response['reason']
            # Save
            sample_df.at[index, label_col] = label
            sample_df.at[index, reason_col] = reason
        except Exception as e:
            # Best effort: keep going with the remaining rows, but remember the failure
            failed_rows.append((run_title, index))
            print(f"Error index: {index}")
            print(f"Error: {json_text}")
            print(f"Error: {e}")

In [None]:
# Manual fix-up for row 124, whose model response hit a JSON parsing error.
# The same label (0) and reason text are written for every run.
# NOTE(review): this overwrites row 124 in all runs, including any run that may
# have parsed successfully - confirm that is intended.
manual_reason_124 = """The description of the 'Long-term volunteering project in EA World-Our Home' focuses on developing professional skills, leadership, team building, communication, intercultural dialogue, and cooperation. While these activities can be beneficial for personal growth and skill development, they do not specifically emphasize taking risks as a primary goal. The main objectives are more about learning new skills and contributing to the community rather than actively seeking out risky situations."""
for run_title in run_titles:
    label_column = f"{run_title}_label"
    reason_column = f"{run_title}_reason"
    sample_df.at[124, label_column] = 0
    sample_df.at[124, reason_column] = manual_reason_124

In [None]:
# check if model is consistent
def get_consense(row, columns):
    """Return True when every listed column holds the same, non-missing label.

    Args:
        row: Row indexable by column name (e.g. a pandas Series or a dict).
        columns: Names of the label columns to compare.

    Returns:
        bool: True if all listed columns contain one identical label.
        False if the labels differ, if ``columns`` is empty, or if any label
        is missing (None / NaN).
    """
    labels = [row[col] for col in columns]
    # A missing label is not an answer: without this guard, a row where every
    # column is None would collapse to a one-element set and count as agreement.
    if any(pd.isna(label) for label in labels):
        return False
    # check if all cols have the same value
    return len(set(labels)) == 1

# get final decision of labels
def get_final_decision(row, columns):
    """Pick the majority label (0 or 1) across the given columns.

    Args:
        row: Row indexable by column name (e.g. a pandas Series or a dict).
        columns: Names of the label columns that take part in the vote.

    Returns:
        int: 1 only when strictly more columns equal 1 than equal 0;
        otherwise 0. Ties, and values that are neither 0 nor 1 (such as
        None), therefore never push the result towards 1.
    """
    votes = [row[name] for name in columns]
    zero_votes = votes.count(0)
    one_votes = votes.count(1)
    if one_votes > zero_votes:
        return 1
    return 0

In [None]:
# Derive the per-row consensus flag and the majority label from the run columns.
# The list of label columns is built once and handed to both helpers via `args`.
label_columns = [f"{title}_label" for title in run_titles]
sample_df['consense'] = sample_df.apply(get_consense, axis=1, args=(label_columns,))
sample_df['final_label'] = sample_df.apply(get_final_decision, axis=1, args=(label_columns,))

In [None]:
# Write the labeled sample to disk as a semicolon-separated CSV,
# leaving out the DataFrame index column.
sample_df.to_csv(
    '../data/labeled_data/qwen_labeled_train_sample.csv',
    sep=';',
    index=False,
)