In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

## Andmete vormistamine

In [2]:
# Load the manual annotation results, discard the CSV's 'Unnamed: 0' column and
# rename the manual annotation columns so that they match the GPT annotation columns.
df_minu = (
    pd.read_csv('minu_tulemused.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns={
        "ese": "belongings",
        "riide_ese": "clothing_items",
        "jook_toit": "edibles",
        "sisse_minna": "enclosures_and_vehicles",
        "geo_lok": "geographical_locations",
        "asutus": "institutions",
        "-": "nonsense_words",
        "org_ür_fir": "organisations_and_events",
        "isikunimi": "persons",
        "peal_seista": "platforms_and_floors",
        "suund": "spatial_directions",
        "piirkond": "spatial_region",
        "ajaperiood": "time_expressions",
        "taim": "vegetative_enclosure",
    })
)
# The rename moved '-' to 'nonsense_words'; keep a copy under the original '-' name too.
df_minu['-'] = df_minu['nonsense_words']

# GPT results: swap 0 and 1 in 'nonsense_words' and leave every other value as it is.
# NOTE(review): presumably the GPT files encode this flag with the opposite polarity
# to the manual annotation — confirm against the prompt/export format.
df_GPT35 = pd.read_csv('GPT3_5_tulemused.csv').assign(
    nonsense_words=lambda df: df['nonsense_words'].apply(
        lambda x: 1 if x == 0 else 0 if x == 1 else x
    )
)
df_GPT4 = pd.read_csv('GPT4_0_tulemused.csv').assign(
    nonsense_words=lambda df: df['nonsense_words'].apply(
        lambda x: 1 if x == 0 else 0 if x == 1 else x
    )
)

## Reeglipõhine märgendamine

In [3]:
# Rule set that predicts from the subclass columns whether a word is a place or not
def kohaMääramine(df_gpt):
    """Label every row of ``df_gpt`` in place with exactly one of '0', '1', '?', '-'.

    The four one-hot output columns are (re)created on ``df_gpt``; nothing is
    returned. Rules are applied in priority order and the first match wins:

    1. '-' : ``nonsense_words`` is 1 while ``geographical_locations`` and
       ``persons`` are both 0.
    2. '?' : ``institutions`` is 1 and ``organisations_and_events`` is 0.
    3. '0' : ``clothing_items`` is 1.
    4. '1' : ``geographical_locations`` is 1.
    5. '1' : at least one place column is 1 and no non-place column is 1.
    6. '0' : everything else (no place column set, or a mix of both kinds).

    A missing value (NaN) never satisfies an ``== 1`` or ``== 0`` test.

    Rows are selected with positional boolean masks rather than per-row
    label-based writes, so rows that share an index label are still labelled
    independently.

    Args:
        df_gpt: DataFrame containing the subclass columns named above;
            modified in place.

    Raises:
        KeyError: if one of the required subclass columns is missing.
    """
    koha_veerud = [
        "belongings", "enclosures_and_vehicles", "geographical_locations",
        "platforms_and_floors", "spatial_directions", "spatial_region",
        "vegetative_enclosure"
    ]
    mitte_koha_veerud = [
        "clothing_items", "edibles", "persons",
        "time_expressions", "organisations_and_events"
    ]

    # Rule 1: nonsense word that is neither a geographical location nor a person.
    on_mõttetu = (
        (df_gpt['nonsense_words'] == 1)
        & (df_gpt['geographical_locations'] == 0)
        & (df_gpt['persons'] == 0)
    )
    # Rule 2: institution that is not also an organisation/event (only if rule 1 did not fire).
    on_ebaselge = (
        ~on_mõttetu
        & (df_gpt['institutions'] == 1)
        & (df_gpt['organisations_and_events'] == 0)
    )
    # Everything below only concerns rows not claimed by rules 1-2.
    otsustamata = ~(on_mõttetu | on_ebaselge)

    on_koht = (df_gpt[koha_veerud] == 1).any(axis=1)
    ei_ole_koht = (df_gpt[mitte_koha_veerud] == 1).any(axis=1)

    # Rules 3-5: a clothing item is never a place; otherwise a geographical
    # location always is, and so is any row with only place columns set.
    on_jah = (
        otsustamata
        & (df_gpt['clothing_items'] != 1)
        & ((df_gpt['geographical_locations'] == 1) | (on_koht & ~ei_ole_koht))
    )
    # Rule 6: whatever is left is "not a place".
    on_ei = otsustamata & ~on_jah

    # Positional assignment (plain arrays) keeps this safe for any index.
    df_gpt['0'] = on_ei.to_numpy().astype('int64')
    df_gpt['1'] = on_jah.to_numpy().astype('int64')
    df_gpt['?'] = on_ebaselge.to_numpy().astype('int64')
    df_gpt['-'] = on_mõttetu.to_numpy().astype('int64')


In [4]:
# Function that compares the prediction with the manual annotation
def kontrolliKoht(df_gpt, df_minu):
    """Pair each manually annotated lemma with the predicted label for that lemma.

    For every row of ``df_minu`` the true label is the name of the column among
    '0', '1', '-', '?' holding the largest value (ties resolve to the first
    column in that order). The predicted label is computed the same way from
    the first row of ``df_gpt`` with the same ``lemma``; if ``df_gpt`` has no
    such lemma the prediction is ``None``.

    The predictions are collected into a lemma-keyed dict once, instead of
    scanning ``df_gpt`` again for every manual row.

    Args:
        df_gpt: DataFrame with a 'lemma' column and the four label columns.
        df_minu: DataFrame with a 'lemma' column and the four label columns.

    Returns:
        ``(y_true, y_pred)``: two lists with one entry per row of ``df_minu``,
        in row order. ``y_pred`` entries may be ``None``.
    """
    koha_veerud = ['0', '1', '-', '?']

    y_true = df_minu[koha_veerud].idxmax(axis=1).tolist()

    # First occurrence of each lemma wins; rows without a lemma can never match.
    esimesed = (
        df_gpt.dropna(subset=['lemma'])
        .drop_duplicates(subset='lemma', keep='first')
    )
    ennustused = dict(
        zip(esimesed['lemma'], esimesed[koha_veerud].idxmax(axis=1))
    )

    y_pred = [ennustused.get(lemma) for lemma in df_minu['lemma']]

    return y_true, y_pred

In [5]:
# Decide from the GPT results whether each word is a place or not.
# Both frames are labelled in place (columns '0', '1', '?', '-').
kohaMääramine(df_gpt=df_GPT35)
kohaMääramine(df_gpt=df_GPT4)


In [6]:
# Compute accuracy, precision, recall and F1 score
y_true, y_pred_35 = kontrolliKoht(df_GPT35, df_minu)
_, y_pred_4 = kontrolliKoht(df_GPT4, df_minu)

def print_metrics(name, y_true, y_pred):
    """Print accuracy and macro-averaged precision, recall and F1 for one model.

    Entries whose prediction is ``None`` are excluded from both lists before
    scoring.

    Args:
        name: Model name shown in the printed header.
        y_true: Sequence of true labels.
        y_pred: Sequence of predicted labels; may contain ``None``.
    """
    # Keep a true label only when the prediction at the same position exists.
    y_true_filtered = [tegelik for tegelik, ennustus in zip(y_true, y_pred) if ennustus is not None]
    y_pred_filtered = [ennustus for ennustus in y_pred if ennustus is not None]

    print(f"\nModel: {name}")
    print("Õigsus", accuracy_score(y_true_filtered, y_pred_filtered))
    makro_mõõdikud = (
        ("Täpsus:", precision_score),
        ("Saagis:", recall_score),
        ("F1-skoor:", f1_score),
    )
    for silt, mõõdik in makro_mõõdikud:
        print(silt, mõõdik(y_true_filtered, y_pred_filtered, average='macro'))

print_metrics("GPT-3.5", y_true=y_true, y_pred=y_pred_35)
print_metrics("GPT-4", y_true=y_true, y_pred=y_pred_4)


Model: GPT-3.5
Õigsus 0.577962577962578
Täpsus: 0.4365862322306088
Saagis: 0.4567116377822499
F1-skoor: 0.4414283961957885

Model: GPT-4
Õigsus 0.6403326403326404
Täpsus: 0.47671408624378925
Saagis: 0.49365894244609004
F1-skoor: 0.4745275549263279


## RandomForestClassifier

In [7]:
# One-hot label columns; every other df_minu column except 'lemma' is a feature.
label_cols = ['0', '1', '-', '?']
feature_cols = [
    veerg
    for veerg in df_minu.columns
    if veerg != 'lemma' and veerg not in label_cols
]

In [8]:
def lisaKoht(df_gpt, df_minu):
    """Copy the label columns '0', '1', '?', '-' from ``df_minu`` onto ``df_gpt``.

    Rows are matched on ``lemma``. When a lemma occurs several times in
    ``df_minu`` the first occurrence is used. Rows of ``df_gpt`` whose lemma is
    absent from ``df_minu`` get 0 in all four columns. ``df_gpt`` is modified
    in place and nothing is returned.

    The labels are looked up through one lemma-indexed table instead of
    filtering ``df_minu`` once per ``df_gpt`` row.

    NOTE(review): an all-zero label row is indistinguishable from a real
    annotation for code that later takes ``idxmax`` over these columns —
    confirm that unmatched lemmas are meant to be kept.

    Args:
        df_gpt: DataFrame with a 'lemma' column; receives the label columns.
        df_minu: DataFrame with a 'lemma' column and the four label columns.
    """
    koha_veerud = ['0', '1', '?', '-']

    # One row per lemma, first occurrence wins; rows without a lemma never match.
    märgendid = (
        df_minu.dropna(subset=['lemma'])
        .drop_duplicates(subset='lemma', keep='first')
        .set_index('lemma')[koha_veerud]
    )

    # Positional mask of df_gpt rows that have a manual annotation.
    leitud = df_gpt['lemma'].isin(märgendid.index).to_numpy()
    # Label rows aligned one-to-one with the matched df_gpt rows.
    sobivad = märgendid.reindex(df_gpt.loc[leitud, 'lemma'].to_numpy())

    for veerg in koha_veerud:
        df_gpt[veerg] = 0
        if leitud.any():
            df_gpt.loc[leitud, veerg] = sobivad[veerg].to_numpy()


In [9]:
# GPT-3.5: re-read the raw results, swap 0 and 1 in 'nonsense_words' (other values
# are left as they are), drop rows with missing values, then attach the manual labels.
df_GPT35 = (
    pd.read_csv('GPT3_5_tulemused.csv')
    .assign(
        nonsense_words=lambda df: df['nonsense_words'].apply(
            lambda x: 1 if x == 0 else 0 if x == 1 else x
        )
    )
    .dropna()
)
lisaKoht(df_gpt=df_GPT35, df_minu=df_minu)

In [10]:
# GPT-4: re-read the raw results, swap 0 and 1 in 'nonsense_words' (other values
# are left as they are), drop rows with missing values, then attach the manual labels.
df_GPT4 = (
    pd.read_csv('GPT4_0_tulemused.csv')
    .assign(
        nonsense_words=lambda df: df['nonsense_words'].apply(
            lambda x: 1 if x == 0 else 0 if x == 1 else x
        )
    )
    .dropna()
)
lisaKoht(df_gpt=df_GPT4, df_minu=df_minu)


In [11]:
# Training data comes from df_minu: the feature columns as inputs, the label columns as target.
X_train = df_minu[feature_cols]
y_train = df_minu[label_cols].idxmax(axis=1)  # the label is the column holding the highest value (1)

# A random forest is stochastic (bootstrap samples, random feature subsets);
# fixing the seed makes the metrics reported below reproducible across runs.
RANDOM_STATE = 42
clf = RandomForestClassifier(random_state=RANDOM_STATE)  # create the model
clf.fit(X_train, y_train)  # train the model


In [13]:
# Testing the model
def evaluate_on_test_set(df_test, name):
    """Score the module-level classifier ``clf`` on ``df_test`` and print the metrics.

    Features are read from the module-level ``feature_cols``; the true label
    of a row is the name of the ``label_cols`` column holding its largest value.
    Prints accuracy plus macro-averaged precision, recall and F1.

    Args:
        df_test: DataFrame containing the feature and label columns.
        name: Model name shown in the printed header.
    """
    tunnused = df_test[feature_cols]
    tegelik = df_test[label_cols].idxmax(axis=1)
    ennustus = clf.predict(tunnused)

    print(f"\nModel: {name}")
    print("Õigsus:", accuracy_score(tegelik, ennustus))
    makro_mõõdikud = (
        ("Täpsus:", precision_score),
        ("Saagis:", recall_score),
        ("F1-skoor:", f1_score),
    )
    for silt, mõõdik in makro_mõõdikud:
        print(silt, mõõdik(tegelik, ennustus, average='macro'))

evaluate_on_test_set(df_test=df_GPT35, name="GPT-3.5")
evaluate_on_test_set(df_test=df_GPT4, name="GPT-4")


Model: GPT-3.5
Õigsus: 0.578125
Täpsus: 0.47626925756033234
Saagis: 0.5192518429966129
F1-skoor: 0.4427171555364794

Model: GPT-4
Õigsus: 0.6673640167364017
Täpsus: 0.49023076211616723
Saagis: 0.5388888888888889
F1-skoor: 0.4920444433283495
