In [2]:
import numpy as np
import re
import json
import pandas as pd

In [3]:
# Name of the evaluated model; also the prefix of the "<model>_response.json" file.
model = "text-babbage-001"

# Ground-truth table, one row per variable pair.
# NOTE(review): " varA" is selected with a leading space, presumably because the
# header in results.txt is written that way — confirm before normalising the name.
labels = (
    pd.read_csv("results.txt", delimiter=",")
    .loc[:, ["pairid", " varA", "varB", "dataset", "groundtruth"]]
    .copy()  # work on an independent frame so the assignments below never hit a view
)
# Convert pairid to int: keep the 4-digit run (raw string avoids the invalid "\d"
# escape; expand=False yields a Series rather than a one-column DataFrame).
labels["pairid"] = labels["pairid"].str.extract(r"(\d{4})", expand=False).astype(int)
# Remove all white space in labels.
labels["groundtruth"] = labels["groundtruth"].str.replace(r"\s+", "", regex=True)

# Per-pair weights: columns 0 and 5 of the space-delimited, header-less metadata file.
weights = (
    pd.read_csv("pairmeta.txt", delimiter=" ", header=None)
    .iloc[:, [0, 5]]
    .rename(columns={0: "pairid", 5: "weight"})
    .astype({"pairid": int})  # convert pairid to int so it joins with labels
)

# Join labels with weights and add empty placeholders for the two predictions.
merged_df = labels.merge(weights, on="pairid").assign(AtoB="", BtoA="")

# Saved model responses; explicit encoding keeps the read platform-independent.
with open(model + "_response.json", "r", encoding="utf-8") as file:
    loaded_data = json.load(file)

merged_df.head()

Unnamed: 0,pairid,varA,varB,dataset,groundtruth,weight,AtoB,BtoA
0,1,Altitude,Temperature,DWD,->,0.166,,
1,2,Altitude,Precipitation,DWD,->,0.166,,
2,3,Longitude,Temperature,DWD,->,0.167,,
3,4,Altitude,Sunshine hours,DWD,->,0.166,,
4,5,Age,Length,Abalone,->,0.143,,


In [6]:
# Preview at most the first 108 raw responses. Slicing (instead of indexing with
# range(108)) stops cleanly when fewer responses were loaded rather than raising
# IndexError.
for response in loaded_data[:108]:
    print(response)



In [32]:
def _classify_answer(answer):
    """Map a raw completion to "Yes", "No", or the ">>>>??" manual-review marker."""
    text = answer.strip()
    if text[:3] == "Yes":
        return "Yes"
    # An empty completion is counted as "No", and so is an explicit "No ..." reply
    # (word boundary so that e.g. "Not sure" is still flagged for review).
    if text == "" or re.match(r"No\b", text):
        return "No"
    return ">>>>??"


def add_prediction(merged_df, loaded_data, model, verbose=True):
    """Fill the AtoB / BtoA columns of the pair table from the model responses.

    Parameters
    ----------
    merged_df : DataFrame with a 'pairid' column and 'AtoB' / 'BtoA' columns.
        It is not modified; a copy indexed by 'pairid' is returned.
    loaded_data : iterable of response objects accepted by ``extract_answers``.
    model : unused; kept so existing calls keep working.
    verbose : when True (default) print every parsed response and the label
        it was converted to.

    Returns
    -------
    DataFrame indexed by 'pairid'. Response id "1" is written to 'AtoB', any
    other id to 'BtoA'. Each cell holds "Yes", "No", or ">>>>??" for answers
    that are neither empty nor start with "Yes" / "No".
    """
    predictions = merged_df.set_index('pairid')
    for response in loaded_data:
        pairid, resid, answer = extract_answers(response)
        if verbose:
            print(pairid, resid, answer)
        col = "AtoB" if resid == "1" else "BtoA"
        # NOTE(review): if int(pairid) is not in the index, .loc assignment appends
        # a new row instead of failing — confirm every response has a matching pair.
        predictions.loc[int(pairid), col] = _classify_answer(answer)
        if verbose:
            print("Converted:", predictions.loc[int(pairid), col])
    return predictions


def extract_answers(s):
    """Pull the pair id, response id and answer text out of one response.

    Parameters
    ----------
    s : dict whose first key starts with ``pair0NNN,_resN`` and whose value holds
        the completion text under ``["choices"][0]["text"]``.

    Returns
    -------
    (pairid, resid, answer) : the three-digit pair id and one-digit response id
        as strings, and the completion text with leading whitespace removed.

    Raises
    ------
    KeyError : if the first key does not start with ``pair0NNN,_resN`` or the
        reconstructed key is not present in ``s``.
    """
    key_pattern = re.compile(r"pair0(\d{3}),_res(\d)")

    # Read the key from the dict itself instead of pattern-matching its repr.
    first_key = next(iter(s), None)
    key_match = key_pattern.match(first_key) if isinstance(first_key, str) else None
    if key_match is None:
        raise KeyError("no 'pair0NNN,_resN' key in response (first key: %r)" % (first_key,))
    pairid, resid = key_match.groups()

    # Search for answer
    ans = s[key_match.group(0)]["choices"][0]["text"]

    # Strip leading whitespace rather than blindly dropping two characters, so an
    # answer without the leading blank lines is not truncated.
    return pairid, resid, ans.lstrip()

In [33]:
# Fill AtoB / BtoA from the loaded responses; the returned frame is indexed by pairid.
results = add_prediction(merged_df=merged_df, loaded_data=loaded_data, model=model)

001 1 Yes, the altitude can
Converted: Yes
001 2 
Converted: No
002 1 Yes, the altitude can
Converted: Yes
002 2 Yes, precipitation can cause
Converted: Yes
003 1 
Converted: No
003 2 
Converted: No
004 1 Yes, the altitude can
Converted: Yes
004 2 Yes, changing sunlight hours
Converted: Yes
005 1 Yes, abalone can
Converted: Yes
005 2 
Converted: No
006 1 Yes, abalone shells
Converted: Yes
006 2 
Converted: No
007 1 Yes, abalone at
Converted: Yes
007 2 
Converted: No
008 1 Yes, abalone at
Converted: Yes
008 2 
Converted: No
009 1 
Converted: No
009 2 
Converted: No
010 1 Yes, the shucked
Converted: Yes
010 2 Yes, a change in
Converted: Yes
011 1 Yes, abalone changes
Converted: Yes
011 2 Yes, a change in
Converted: Yes
012 1 
Converted: No
012 2 Yes, if the wage
Converted: Yes
013 1 Yes, displacement of a
Converted: Yes
013 2 Yes
Converted: Yes
014 1 Yes, the horse power
Converted: Yes
014 2 Yes, a change in
Converted: Yes
015 1 Yes, the weight of
Converted: Yes
015 2 Yes, a change in
Co

In [30]:
# Show the pair table with the parsed AtoB / BtoA predictions filled in.
results

Unnamed: 0_level_0,varA,varB,dataset,groundtruth,weight,AtoB,BtoA
pairid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Altitude,Temperature,DWD,->,0.166,Yes,No
2,Altitude,Precipitation,DWD,->,0.166,Yes,Yes
3,Longitude,Temperature,DWD,->,0.167,No,No
4,Altitude,Sunshine hours,DWD,->,0.166,Yes,Yes
5,Age,Length,Abalone,->,0.143,Yes,No
...,...,...,...,...,...,...,...
104,time for passing 1. segment,time for passing 2. segment,D. Janzing,->,0.200,Yes,Yes
105,pixel vector of a patch,total brightness at the screen,D. Janzing,->,0.500,Yes,Yes
106,time required for one round,voltage,D. Janzing,<-,1.000,Yes,Yes
107,strength of contrast,answer correct or not,"Schuett, edited by D. Janzing",->,1.000,Yes,Yes


In [31]:
# Expected answer per question: "->" means A causes B, so "does A cause B?" is "Yes"
# and "does B cause A?" is "No"; every other groundtruth value is treated as the reverse.
results["gt_AtoB"] = np.where(results["groundtruth"] == "->", "Yes", "No")
results["gt_BtoA"] = np.where(results["groundtruth"] == "->", "No", "Yes")

# 1 where the prediction equals the expected answer, else 0 (">>>>??" never matches).
results["correct_AtoB"] = np.where(results["gt_AtoB"] == results["AtoB"], 1, 0)
results["correct_BtoA"] = np.where(results["gt_BtoA"] == results["BtoA"], 1, 0)

# Each pair contributes two questions, hence the factor 2 in both denominators.
wtd_acc = (
    (results["correct_AtoB"] * results["weight"]).sum()
    + (results["correct_BtoA"] * results["weight"]).sum()
) / (results["weight"].sum() * 2)
# Divide by the actual number of pairs instead of a hard-coded 108.
acc = (results["correct_AtoB"].sum() + results["correct_BtoA"].sum()) / len(results) / 2
print("acc:", acc, "weighted acc:", wtd_acc)

acc: 0.49537037037037035 weighted acc: 0.47652437695363087


In [17]:
# A pair counts as correct only when both directions were answered correctly.
results["correct"] = np.where(
    (results["BtoA"] == results["gt_BtoA"]) & (results["gt_AtoB"] == results["AtoB"]), 1, 0
)
wtd_acc = (results["correct"] * results["weight"]).sum() / results["weight"].sum()
# Divide by the actual number of pairs instead of a hard-coded 108.
acc = results["correct"].sum() / len(results)
print("if we count both correct as correct: ")
print("acc:", acc, "weighted acc:", wtd_acc)

if we count both correct as correct: 
acc: 0.1111111111111111 weighted acc: 0.08087614974139633


In [9]:
# Show the scored table, including the gt_* and correct_* columns.
# NOTE(review): the saved output under this cell shows different AtoB / BtoA values
# than the earlier display of `results`, so it looks like it comes from another
# run — re-run the notebook top to bottom to refresh it.
results

Unnamed: 0_level_0,varA,varB,dataset,groundtruth,weight,AtoB,BtoA,gt_AtoB,gt_BtoA,correct_AtoB,correct_BtoA,correct
pairid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Altitude,Temperature,DWD,->,0.166,Yes,Yes,Yes,No,1,0,0
2,Altitude,Precipitation,DWD,->,0.166,Yes,Yes,Yes,No,1,0,0
3,Longitude,Temperature,DWD,->,0.167,Yes,Yes,Yes,No,1,0,0
4,Altitude,Sunshine hours,DWD,->,0.166,Yes,Yes,Yes,No,1,0,0
5,Age,Length,Abalone,->,0.143,Yes,Yes,Yes,No,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
104,time for passing 1. segment,time for passing 2. segment,D. Janzing,->,0.200,Yes,Yes,Yes,No,1,0,0
105,pixel vector of a patch,total brightness at the screen,D. Janzing,->,0.500,Yes,Yes,Yes,No,1,0,0
106,time required for one round,voltage,D. Janzing,<-,1.000,Yes,Yes,No,Yes,0,1,0
107,strength of contrast,answer correct or not,"Schuett, edited by D. Janzing",->,1.000,Yes,Yes,Yes,No,1,0,0
