In [37]:
import numpy as np
import pandas as pd
import scienceplots
import seaborn as sns
import matplotlib.pyplot as plt

from itertools import product
from collections import Counter
from datasets import load_dataset
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

In [38]:
def metric_calculation(pred, gt):    
    acc=accuracy_score(gt, pred)
    f1=f1_score(gt, pred, average='macro')
    confusion=confusion_matrix(gt, pred)
    fpr=confusion[0,1]/len(gt) ## predict to be 1; actual 0
    fnr=confusion[1,0]/len(gt) ## predict to be 0; actual 1
    return acc, f1, fpr, fnr

In [39]:
def post_processing(pred):
    new_pred=[]
    for i in pred:
        i=i.lower()
        if 'response' in i:
            try: new_pred.append(i.split('response')[1].split()[1].replace('</s>', ''))
            except: new_pred.append(2)
        elif 'output' in i:
            try: new_pred.append(i.split('output')[1].split()[1].replace('</s>', ''))
            except: new_pred.append(2)
        else:
            try: new_pred.append(i.split()[0].replace('</s>', ''))
            except:new_pred.append(2)
    new_pred = np.array([int(float(i)) if i in ['0', '0.0', '1', '1.0'] else 2 for i in new_pred])
    return new_pred

### no values

In [40]:
ds = load_dataset("beanham/spatial_union_dataset")
test=ds['test']
gt=np.array(test['label'])
configs = [
    'zero_shot_no_heur',    
    'zero_shot_with_heur_hint_angle',
    'zero_shot_with_heur_hint_area',
    'zero_shot_with_heur_hint_angle_area',
    'few_shot_no_heur',    
    'few_shot_with_heur_hint_angle',
    'few_shot_with_heur_hint_area',
    'few_shot_with_heur_hint_angle_area',
]
new_names={
    'zero_shot_no_heur':'(plain)', 
    'zero_shot_with_heur_hint_angle': '(p)',
    'zero_shot_with_heur_hint_area': '(o)',
    'zero_shot_with_heur_hint_angle_area': '(p,o)',
    'few_shot_no_heur':'(plain)', 
    'few_shot_with_heur_hint_angle': '(p)',
    'few_shot_with_heur_hint_area': '(o)',
    'few_shot_with_heur_hint_angle_area': '(p,o)'    
}

In [42]:
results=[]
models=['llama3', 'mistral', '4o_mini', 'qwen_plus', '4o']
for model in models:
    print(f'Model: {model}...')
    for config in configs:
        pred=np.load(f'base/{model}/{model}_{config}.npy')
        pred=post_processing(pred)        
        metrics=metric_calculation(pred, gt)
        if 'zero_shot' in config:prompting='zero shot'
        else:prompting='few shot'
        results.append([new_names[config], prompting, model, metrics[0], metrics[1]])
results=pd.DataFrame(results, columns=['heuristics', 'prompting', 'model', 'acc', 'f1'])

Model: llama3...
Model: mistral...
Model: 4o_mini...
Model: qwen_plus...
Model: 4o...


In [43]:
results['heuristics'][results['heuristics']!='(plain)']='(hints)'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  results['heuristics'][results['heuristics']!='(plain)']='(hints)'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [44]:
results.groupby(['heuristics', 'model', 'prompting']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,f1
heuristics,model,prompting,Unnamed: 3_level_1,Unnamed: 4_level_1
(hints),4o,few shot,0.790309,0.753784
(hints),4o,zero shot,0.695071,0.684507
(hints),4o_mini,few shot,0.6132,0.607154
(hints),4o_mini,zero shot,0.493734,0.463465
(hints),llama3,few shot,0.382623,0.253068
(hints),llama3,zero shot,0.508772,0.389737
(hints),mistral,few shot,0.402673,0.226395
(hints),mistral,zero shot,0.558062,0.360336
(hints),qwen_plus,few shot,0.803676,0.78728
(hints),qwen_plus,zero shot,0.79198,0.768315


In [35]:
for model in models:
    sub=results[results.model==model]
    if model == 'llama3':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("Spatial Union Accuracy", fontsize=12)
        #plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/union_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    elif model == '4o':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        g=sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.962)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        #plt.title(model, fontsize=14)
        plt.legend()
        plt.savefig(f'../../visualizations/union_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass        
    else:
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        #plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/union_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass        

### with values

In [45]:
configs = [
    'zero_shot_with_heur_value_angle',
    'zero_shot_with_heur_value_area',
    'zero_shot_with_heur_value_angle_area',
    'few_shot_with_heur_value_angle',
    'few_shot_with_heur_value_area',
    'few_shot_with_heur_value_angle_area',
]
new_names={
    'zero_shot_with_heur_value_angle': '(p)',
    'zero_shot_with_heur_value_area': '(o)',
    'zero_shot_with_heur_value_angle_area': '(p,o)',
    'few_shot_with_heur_value_angle': '(p)',
    'few_shot_with_heur_value_area': '(o)',
    'few_shot_with_heur_value_angle_area': '(p,o)'    
}

In [46]:
results=[]
models=['llama3', 'mistral', '4o_mini', 'qwen_plus', '4o']
for model in models:
    print(f'Model: {model}...')
    for config in configs:
        pred=np.load(f'base/{model}/{model}_{config}.npy')
        pred=post_processing(pred)        
        metrics=metric_calculation(pred, gt)
        if 'zero_shot' in config:
            prompting='zero shot'
        else:
            prompting='few shot'
        results.append([new_names[config], prompting, model, metrics[0], metrics[1]])
results=pd.DataFrame(results, columns=['heuristics', 'prompting', 'model', 'acc', 'f1'])

Model: llama3...
Model: mistral...
Model: 4o_mini...
Model: qwen_plus...
Model: 4o...


In [48]:
results['heuristics'][results['heuristics']!='(plain)']='(values)'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  results['heuristics'][results['heuristics']!='(plain)']='(values)'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

In [49]:
results.groupby(['heuristics', 'model', 'prompting']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,f1
heuristics,model,prompting,Unnamed: 3_level_1,Unnamed: 4_level_1
(values),4o,few shot,0.863826,0.835804
(values),4o,zero shot,0.884712,0.861634
(values),4o_mini,few shot,0.818713,0.767507
(values),4o_mini,zero shot,0.772765,0.70351
(values),llama3,few shot,0.471178,0.324843
(values),llama3,zero shot,0.578947,0.418468
(values),mistral,few shot,0.680869,0.440159
(values),mistral,zero shot,0.616541,0.302994
(values),qwen_plus,few shot,0.90142,0.883764
(values),qwen_plus,zero shot,0.869674,0.841693


In [36]:
for model in models:
    sub=results[results.model==model]
    if model == 'llama3':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]],legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.962)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("Spatial Union Accuracy", fontsize=12)
        #plt.title(model, fontsize=14)
        plt.legend()
        plt.savefig(f'../../visualizations/union_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    elif model == '4o':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        g=sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.990)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        #plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/union_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    else:
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.962, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        #plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/union_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass        