In [1]:
import numpy as np
import pandas as pd
import scienceplots
import seaborn as sns
import matplotlib.pyplot as plt

from itertools import product
from collections import Counter
from datasets import load_dataset
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

In [2]:
def metric_calculation(pred, gt):    
    acc=accuracy_score(gt, pred)
    f1=f1_score(gt, pred, average='macro')
    confusion=confusion_matrix(gt, pred)
    fpr=confusion[0,1]/len(gt) ## predict to be 1; actual 0
    fnr=confusion[1,0]/len(gt) ## predict to be 0; actual 1
    return acc, f1, fpr, fnr
    
def post_processing(pred):
    new_pred=[]
    for i in pred:
        i=i.lower()
        if 'response' in i:
            try: new_pred.append(i.split('response')[1].split()[1].replace('</s>', ''))
            except: new_pred.append(2)
        elif 'output' in i:
            try: new_pred.append(i.split('output')[1].split()[1].replace('</s>', ''))
            except: new_pred.append(2)
        else:
            try: new_pred.append(i.split()[0].replace('</s>', ''))
            except:new_pred.append(2)
    new_pred = np.array([int(float(i)) if i in ['0', '0.0', '1', '1.0'] else 2 for i in new_pred])
    return new_pred

## no values

In [36]:
ds = load_dataset("beanham/spatial_join_dataset")
test=ds['test']
gt=np.array(test['label'])
configs = [
    'zero_shot_no_heur',    
    'zero_shot_with_heur_hint_angle',
    'zero_shot_with_heur_hint_distance',
    'zero_shot_with_heur_hint_area',
    'zero_shot_with_heur_hint_angle_distance',
    'zero_shot_with_heur_hint_angle_area',
    'zero_shot_with_heur_hint_distance_area',
    'zero_shot_with_heur_hint_all',    
    'few_shot_no_heur',    
    'few_shot_with_heur_hint_angle',
    'few_shot_with_heur_hint_distance',
    'few_shot_with_heur_hint_area',
    'few_shot_with_heur_hint_angle_distance',
    'few_shot_with_heur_hint_angle_area',
    'few_shot_with_heur_hint_distance_area',
    'few_shot_with_heur_hint_all'
]
new_names={
    'zero_shot_no_heur':'(plain)', 
    'zero_shot_with_heur_hint_angle': '(p)',
    'zero_shot_with_heur_hint_distance': '(c)',
    'zero_shot_with_heur_hint_area': '(o)',
    'zero_shot_with_heur_hint_angle_distance': '(p,c)',
    'zero_shot_with_heur_hint_angle_area': '(p,o)',
    'zero_shot_with_heur_hint_distance_area': '(c,o)',
    'zero_shot_with_heur_hint_all': '(p,c,o)',
    'few_shot_no_heur':'(plain)', 
    'few_shot_with_heur_hint_angle': '(p)',
    'few_shot_with_heur_hint_distance': '(c)',
    'few_shot_with_heur_hint_area': '(o)',
    'few_shot_with_heur_hint_angle_distance': '(p,c)',
    'few_shot_with_heur_hint_angle_area': '(p,o)',
    'few_shot_with_heur_hint_distance_area': '(c,o)',
    'few_shot_with_heur_hint_all': '(p,c,o)',
}
## evaluate on a subset
np.random.seed(100)
index=np.random.randint(0, 3069, 1000)

In [37]:
results=[]
models=['llama3', 'mistral', '4o_mini', 'qwen_plus', '4o']
for model in models:
    print(f'Model: {model}...')
    for config in configs:
        pred=np.load(f'base/{model}/{model}_{config}.npy')
        pred=post_processing(pred)        
        if len(pred)==1000:metrics=metric_calculation(pred, gt)
        else:metrics=metric_calculation(pred[index], gt)
        if 'zero_shot' in config:prompting='zero shot'
        else:prompting='few shot'
        results.append([new_names[config], prompting, model, metrics[0], metrics[1]])
results=pd.DataFrame(results, columns=['heuristics', 'prompting', 'model', 'acc', 'f1'])

Model: llama3...
Model: mistral...
Model: 4o_mini...
Model: qwen_plus...
Model: 4o...


In [38]:
results['heuristics'][results['heuristics']!='(plain)']='(hints)'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  results['heuristics'][results['heuristics']!='(plain)']='(hints)'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [39]:
results.groupby(['heuristics', 'model', 'prompting']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,f1
heuristics,model,prompting,Unnamed: 3_level_1,Unnamed: 4_level_1
(hints),4o,few shot,0.585429,0.515479
(hints),4o,zero shot,0.445286,0.399611
(hints),4o_mini,few shot,0.562143,0.502145
(hints),4o_mini,zero shot,0.494143,0.436149
(hints),llama3,few shot,0.491571,0.333741
(hints),llama3,zero shot,0.522286,0.344881
(hints),mistral,few shot,0.601286,0.359539
(hints),mistral,zero shot,0.599286,0.250481
(hints),qwen_plus,few shot,0.661429,0.628682
(hints),qwen_plus,zero shot,0.648571,0.502431


In [123]:
for model in models:
    sub=results[results.model==model]
    if model == 'llama3':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("Spatial Join Accuracy", fontsize=12)
        plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/join_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    elif model == '4o':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        g=sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]])
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.990)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        plt.title(model, fontsize=14)
        plt.legend()
        plt.savefig(f'../../visualizations/join_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    else:
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/join_task_no_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass        

### With Values

In [43]:
configs = [
    'zero_shot_with_heur_value_angle',
    'zero_shot_with_heur_value_distance',
    'zero_shot_with_heur_value_area',
    'zero_shot_with_heur_value_angle_distance',
    'zero_shot_with_heur_value_angle_area',
    'zero_shot_with_heur_value_distance_area',
    'zero_shot_with_heur_value_all',
    'few_shot_with_heur_value_angle',
    'few_shot_with_heur_value_distance',
    'few_shot_with_heur_value_area',
    'few_shot_with_heur_value_angle_distance',
    'few_shot_with_heur_value_angle_area',
    'few_shot_with_heur_value_distance_area',
    'few_shot_with_heur_value_all'
]
new_names={
    'zero_shot_with_heur_value_angle': '(p)',
    'zero_shot_with_heur_value_distance': '(c)',
    'zero_shot_with_heur_value_area': '(o)',
    'zero_shot_with_heur_value_angle_distance': '(p,c)',
    'zero_shot_with_heur_value_angle_area': '(p,o)',
    'zero_shot_with_heur_value_distance_area': '(c,o)',
    'zero_shot_with_heur_value_all': '(p,c,o)',
    'few_shot_with_heur_value_angle': '(p)',
    'few_shot_with_heur_value_distance': '(c)',
    'few_shot_with_heur_value_area': '(o)',
    'few_shot_with_heur_value_angle_distance': '(p,c)',
    'few_shot_with_heur_value_angle_area': '(p,o)',
    'few_shot_with_heur_value_distance_area': '(c,o)',
    'few_shot_with_heur_value_all': '(p,c,o)',
}
## evaluate on a subset
np.random.seed(100)
index=np.random.randint(0, 3069, 1000)

In [41]:
results=[]
models=['llama3', 'mistral', '4o_mini', 'qwen_plus', '4o']
for model in models:
    print(f'Model: {model}...')
    for config in configs:
        pred=np.load(f'base/{model}/{model}_{config}.npy')
        pred=post_processing(pred)        
        if len(pred)==1000: metrics=metric_calculation(pred, gt)
        else: metrics=metric_calculation(pred[index], gt)
        if 'zero_shot' in config: prompting='zero shot'
        else: prompting='few shot'
        results.append([new_names[config], prompting, model, metrics[0], metrics[1]])
results=pd.DataFrame(results, columns=['heuristics', 'prompting', 'model', 'acc', 'f1'])

Model: llama3...
Model: mistral...
Model: 4o_mini...
Model: qwen_plus...
Model: 4o...


In [44]:
results['heuristics'][results['heuristics']!='(plain)']='(values)'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  results['heuristics'][results['heuristics']!='(plain)']='(values)'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

In [45]:
results.groupby(['heuristics', 'model', 'prompting']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,f1
heuristics,model,prompting,Unnamed: 3_level_1,Unnamed: 4_level_1
(values),4o,few shot,0.9,0.88378
(values),4o,zero shot,0.803714,0.781584
(values),4o_mini,few shot,0.866143,0.848503
(values),4o_mini,zero shot,0.747857,0.657706
(values),llama3,few shot,0.517286,0.353992
(values),llama3,zero shot,0.562714,0.367841
(values),mistral,few shot,0.642429,0.37705
(values),mistral,zero shot,0.610571,0.268262
(values),qwen_plus,few shot,0.901286,0.887346
(values),qwen_plus,zero shot,0.837143,0.78164


In [16]:
for model in models:
    sub=results[results.model==model]
    if model == 'llama3':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]])
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.990)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("Spatial Join Accuracy", fontsize=12)
        plt.title(model, fontsize=14)
        plt.legend()
        plt.savefig(f'../../visualizations/join_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    elif model == '4o':
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        g=sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic (0.990)')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/join_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass
    else:
        plt.style.use('science')
        plt.rc('text', usetex=False)
        pals=list(sns.color_palette("Paired"))
        plt.figure(figsize=(3,3))
        sns.barplot(sub, x="heuristics", y="acc", hue="prompting", palette=[pals[1],pals[3]], legend=False)
        plt.axhline(y=0.990, color='red', linestyle='-.', linewidth=2, label='Best Heuritic')
        plt.xlabel("")
        plt.xticks(fontsize=12,rotation=90,ha='right')
        plt.ylabel("")
        plt.yticks([])
        plt.title(model, fontsize=14)
        plt.savefig(f'../../visualizations/join_task_with_heuristic_values_{model}.png', bbox_inches='tight',pad_inches=0, dpi=600)
        plt.close()
        pass        