In [5]:
import os 
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Experiment 1

# Models whose result files are loaded, in the order they are concatenated.
models = ['gpt4o', 'gpt4o_mini', 'llama3_1_instruct_70b', 'llama3_8b', 'gemma2_9b', 'exaone_8b', 'qwen2_7b']
answer_type = 'Binary'
dataset = 'dataset2'


def _load_model_outputs(result_dir, model_names):
    """Read ``<result_dir>/<model>.csv`` for every model name.

    Args:
        result_dir: Directory holding one CSV file per model.
        model_names: Iterable of model names (CSV file stems).

    Returns:
        A list with one DataFrame per model, in the given order, each holding
        the CSV's 'model_output' column plus a constant 'model' column set to
        the model name.
    """
    frames = []
    for name in model_names:
        outputs = pd.read_csv(f"{result_dir}/{name}.csv")[['model_output']]
        # assign() returns a new frame instead of writing into the column
        # subset, which is the pattern pandas warns about as chained
        # assignment.
        frames.append(outputs.assign(model=name))
    return frames


# Experiment 1: one result file per model.
data1_dfs = _load_model_outputs(f"results/experiment1/{answer_type}", models)

# Experiment 2.1: same layout, with an extra per-dataset directory level.
data2_dfs = _load_model_outputs(f"results/experiment2_1/{answer_type}/{dataset}", models)

# Stack the per-model frames; the original per-file row index is kept as is.
data1_df = pd.concat(data1_dfs)
data2_df = pd.concat(data2_dfs)

# Figure: one bar chart per experiment, side by side, sharing a single
# figure-level legend, saved as a PDF and then shown.
#
# NOTE(review): data1_df / data2_df as built earlier in this cell carry only
# the columns 'model_output' and 'model', while the barplot below asks for
# 'Model', 'Deontic Response Ratio' and 'Input Type'. Unless those columns are
# added somewhere outside this view, sns.barplot will raise -- confirm.
# NOTE(review): custom_palette is not defined in the visible part of the
# file; presumably it comes from an earlier cell -- confirm.
output_path = 'pdfs/figure_exp1_deontic_common.pdf'

# Height of the dashed reference line in each panel: just above 100 in the
# first panel, just below 0 in the second. Presumably these mark the target
# ratio (100% / 0%), nudged by one unit so the line is not drawn on top of
# the bar ends -- confirm.
reference_levels = [101, -1]

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

for ax, plot_df, reference_y in zip(axes, [data1_df, data2_df], reference_levels):
    sns.barplot(
        data=plot_df,
        x='Model',
        y='Deontic Response Ratio',
        hue='Input Type',
        palette=custom_palette,
        alpha=1.0,
        ax=ax,
    )

    # Axis cosmetics: no axis titles, percent-formatted y ticks every 10
    # units, and a little head-room beyond the 0-100 range.
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    ax.set_ylim(-4, 104)
    ax.tick_params(axis='x', labelsize=12, rotation=-15)
    # A fresh formatter per axis: the formatter object is installed on this
    # axis only.
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0f}%'.format(y)))
    ax.set_yticks([i * 10 for i in range(11)])

    # Both per-axes legends are dropped in favour of the shared figure legend
    # created below. Guard against get_legend() returning None so a missing
    # legend does not abort the figure.
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()

    ax.axhline(y=reference_y, color='#0088FF', linewidth=2.0, linestyle='--')

# Create a single legend for both plots, using the second panel's handles
# (they remain available after the per-axes legend has been removed).
handles, labels = axes[1].get_legend_handles_labels()
fig.legend(handles, labels,
           title='Deontological Modal Verbs',
           loc='lower center',
           bbox_to_anchor=(0.5, -0.13),
           ncol=3,
           frameon=True)

plt.tight_layout()
plt.subplots_adjust(bottom=0.2)  # Add space at bottom for legend

# Make sure the output directory exists; savefig does not create it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.show()


   model_output  model
0           1.0  gpt4o
1           0.0  gpt4o
2           0.0  gpt4o
3           1.0  gpt4o
4           0.0  gpt4o
   model_output  model
0           0.0  gpt4o
1           0.0  gpt4o
2           0.0  gpt4o
3           0.0  gpt4o
4           0.0  gpt4o
