# Compliance Checks
To what degree do the models comply with their tasks, and how often do they refuse or otherwise fail to follow them?

In [None]:
# Name of the study directory to analyse; it is joined onto the repository's exp/ folder when loading.
STUDY_FOLDER = "random_numbers_spaces_basic" # 🔵 within exp/
# Filter handed to `load_dfs_with_filter` together with the study folder.
# Judging by the commented-out examples, keys are (config section, field) tuples and
# values are lists of accepted settings; presumably an empty dict applies no filter — TODO confirm.
CONDITIONS = { 
    # see `analysis/loading_data.py` for details
    # ("language_model","model"): ["gpt-3.5-turbo"],
    # ("dataset","n_shot"): [0]
}

In [None]:
from pathlib import Path
import subprocess
import sys

In [None]:
# Ask git for the top-level directory of the repository containing the working directory
REPO_DIR = (
    subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
    .decode()
    .strip()
)

# Make the repository root importable, then report which root was found
sys.path.append(REPO_DIR)
print("Repository directory:", REPO_DIR)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import words
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from analysis_helpers import merge_base_and_self_pred_dfs, create_df_from_configs, fill_df_with_function
from loading_data import load_dfs_with_filter

In [None]:
# Show up to 200 characters per cell so long text columns are not cut short
pd.options.display.max_colwidth = 200

In [None]:
# Use seaborn's "Set1" palette as the default color palette for the plots in this notebook
palette = sns.color_palette("Set1")
sns.set_palette(palette)

In [None]:
# Directory holding the experiment data: <repository root>/exp
EXPDIR = Path(REPO_DIR) / "exp"

In [None]:
# Load the dataframes of the selected study, filtered by CONDITIONS.
# The result maps each config to its dataframe (it is iterated with `.items()` below).
# Non-compliant responses are deliberately kept (exclude_noncompliant=False),
# because measuring them is the purpose of this notebook.
dfs = load_dfs_with_filter(EXPDIR / STUDY_FOLDER, CONDITIONS, exclude_noncompliant=False)

In [None]:
def is_base_config(config):
    """Tell whether a config belongs to a base run.

    The decision is a plain `in` test for "base" against
    `config["prompt"]["method"]`; assuming the method is a string, any method
    whose name contains "base" counts as a base config.
    """
    prompt_method = config["prompt"]["method"]
    return "base" in prompt_method

In [None]:
# Split the loaded dataframes in one pass: base runs vs. everything else (self-prediction runs)
base_dfs = {}
self_pred_dfs = {}
for config, config_df in dfs.items():
    destination = base_dfs if is_base_config(config) else self_pred_dfs
    destination[config] = config_df

print(f"Loaded {len(base_dfs)} base and {len(self_pred_dfs)} self-prediction dataframes")

In [None]:
# Build the results dataframe from the loaded configs, ordered by language model
results = create_df_from_configs(dfs.keys()).sort_values(by="language_model_model")

How many responses are non-compliant?

In [None]:
def avg_compliance(df):
    """Return the fraction of rows whose "compliance" value equals True.

    Rows holding anything else in that column (for example a string) count as
    non-compliant.
    """
    is_compliant = df["compliance"].eq(True)
    return is_compliant.mean()

In [None]:
# Fill the results dataframe with the average compliance of each config.
# Presumably `fill_df_with_function` applies `avg_compliance` to every dataframe in `dfs`
# and stores the value in the "avg_compliance" column of `results` — TODO confirm in analysis_helpers.
fill_df_with_function(dfs, avg_compliance, "avg_compliance", results)

In [None]:
# Compliance table, most compliant config first.
# The color scale is pinned to the full 0–1 range with vmin/vmax, so a cell's color
# reflects absolute compliance. (`low`/`high` only pad the observed data range and
# would keep even the best config from reaching green.)
(
    results
    .drop(columns=["config"])
    .sort_values(by="avg_compliance", ascending=False)
    .style
    .hide(axis="index")
    .background_gradient(subset=["avg_compliance"], cmap="RdYlGn", vmin=0.0, vmax=1.0)
)

Making plots

In [None]:
# Compliance against the number of few-shot examples, one line per language model
ax = sns.pointplot(
    data=results,
    x="dataset_n_shot",
    y="avg_compliance",
    hue="language_model_model",
)
ax.set(
    title="Average compliance per number of few-shot examples",
    xlabel="Number of few-shot examples",
    ylabel="Average compliance",
    ylim=(-0.025, 1.025),  # small margin so points at 0 and 1 are not clipped
)
plt.show()


In [None]:
# Bar plot with one group per language model and one bar per prompt method.
# Rows that share model and method (e.g. different n_shot values) are aggregated by seaborn into one bar.
ax = sns.barplot(
    data=results,
    x="language_model_model",
    y="avg_compliance",
    hue="prompt_method",
)
ax.set(
    title="Average compliance per language model and prompt method",
    xlabel="Language model",
    ylabel="Average compliance",
)
plt.show()


Qualitative Analysis

In [None]:
# for each df, what are the most common reasons for non-compliance?
def get_most_common_reasons(df):
    """Count the most frequent "compliance" values among non-compliant rows.

    Rows whose "compliance" value equals True are dropped; the remaining values
    are tallied and the ten most frequent ones returned as a Series (value ->
    count, most frequent first).
    """
    not_compliant = df["compliance"].ne(True)
    reason_counts = df.loc[not_compliant, "compliance"].value_counts()
    return reason_counts.head(10)

# Store each config's most frequent non-compliance values under "most_common_reasons" in `results`
# (presumably one entry per config — TODO confirm against `fill_df_with_function`).
fill_df_with_function(dfs, get_most_common_reasons, "most_common_reasons", results)

In [None]:
# Print, for every row of results, its identifying settings followed by its
# most common reasons for non-compliance and a blank separator line
id_columns = ['language_model_model', 'prompt_method', 'dataset_n_shot']
for config, row in results.iterrows():
    print(row[id_columns], row["most_common_reasons"], "", sep="\n")

In [None]:
# for each df, sample up to 10 non-compliant examples
def sample_non_compliant(df, n=10, random_state=None):
    """Return a random sample of responses that were not marked compliant.

    Args:
        df: Dataframe with a "compliance" column (True for compliant rows) and
            a "response" column.
        n: Maximum number of responses to sample (default 10).
        random_state: Optional seed forwarded to `Series.sample` so the sample
            can be reproduced.

    Returns:
        A Series with up to `n` values of the "response" column, or None when
        there is nothing to sample (no non-compliant rows, or one of the two
        columns is missing).
    """
    # Best effort: a dataframe without the needed columns yields None rather
    # than an error. Any other failure is allowed to surface.
    if "compliance" not in df.columns or "response" not in df.columns:
        return None

    non_compliant = df.loc[df["compliance"] != True, "response"]
    if non_compliant.empty:
        return None

    # Cap the sample size at the number of available rows: `sample` raises when
    # asked for more rows than exist, which would hide configs that have only a
    # handful of non-compliant responses.
    sample_size = min(n, len(non_compliant))
    return non_compliant.sample(sample_size, random_state=random_state)

# Store each config's sample of non-compliant responses under "sample_non_compliant" in `results`
# (presumably one entry per config — TODO confirm against `fill_df_with_function`).
fill_df_with_function(dfs, sample_non_compliant, "sample_non_compliant", results)

In [None]:
# Print, for every row of results, its identifying settings followed by the
# sampled non-compliant responses and a blank separator line
id_columns = ['language_model_model', 'prompt_method', 'dataset_n_shot']
for config, row in results.iterrows():
    print(row[id_columns], row["sample_non_compliant"], "", sep="\n")

In [None]:
# we want to look at the full dataframe for the most common reasons for non-compliance
# so it can be opened in the data wrangler
# get least compliant config: sort ascending by avg_compliance and take the
# index label (`.name`) of the first row
# NOTE(review): this assumes `results` is indexed by the same config objects that key `dfs` — confirm
least_compliant_config = results.sort_values(by="avg_compliance").iloc[0].name
least_compliant_df = dfs[least_compliant_config]
# NOTE(review): the message prints the config's whole 'language_model' entry followed by
# the full config — confirm this is the intended wording
print(f"Least compliant df is for model {least_compliant_config['language_model']} on {least_compliant_config}")

In [None]:
# Show up to 10 random rows of the least compliant dataframe. The size is capped at
# the dataframe length because `sample` raises a ValueError when asked for more
# rows than exist.
least_compliant_df.sample(min(10, len(least_compliant_df)))