# Comparing base experiments against self predictions
This notebook compares how well different models' self-predictions score against base (object-level) predictions, either from the same model or from other models. It is most useful for checking finetuned models.

In [None]:
# Experiment folders to load. Each name is joined onto the `exp/` directory
# (EXPDIR) when the dataframes are loaded.
STUDY_FOLDERS = [ # 🔵 within exp/
    "number_triplets_bergenia_ft_self_pred", 
    "number_triplets_bergenia_finetuned_small_dataset",
    # "random_words_bergenia",
    # "number_triplets_bergenia",
]
    
# Filter passed to `load_dfs_with_filter` for every study folder.
# Each key is a (section, field) path into a run's config and each value is a
# list of accepted settings.
# NOTE(review): presumably a run is kept only if it matches every active entry;
# confirm against `analysis/loading_data.py`.
# This is a dict, so only one entry per key can be active: uncomment the
# alternative you need and comment out the one it replaces.
CONDITIONS = { 
    # see `analysis/loading_data.py` for details
    # ("language_model","model"): ["gpt-4-1106-preview"],
    # ("language_model","model"): ["gpt-3.5-turbo-1106", "gpt-4-0613"],
    # ("language_model","model"): ["gpt-3.5-turbo-1106"],
    # ("prompt", "method"): ["base-completion-bergenia", "self-prediction-bergenia-nontechnical"],
    # ("language_model","model"): ["gpt-3.5-turbo", "claude-2.1"],
    # ("language_model","model"): ["davinci-002"],
    # ("dataset", "topic"): ["number_triplets"],
    # ("dataset", "topic"): ["english_words"],
    # ("dataset","n_shot"): [100, None]
    ("dataset","n_shot"): [0, None],
    # ("dataset","n_shot_seeding"): ["other_model"]
    # Both the string 'None' and a real None are listed for the two entries below.
    # NOTE(review): presumably configs may store an unset value either way; confirm.
    ('dataset', 'string_modifier'): ['None', None],
    ('dataset', 'response_property'): ['None', None],

}

In [None]:
from pathlib import Path
import subprocess
import sys
import random
import logging

In [None]:
# Set the root logger's level to WARNING so INFO/DEBUG messages are suppressed.
# NOTE(review): basicConfig is a no-op if the root logger already has handlers;
# confirm that this takes effect in the kernel you run it in.
logging.basicConfig(level=logging.WARNING)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import words
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from evals.analysis.analysis_helpers import merge_base_and_self_pred_dfs, create_df_from_configs, fill_df_with_function, get_pretty_name, filter_configs_by_conditions, pretty_print_config, get_pretty_name_w_labels
from evals.analysis.loading_data import load_dfs_with_filter, load_base_df_from_config, get_hydra_config, load_single_df, get_data_path
from evals.analysis.analysis_functions import *
from evals.utils import get_maybe_nested_from_dict

In [None]:
# Show up to 200 characters per cell before pandas truncates the text
# (pass None instead of 200 to remove the limit entirely)
pd.set_option('display.max_colwidth', 200)
# show all columns
pd.set_option('display.max_columns', None)

In [None]:
# Build a 64-entry palette from "Set1" and install it as seaborn's default.
# NOTE(review): Set1 only defines a handful of distinct colors, so asking for 64
# presumably cycles through them; confirm that repeated colors are acceptable.
palette = sns.color_palette("Set1", 64)
sns.set_palette(palette)

In [None]:
# get seaborn to shut up
import warnings
# Ignore FutureWarnings whose originating module name matches "seaborn";
# other warning categories and other modules are left untouched.
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

In [None]:
from evals.generate_few_shot import REPO_DIR

In [None]:
# Root directory of the experiment data: the `exp` folder inside the repository.
# The entries of STUDY_FOLDERS are resolved relative to this path.
EXPDIR = Path(REPO_DIR) / "exp"

Load dataframes in

In [None]:
# load the dataframes with configs as keys
# Results from all study folders are merged into one dict. Because dict.update
# is used, a config that appears in more than one folder keeps only the
# dataframe from the folder loaded last.
dfs = {}
for STUDY_FOLDER in STUDY_FOLDERS:
    # exclude_noncompliant=False is passed through to the loader.
    # NOTE(review): presumably this keeps non-compliant responses in the frames; confirm in loading_data.py.
    _dfs = load_dfs_with_filter(EXPDIR / STUDY_FOLDER, CONDITIONS, exclude_noncompliant=False)
    dfs.update(_dfs)
    print(f"Loaded {len(_dfs)} dataframes from {STUDY_FOLDER}")
print(f"Loaded {len(dfs)} dataframes in total")

In [None]:
def is_base_config(config):
    """Return True when the config's prompt method name starts with "base".

    Args:
        config: mapping-like run config exposing ``config["prompt"]["method"]``
            as a string.

    Returns:
        bool: True if the prompt method begins with ``"base"``, otherwise False.
    """
    prompt_method = config["prompt"]["method"]
    return prompt_method.startswith("base")

In [None]:
# Split the loaded frames into object-level ("base") runs and everything else,
# which is treated as self-prediction runs.
base_dfs, self_pred_dfs = {}, {}
for config, df in dfs.items():
    bucket = base_dfs if is_base_config(config) else self_pred_dfs
    bucket[config] = df
print(f"Loaded {len(base_dfs)} base and {len(self_pred_dfs)} self-prediction dataframes")

In [None]:
# Collect the distinct ('dataset', 'topic') values found among the base configs.
print("We have the following datasets:")
datasets = {
    get_maybe_nested_from_dict(base_config, ('dataset', 'topic'))
    for base_config in base_dfs.keys()
}
print(datasets)

## Plots

### Making labels

In [None]:
# Display every model identifier found in the base or self-prediction configs;
# use this list to check which identifiers need an entry in MODEL_LABELS.
{get_maybe_nested_from_dict(c, ('language_model', 'model')) for c in base_dfs.keys()}.union({get_maybe_nested_from_dict(c, ('language_model', 'model')) for c in self_pred_dfs.keys()})

In [None]:
# Maps raw model identifiers (the ('language_model', 'model') value of a config)
# to the short display names used in plot labels. `get_label` falls back to the
# raw identifier for any model that has no entry here.
MODEL_LABELS = {
    "ft:gpt-3.5-turbo-1106:dcevals-kokotajlo:35on35onnum:8x4lehAb": "GPT3.5 fted on GPT3.5" ,
    "ft:gpt-3.5-turbo-1106:dcevals-kokotajlo:35on35onnumscram:8x6QzXiQ": "GPT3.5 fted on GPT3.5\n(scrambled)",
    "ft:gpt-3.5-turbo-1106:dcevals-kokotajlo:35on4onnum:8xMcmGZM": "GPT3.5 fted on GPT4",
    "ft:gpt-4-0613:dcevals-kokotajlo:4on4onnum:8x8dNwL1": "GPT4 fted on GPT4",
    "ft:gpt-4-0613:dcevals-kokotajlo:4on35onnum:8xq9fNVt": "GPT4 fted on GPT3.5",
    "ft:gpt-3.5-turbo-1106:dcevals-kokotajlo:35on35onnums:8zFjiOFt": "GPT3.5 fted on GPT3.5 (small dataset)",
    "ft:gpt-3.5-turbo-1106:dcevals-kokotajlo:35on4onnums:8zHmk4o8": "GPT3.5 fted on GPT4 (small dataset)",
    "gpt-3.5-turbo-1106": "GPT3.5",
    "gpt-4-0613": "GPT4",
}

In [None]:
# Report which loaded models have no display name in MODEL_LABELS.
loaded_models = {get_maybe_nested_from_dict(c, ('language_model', 'model')) for c in base_dfs.keys()}.union({get_maybe_nested_from_dict(c, ('language_model', 'model')) for c in self_pred_dfs.keys()})
models_wo_labels = [model for model in loaded_models if model not in MODEL_LABELS]
if models_wo_labels:
    print("Models without labels:")
else:
    print("All models have labels")
for m in models_wo_labels:
    print(m)

In [None]:
def get_label(config):
    """Build the human-readable plot label for a run config.

    The label starts with the model's display name from ``MODEL_LABELS`` (or the
    raw model identifier when no display name is registered). A line is appended
    for the response property and for the string modifier whenever the config
    sets them to something other than ``None`` / ``"None"``.

    Args:
        config: a run config, or the string form of one (it is then evaluated
            back into an object first).

    Returns:
        str: the label, possibly spanning several lines.
    """
    if isinstance(config, str):
        # NOTE(review): eval() executes whatever the string contains. This is only
        # safe because the strings come from str(config) on configs we loaded
        # ourselves; never pass externally supplied text through here.
        config = eval(config)

    model_id = get_maybe_nested_from_dict(config, ('language_model', 'model'))
    label = ""
    label += MODEL_LABELS.get(model_id, model_id)

    # (config path, text placed in front of the value) for each optional suffix
    optional_parts = (
        (('dataset', 'response_property'), "\n predicting "),
        (('dataset', 'string_modifier'), "\nw string mod:"),
    )
    for config_path, prefix in optional_parts:
        value = get_maybe_nested_from_dict(config, config_path)
        if value not in ["None", None]:
            label += f"{prefix}{value}"
    return label

### Helper functions

In [None]:
def make_pairwise_tables(measure, base_dfs, self_pred_dfs):
    """Score every self-prediction run against every base run.

    Args:
        measure: callable taking the merged dataframe of one (base, self-pred)
            pair and returning the value to store for that pair.
        base_dfs: dict mapping base configs to dataframes (become the columns).
        self_pred_dfs: dict mapping self-prediction configs to dataframes
            (become the rows).

    Returns:
        pandas.DataFrame: one row per self-prediction config and one column per
        base config, relabelled with ``get_label`` and sorted by label on both
        axes.
    """
    column_keys = [str(config) for config in base_dfs.keys()]
    row_keys = [str(config) for config in self_pred_dfs.keys()]
    results = pd.DataFrame(columns=column_keys, index=row_keys)

    for base_config, base_df in base_dfs.items():
        column_key = str(base_config)
        for self_config, self_df in self_pred_dfs.items():
            # The merge is parametrised by the self-prediction run's own settings.
            modifier = get_maybe_nested_from_dict(self_config, ("dataset", "string_modifier"))
            prop = get_maybe_nested_from_dict(self_config, ("dataset", "response_property"))
            merged = merge_base_and_self_pred_dfs(
                base_df,
                self_df,
                string_modifier=modifier,
                response_property=prop,
            )
            results.loc[str(self_config), column_key] = measure(merged)

    # Replace the stringified configs with readable labels, then order both axes.
    results.index = results.index.map(get_label)
    results.columns = results.columns.map(get_label)
    return results.sort_index(axis=0).sort_index(axis=1)

In [None]:
def filter_by_dataset(dfs, dataset):
    """Keep only the entries whose config belongs to the given dataset.

    Args:
        dfs: dict mapping configs to dataframes.
        dataset: value to compare against each config's ('dataset', 'topic').

    Returns:
        dict: the matching subset of ``dfs``, in the original order.
    """
    selected = {}
    for config, df in dfs.items():
        topic = get_maybe_nested_from_dict(config, ('dataset', 'topic'))
        if topic == dataset:
            selected[config] = df
    return selected

### Accuracy heatmap

In [None]:
# One heatmap per dataset: rows are the meta-level (self-prediction) runs and
# columns are the object-level (base) runs they are scored against.
for dataset in datasets:
    dataset_base_dfs = filter_by_dataset(base_dfs, dataset)
    dataset_self_pred_dfs = filter_by_dataset(self_pred_dfs, dataset)
    results = make_pairwise_tables(calc_accuracy_with_excluded, dataset_base_dfs, dataset_self_pred_dfs)
    print(f"Accuracy for {dataset}")

    ax = sns.heatmap(
        results.astype(float),
        annot=True,
        fmt=".0%",
        cmap="YlGnBu",
        cbar=False,
        vmin=0,
        vmax=1,
    )
    ax.set_xlabel("Scored against object-level")
    ax.set_ylabel("Meta-level")
    ax.set_title(f"Accuracy of meta-level predicting different object-level models on {dataset}")
    ax.set_aspect("equal")  # square cells
    plt.show()

### Base vs base change heatmap

In [None]:
# One heatmap per dataset comparing the object-level (base) runs with each other.
for dataset in datasets:
    dataset_base_dfs = filter_by_dataset(base_dfs, dataset)
    results = make_pairwise_tables(calc_accuracy, dataset_base_dfs, dataset_base_dfs)
    print(f"Overlap between object-level completions for {dataset}")

    # Hide every cell strictly above the diagonal.
    # NOTE(review): presumably the overlap is symmetric, so the upper triangle is redundant; confirm.
    mask = np.triu(np.ones_like(results, dtype=bool), k=1)
    ax = sns.heatmap(
        results.astype(float),
        mask=mask,
        annot=True,
        fmt=".0%",
        cmap="YlGnBu",
        cbar=False,
        vmin=0,
        vmax=1,
    )
    ax.set_title(f"Overlap between object-level completions for {dataset}")
    ax.set_aspect("equal")  # square cells
    plt.show()

### Entropy barplots

In [None]:
# Shannon entropy (natural log, scipy's default) of the empirical distribution
# of the `response` column: higher means more varied completions.
measure = lambda df: stats.entropy(df['response'].value_counts(normalize=True))

# One bar chart per dataset, first for the object-level (base) runs and then for
# the meta-level (self-prediction) runs. Printed text and plot title share one
# string so they cannot disagree; previously the meta-level plots were titled
# "object-level".
for level, level_dfs, color in [
    ("object-level", base_dfs, "green"),
    ("meta-level", self_pred_dfs, "purple"),
]:
    for dataset in datasets:
        results = {get_label(config): measure(df) for config, df in filter_by_dataset(level_dfs, dataset).items()}
        title = f"Entropy of {level} completions for {dataset}"
        print(title)
        sns.barplot(x=list(results.keys()), y=list(results.values()), color=color)

        plt.title(title)
        plt.xticks(rotation=90)  # labels are long and multi-line
        plt.show()