# Basic Analyses for Self Prediction Experiments

In [None]:
# Experiment (a directory within exp/) whose base completions we analyse.
BASE_EXP = "num_4"  # 🔵 within exp/
# Self-prediction experiment: the base experiment name plus the "_5_shot" suffix.
SELF_PRED_EXP = f"{BASE_EXP}_5_shot"  # 🔵
# Name of the data file that is read from each experiment directory.
FILENAME = "data0.csv"

In [None]:
from pathlib import Path
import subprocess
import sys

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import words
from scipy import stats

In [None]:
from compliance_checks import check_compliance
from string_cleaning import apply_all_cleaning
from analysis_helpers import load_and_prep_dfs, merge_base_and_meta_dfs

In [None]:
# Do not truncate cell contents when a DataFrame is displayed:
# a max column width of None shows the full text of every cell.
pd.options.display.max_colwidth = None

In [None]:
# Ask git for the top-level directory of the current repository.
# check=True raises CalledProcessError if git exits with a non-zero status.
_git_toplevel = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"],
    stdout=subprocess.PIPE,
    check=True,
)
REPO_DIR = _git_toplevel.stdout.decode().strip()
print("Repository directory:", REPO_DIR)

# Put the repository root on the module search path.
sys.path.append(REPO_DIR)

In [None]:
# Experiment data lives in the "exp" directory directly under the repository root.
EXPDIR = Path(REPO_DIR).joinpath("exp")

In [None]:
# Load the data file of the base experiment and of the self-prediction
# experiment, passing the paths in that order.
dfs = load_and_prep_dfs(
    [EXPDIR / exp_name / FILENAME for exp_name in (BASE_EXP, SELF_PRED_EXP)]
)

In [None]:
# Pass every loaded frame to merge_base_and_meta_dfs; the result is the table
# that all analyses below operate on.
# NOTE(review): this relies on dfs.values() yielding the frames in the order the
# paths were given to load_and_prep_dfs (base first, then self-prediction) —
# presumably the helper preserves that order; confirm against its implementation.
df = merge_base_and_meta_dfs(*dfs.values())

How often does the model's self-prediction exactly match its own base response?

In [None]:
# Number of possible items in the string; its reciprocal is the probability of
# a uniformly random guess being right.
# (Alternative: len(words.words()) to use the size of the NLTK word list.)
N_POSSIBLE_ITEMS = 10
print(
    f"Number of possible items in the string: {N_POSSIBLE_ITEMS},\n"
    f"which gives us a probability of {1/N_POSSIBLE_ITEMS:.6%} for a random guess"
)

In [None]:
# Share of rows where the self-predicted response equals the base response exactly.
# The match indicator is computed once and reused for both the rate and the test.
is_perfect_match = df['response_self'] == df['response_base']
rate_perfect_answer = is_perfect_match.mean()
print(f"Rate of perfect answer matches: {rate_perfect_answer:.2%}")
# One-sample t-test of the match indicator against the random-guess rate.
t, p = stats.ttest_1samp(is_perfect_match, 1/N_POSSIBLE_ITEMS)
# Report p with significant digits: a fixed ".2f" prints every p < 0.005 as "0.00".
print(f"t = {t:.2f}, p = {p:.3g}")

In [None]:
# Share of rows where the first token of the self-prediction equals the first
# token of the base response. The indicator is computed once and reused.
is_first_token_match = df['first_token_self'] == df['first_token_base']
rate_first_token_match = is_first_token_match.mean()
print(f"Rate of first token matches: {rate_first_token_match:.2%}")
# One-sample t-test of the match indicator against the random-guess rate.
t, p = stats.ttest_1samp(is_first_token_match, 1/N_POSSIBLE_ITEMS)
# Report p with significant digits: a fixed ".2f" prints every p < 0.005 as "0.00".
print(f"t = {t:.2f}, p = {p:.3g}")

In [None]:
# let's qualitatively look at the matches
# Qualitative check: display the rows whose self-prediction equals the base
# response, restricted to the columns of interest.
df.loc[
    df['response_self'] == df['response_base'],
    [
        'string',
        'response_self',
        'response_base',
        'first_logprobs_base',
        'last_word_repeated_self',
        'nonlast_word_repeated_self',
    ],
]

The models like to repeat the last word of the string. That is not a problem per se since we select the strings to be hard to predict.

In [None]:
# Among the rows with a matching response, take the mean of the
# last-word-repetition flag.
prop_repeat_last_word = df.loc[
    df['response_self'] == df['response_base'],
    'last_word_repeated_self',
].mean()
print(f"Proportion of response-matching strings where the model repeats the last word:\t{prop_repeat_last_word:.2%}")

In [None]:
# if we exclude the strings where the model repeats the last word, what is the rate of perfect matches?
# If we exclude the rows where the model repeats the last word, what is the
# rate of perfect matches?
# `== False` is an elementwise comparison that keeps only rows whose flag is
# explicitly False; unlike `~`, it also tolerates missing values in the column.
df_no_repeat = df[df['last_word_repeated_self'] == False]  # noqa: E712
print(f"There are {len(df_no_repeat)} rows where the model does not repeat the last word")
# Match indicator on the filtered rows, computed once for the rate and the test.
is_match_no_repeat = df_no_repeat['response_self'] == df_no_repeat['response_base']
rate_perfect_answer_no_repeat = is_match_no_repeat.mean()
print(f"Rate of perfect answer matches (without word repetition):\t{rate_perfect_answer_no_repeat:.2%}")
# One-sample t-test of the match indicator against the random-guess rate.
t, p = stats.ttest_1samp(is_match_no_repeat, 1/N_POSSIBLE_ITEMS)
# Report p with significant digits: a fixed ".2f" prints every p < 0.005 as "0.00".
print(f"t = {t:.2f}, p = {p:.3g}")

In [None]:
# let's qualitatively look at the matches
# Qualitative check on the filtered rows: display those whose self-prediction
# equals the base response.
df_no_repeat.loc[
    df_no_repeat['response_self'] == df_no_repeat['response_base'],
    [
        'string',
        'response_self',
        'response_base',
        'last_word_repeated_self',
        'nonlast_word_repeated_self',
    ],
]

In [None]:
# ok, what about those that aren't in the string at all?
# What about the responses that are not in the string at all?
# Starting from the rows without last-word repetition, also drop rows where
# another word is repeated. `== False` keeps only rows whose flag is explicitly
# False and, unlike `~`, tolerates missing values in the column.
df_no_repeat_no_repeats = df_no_repeat[df_no_repeat['nonlast_word_repeated_self'] == False]  # noqa: E712
print(f"There are {len(df_no_repeat_no_repeats)} rows where the model doesn't repeat the last word or any other word")
# Match indicator on the filtered rows, computed once for the rate and the test.
is_match_no_repeats = df_no_repeat_no_repeats['response_self'] == df_no_repeat_no_repeats['response_base']
rate_perfect_answer_no_repeat_no_repeats = is_match_no_repeats.mean()
print(f"Rate of perfect answer matches (without word repetition and nonlast word repetition):\t{rate_perfect_answer_no_repeat_no_repeats:.2%}")
# One-sample t-test of the match indicator against the random-guess rate.
t, p = stats.ttest_1samp(is_match_no_repeats, 1/N_POSSIBLE_ITEMS)
# Report p with significant digits: a fixed ".2f" prints every p < 0.005 as "0.00".
print(f"t = {t:.2f}, p = {p:.3g}")


In [None]:
# let's look at this most selective set of strings
# Display the matching rows within this most selective subset.
df_no_repeat_no_repeats.loc[
    df_no_repeat_no_repeats['response_self'] == df_no_repeat_no_repeats['response_base'],
    [
        'string',
        'response_self',
        'response_base',
        'last_word_repeated_self',
        'nonlast_word_repeated_self',
    ],
]