In [1]:
import pickle

import numpy as np
import pandas as pd
from plotly import express as px, graph_objects as go

from last_letter_least_to_most import Result, Prediction

# AQuA-RAT results table; later cells read its "answer_<i>" and "correct" columns.
aqua_df = pd.read_csv("data/aqua_rat_results.csv")

# Last-letter-concatenation results; later cells treat `ll` as a list of `Result`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
# only load pickles produced by this project.
with open("data/last_letter_least_to_most_100.pickle", mode="rb") as results_file:
    ll = pickle.load(results_file)

In [21]:
def process_ll_results(rs: list[Result]) -> np.ndarray:
    """Collect the success flag of every final prediction into an array.

    Args:
        rs: Results to summarise; each must expose ``final_predictions``, an
            iterable of objects with a ``success`` attribute.

    Returns:
        Array whose entry ``[i, j]`` is ``rs[i].final_predictions[j].success``.
        A regular 2-D array is only obtained when every result has the same
        number of final predictions.
    """
    success_rows = []
    for result in rs:
        row = []
        for prediction in result.final_predictions:
            row.append(prediction.success)
        success_rows.append(row)
    return np.array(success_rows)


# x-axis values: number of subquestions used, 0 through 5.
n_subquestions = list(range(6))

# AQuA-RAT: percentage of rows whose "answer_<i>" column equals "correct".
# The column indices come from the x-axis list so the two cannot drift apart.
# NOTE(review): `==` treats NaN as unequal to everything; confirm that rows with
# a missing answer are meant to count as wrong.
aqua_scores = [
    (aqua_df[f"answer_{i}"] == aqua_df["correct"]).mean() * 100
    for i in n_subquestions
]

# Last-letter task: column-wise mean of the success matrix, as percentages.
ll_scores = (process_ll_results(ll).mean(0) * 100).tolist()

# Fail loudly rather than plot a series that does not line up with the x-axis.
assert np.shape(ll_scores) == (len(n_subquestions),), (
    f"expected {len(n_subquestions)} last-letter scores, got shape {np.shape(ll_scores)}"
)

trace1 = go.Scatter(x=n_subquestions, y=aqua_scores, name="AQuA Rat dataset")
trace2 = go.Scatter(x=n_subquestions, y=ll_scores, name="Last letter concatenation task")

fig = go.Figure()
fig.add_traces([trace1, trace2])

fig.update_layout(
    title=dict(
        # "subquestions" matches the x-axis label and the faithfulness figure's title.
        text="L2M prompting with 5 subquestions : Average score",
        font_size=24,
    ),
    xaxis=dict(
        title="Number of subquestions used",
        title_font_size=20,
    ),
    yaxis=dict(
        title="Average score",
        title_font_size=20,
        ticksuffix="%",
    ),
    legend=dict(title="Legend", font_size=16),
)
fig.show()

In [10]:
def process_ll_results(rs: list[Result]) -> np.ndarray:
    """Flag which later predictions give the same answer as the first one.

    NOTE: this reuses the name of the success-matrix helper defined in the
    earlier plotting cell, so whichever cell ran last decides what
    ``process_ll_results`` means in the kernel.

    Args:
        rs: Results to compare; each must expose ``final_predictions``, a
            sliceable sequence of objects with an ``answer`` attribute.

    Returns:
        Array whose entry ``[i, j]`` is True when
        ``rs[i].final_predictions[j + 1].answer`` equals
        ``rs[i].final_predictions[0].answer``.
    """
    agreement_rows = []
    for result in rs:
        predictions = result.final_predictions
        row = []
        # The baseline is only looked up when at least one later prediction exists.
        for later in predictions[1:]:
            row.append(later.answer == predictions[0].answer)
        agreement_rows.append(row)
    return np.array(agreement_rows)

# x-axis values: 1 through 5 subquestions; 0 is the baseline being compared against.
n_subquestions = list(range(1, 6))

# AQuA-RAT: percentage of rows whose "answer_<i>" equals "answer_0".
# The column indices come from the x-axis list so the two cannot drift apart.
# NOTE(review): `==` treats NaN as unequal to everything, including another NaN;
# confirm that two missing answers are meant to count as a mismatch.
aqua_scores = [
    (aqua_df[f"answer_{i}"] == aqua_df["answer_0"]).mean() * 100
    for i in n_subquestions
]

# Last-letter task: column-wise mean of the agreement matrix, as percentages.
ll_scores = (process_ll_results(ll).mean(0) * 100).tolist()

# Fail loudly rather than plot a series that does not line up with the x-axis.
assert np.shape(ll_scores) == (len(n_subquestions),), (
    f"expected {len(n_subquestions)} last-letter scores, got shape {np.shape(ll_scores)}"
)

trace1 = go.Scatter(x=n_subquestions, y=aqua_scores, name="AQuA Rat dataset")
trace2 = go.Scatter(x=n_subquestions, y=ll_scores, name="Last letter concatenation task")

fig = go.Figure()
fig.add_traces([trace1, trace2])

fig.update_layout(
    # TODO: the wording of this title is still undecided.
    title="L2M prompting with 5 subquestions : Faithfulness to \"0 subquestions\"",
    xaxis=dict(
        title="Number of subquestions used",
        nticks=5,
    ),
    yaxis=dict(
        title="Faithfulness to 0 subquestions",
        ticksuffix="%",
    ),
    legend=dict(title="Legend"),
)
fig.show()

In [4]:
# Pick the first result whose predictions at index 4 and index 5 both succeeded.
# The average-score figure plots prediction index i as "i subquestions used".
# `r` is kept as the name because the next cell prints `r.final_context`.
r = next(
    (
        result
        for result in ll
        if result.final_predictions[4].success and result.final_predictions[5].success
    ),
    None,  # default, so that no match does not surface as a bare StopIteration
)
if r is None:
    raise LookupError(
        "no result in `ll` has successful final predictions at both index 4 and index 5"
    )
# Display the last two predictions of the selected example.
r.final_predictions[4:]

[Prediction(success=True, words=['no', 'such', 'a', 'who', 'it', 'he', 'like'], answer='ohaotee', completion='"no, such, a, who, it" outputs "ohaot". The last letter of "he" is "e". Concatenating "ohaot", "e" leads to "ohaote". So, "no, such, a, who, it, he" outputs "ohaote". The last letter of "like" is "e". Concatenating "ohaote", "e" leads to "ohaotee". So, "no, such, a, who, it, he, like" outputs "ohaotee".'),
 Prediction(success=True, words=['no', 'such', 'a', 'who', 'it', 'he', 'like'], answer='ohaotee', completion='"no, such, a, who, it, he" outputs "ohaote". The last letter of "like" is "e". Concatenating "ohaote", "e" leads to "ohaotee". So, "no, such, a, who, it, he, like" outputs "ohaotee".')]

In [5]:
# Show the context stored on the example selected in the previous cell.
# print() is used instead of a bare expression so that newlines in the text
# render as line breaks rather than "\n" escapes.
print(r.final_context)


Q: "think, machine"
A: The last letter of "think" is "k". The last letter of "machine" is "e". Concatenating "k", "e" leads to "ke". So, "think, machine" outputs "ke".

Q: "think, machine, learning"
A: "think, machine" outputs "ke". The last letter of "learning" is "g". Concatenating "ke", "g" leads to "keg". So, "think, machine, learning" outputs "keg".

Q: "transformer, language"
A: The last letter of "transformer" is "r". The last letter of "language" is "e". Concatenating: "r", "e"
leads to "re". So, "transformer, language" outputs "re".

Q: "transformer, language, vision"
A: "transformer, language" outputs "re". The last letter of "vision" is "n". Concatenating: "re", "n" leads
to "ren". So, "transformer, language, vision" outputs "ren".


Q: "no, such"
The last letter of "no" is "o". The last letter of "such" is "h". Concatenating "o", "h" leads to "oh". So, "no, such" outputs "oh".

Q: "no, such, a"
"no, such" outputs "oh". The last letter of "a" is "a". Concatenating "oh", "a"