In [None]:
from dotenv import load_dotenv
import itertools
import os

from explainerdashboard import ClassifierExplainer, ExplainerDashboard, ExplainerHub
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.inspection import PartialDependenceDisplay
import sklearn.metrics
from sqlalchemy import create_engine

import modelbuilderpaardensprong

In [None]:
def _psycopg_url(env_var):
    """Return the database URL stored in ``env_var`` with the psycopg driver selected.

    Only the URL scheme is rewritten (``postgresql://`` becomes
    ``postgresql+psycopg://``). A blanket string replace would also touch a
    user, host or database name containing "postgresql", and would mangle a
    URL that already names the driver.

    Args:
        env_var: Name of the environment variable holding the URL.

    Returns:
        The URL as a string, ready to hand to ``create_engine``.

    Raises:
        RuntimeError: If the environment variable is not set.
    """
    url = os.getenv(env_var)
    if url is None:
        raise RuntimeError(
            f"Environment variable {env_var!r} is not set; cannot connect to the database"
        )
    scheme, separator, remainder = url.partition("://")
    if scheme == "postgresql":
        scheme = "postgresql+psycopg"
    return scheme + separator + remainder


# Load variables from a .env file into the environment before reading them.
load_dotenv()

# Player whose games are used for training (see the query in get_data_for_game).
PLAYERNAME = os.getenv("playername")

database_url_prod = _psycopg_url("PROD_DATABASE_URL")
engine_prod = create_engine(database_url_prod)

database_url_dev = _psycopg_url("DATABASE_URL")
engine_dev = create_engine(database_url_dev)

In [None]:
def get_data_for_game(name):
    """Load the played puzzles of one game and derive the modelling columns.

    Reads the ``games`` and ``guesses`` tables from schema ``name`` through
    ``engine_prod``, keeps the games that have a guess and belong to
    ``PLAYERNAME`` (plus game 46), and adds the derived columns
    ``PuzzleTimeSec``, ``OnTime``, ``Success`` and ``NTimesWordSeenBefore``.

    Args:
        name: Database schema of the game, e.g. ``"paardensprong"``.

    Returns:
        DataFrame indexed by ``game_id`` with string column labels, limited to
        guesses made between 0 and 120 seconds after the puzzle started.
    """
    with engine_prod.connect() as conn:
        games = pd.read_sql_table("games", conn, name, index_col="game_id")
        guesses = pd.read_sql_table("guesses", conn, name, index_col="game_id")

    guesses_relevant = guesses.rename(columns={"correct": "GuessCorrect"})[
        ["guess_time", "GuessCorrect"]
    ]
    df = (
        games
        # Drop games which have no guess - probably timed out because of long loading times
        .join(guesses_relevant, how="inner")
        .query("playername == @PLAYERNAME | game_id == 46")
        .assign(
            # total_seconds() measures the whole duration. `.dt.seconds` is only
            # the seconds *component* of the timedelta, so a guess made more than
            # a day late would wrap around and could look like a fast answer.
            PuzzleTimeSec=lambda df: (
                df["guess_time"] - df["start_time"]
            ).dt.total_seconds(),
            # The on-time limit is a bit strict, since you need a few seconds of typing time.
            # But that's on purpose: it makes sense to train to have a bit of spare time,
            # and it helps the model since there are a few more unsuccessful games to train on.
            OnTime=lambda df: df["PuzzleTimeSec"].lt(30),
            Success=lambda df: df["GuessCorrect"] & df["OnTime"],
            # Number of earlier rows with the same answer, in the row order of `games`.
            NTimesWordSeenBefore=lambda df: df.groupby("answer").cumcount(),
        )
        # A few answers were given extremely late; probably when reconnecting.
        # Negative durations (guess registered before the start) are dropped too.
        .query("PuzzleTimeSec >= 0 and PuzzleTimeSec < 120")
    )
    df.columns = df.columns.map(str)
    return df


def _add_game_level(frame, game):
    """Replace the index of ``frame`` in place by a (Game, <old index>) MultiIndex."""
    old_index = frame.index
    frame.index = pd.MultiIndex.from_product(
        [[game], old_index.astype(object)],
        names=["Game", old_index.name],
    )


# Paardensprong gets an all-missing nullable-integer missing_letter_index
# column, so that both frames can be indexed with the same model columns.
df_paardensprong = (
    get_data_for_game("paardensprong")
    .assign(missing_letter_index=pd.NA)
    .astype({"missing_letter_index": "Int64"})
)
df_taartpuzzel = get_data_for_game("taartpuzzel")

# Tag every row with its game so the rows stay distinguishable once combined.
_add_game_level(df_paardensprong, "Paardensprong")
_add_game_level(df_taartpuzzel, "Taartpuzzel")

In [None]:
# Columns taken from the game frames; "Success" is the target, the rest are features.
model_cols = [
    "start_time",
    "answer",
    "startpoint",
    "direction",
    "NTimesWordSeenBefore",
    "missing_letter_index",
    "Success",
]

# Targets: 1 when the puzzle was solved correctly and on time, otherwise 0.
y_ps = df_paardensprong["Success"].astype(int)
y_tp = df_taartpuzzel["Success"].astype(int)

# Features: the model columns without the target, plus a 0/1 flag for the game.
X_ps = (
    df_paardensprong[model_cols].drop(columns="Success").assign(IsTaartpuzzel=0)
)
X_tp = df_taartpuzzel[model_cols].drop(columns="Success").assign(IsTaartpuzzel=1)

# Split per game first, so that results can be investigated per puzzle type,
# then stack the per-game parts into the combined train and test sets.
X_ps_train, X_ps_test, y_ps_train, y_ps_test = train_test_split(
    X_ps,
    y_ps,
    stratify=y_ps,
    random_state=42,
)
X_tp_train, X_tp_test, y_tp_train, y_tp_test = train_test_split(
    X_tp,
    y_tp,
    stratify=y_tp,
    random_state=42,
)

X_train = pd.concat([X_ps_train, X_tp_train])
X_test = pd.concat([X_ps_test, X_tp_test])
y_train = pd.concat([y_ps_train, y_tp_train])
y_test = pd.concat([y_ps_test, y_tp_test])

In [None]:
# Re-execute the modelbuilderpaardensprong module so that changes made to its
# source are picked up without restarting the kernel.
import importlib

importlib.reload(modelbuilderpaardensprong)

In [None]:
# Fit the pipeline defined in modelbuilderpaardensprong on the combined training set.
pipe = modelbuilderpaardensprong.pipe
pipe.fit(X_train, y_train)

In [None]:
# Fit the search object defined in modelbuilderpaardensprong and rebind `pipe`
# to the best estimator it found.
# NOTE(review): presumably a scikit-learn hyper-parameter search (it exposes
# best_estimator_, best_params_ and cv_results_) - confirm in the module.
grid = modelbuilderpaardensprong.grid
grid.fit(X_train, y_train)
pipe = grid.best_estimator_

In [None]:
# Show the parameter combination of the best estimator.
grid.best_params_

In [None]:
def n_columns(columns):
    """Count the columns described by a column-selection parameter value.

    ``columns`` is either a sized collection of column names, whose length is
    returned, or the string ``"all"``. For ``"all"`` the count is taken from
    ``modelbuilderpaardensprong.column_selector.columns_``; when that lookup
    raises ``AttributeError`` the module's pipeline is fitted on the training
    data first and the lookup is repeated.
    """
    selects_everything = columns == "all"
    if not selects_everything:
        return len(columns)

    try:
        selected = modelbuilderpaardensprong.column_selector.columns_
    except AttributeError:
        modelbuilderpaardensprong.pipe.fit(X_train, y_train)
        selected = modelbuilderpaardensprong.column_selector.columns_
    return len(selected)


# One row per parameter combination of the search: the parameters themselves
# followed by the cross-validation score summaries.
_cv_results = grid.cv_results_
_score_columns = [
    pd.Series(_cv_results[key], name=key)
    for key in ("mean_test_score", "mean_train_score", "std_test_score")
]
results = pd.concat(
    [pd.DataFrame(_cv_results["params"]), *_score_columns],
    axis="columns",
)
results = results.assign(
    # Gap between train and test score.
    Overfit=results["mean_train_score"] - results["mean_test_score"],
    # Number of columns behind each column-selection setting.
    columns=results["columnselection__columns"].apply(n_columns),
)

results.sort_values("mean_test_score", ascending=False)

### Inspect model

In [None]:
# Baseline classifier fitted on the same training data as the model.
dummy = DummyClassifier()
dummy.fit(X_train, y_train)

# Predicted class probabilities: model on train, model and baseline on the
# Paardensprong test set.
y_pred_train = pipe.predict_proba(X_train)
y_pred_proba = pipe.predict_proba(X_ps_test)
y_pred_dummy = dummy.predict_proba(X_ps_test)

train_logloss = sklearn.metrics.log_loss(y_train, y_pred_train)
test_logloss = sklearn.metrics.log_loss(y_ps_test, y_pred_proba)
dummy_logloss = sklearn.metrics.log_loss(y_ps_test, y_pred_dummy)

train_auc = sklearn.metrics.roc_auc_score(y_train, y_pred_train[:, 1])
test_auc = sklearn.metrics.roc_auc_score(y_ps_test, y_pred_proba[:, 1])
dummy_auc = sklearn.metrics.roc_auc_score(y_ps_test, y_pred_dummy[:, 1])

# Print both metrics in the same three-column layout.
for _title, (_train, _test, _dummy) in (
    ("Log loss:", (train_logloss, test_logloss, dummy_logloss)),
    ("AUC:", (train_auc, test_auc, dummy_auc)),
):
    print(_title)
    print(" Train - Test  -  Dummy")
    print(f"{_train: .3f} - {_test:.3f} - {_dummy: .3f}")

In [None]:
# Baseline classifier fitted on the same training data as the model.
dummy = DummyClassifier()
dummy.fit(X_train, y_train)

# Predicted class probabilities of model and baseline on the Taartpuzzel test set.
y_pred_proba = pipe.predict_proba(X_tp_test)
y_pred_dummy = dummy.predict_proba(X_tp_test)

test_logloss = sklearn.metrics.log_loss(y_tp_test, y_pred_proba)
dummy_logloss = sklearn.metrics.log_loss(y_tp_test, y_pred_dummy)

test_auc = sklearn.metrics.roc_auc_score(y_tp_test, y_pred_proba[:, 1])
dummy_auc = sklearn.metrics.roc_auc_score(y_tp_test, y_pred_dummy[:, 1])

# train_logloss and train_auc are the values computed in the Paardensprong
# evaluation cell; the training set is shared between both games.
for _title, (_train, _test, _dummy) in (
    ("Log loss:", (train_logloss, test_logloss, dummy_logloss)),
    ("AUC:", (train_auc, test_auc, dummy_auc)),
):
    print(_title)
    print(" Train - Test  -  Dummy")
    print(f"{_train: .3f} - {_test:.3f} - {_dummy: .3f}")

In [None]:
# Build one explainer + dashboard per evaluation set: both games combined,
# Paardensprong only and Taartpuzzel only.
# pos_label=0 selects class 0 as the positive label; the targets are Success
# cast to int, so class 0 is an unsuccessful puzzle.
explainer_tot = ClassifierExplainer(pipe, X_test, y_test, pos_label=0)
db_tot = ExplainerDashboard(explainer_tot, title="Combined", name="combined")
explainer_ps = ClassifierExplainer(pipe, X_ps_test, y_ps_test, pos_label=0)
db_ps = ExplainerDashboard(explainer_ps, title="Paardensprong", name="paardensprong")
explainer_tp = ClassifierExplainer(pipe, X_tp_test, y_tp_test, pos_label=0)
db_tp = ExplainerDashboard(explainer_tp, title="Taartpuzzel", name="taartpuzzel")
# Start from an empty hub and register the three dashboards one by one.
hub = ExplainerHub([])
hub.add_dashboard(db_tot)
hub.add_dashboard(db_ps)
hub.add_dashboard(db_tp)
# Serve the hub on localhost only.
# NOTE(review): presumably blocks this cell until the server is stopped - confirm.
hub.run(host="127.0.0.1")

### Fit final model
Fit on the total set to use all data, then do a quick fatal-flaw inspection of the predicted probabilities and of the logical relations between variables.

In [None]:
# Every row of both games: the train and test parts stacked back together.
X = pd.concat([X_ps_train, X_ps_test, X_tp_train, X_tp_test])
y = pd.concat([y_ps_train, y_ps_test, y_tp_train, y_tp_test])

# Refit an unfitted copy of the selected pipeline on all data.
total_estimator = clone(pipe)
total_estimator.fit(X, y)

# Push the data through every step except the last one, giving the input as
# the final step receives it.
X_transformed = X.copy()
for _step_name, _step in total_estimator.steps[:-1]:
    X_transformed = _step.transform(X_transformed)

In [None]:
# Histogram of the first predict_proba column over all data.
_first_class_proba = total_estimator.predict_proba(X)[:, 0]
pd.Series(_first_class_proba).plot.hist()

In [None]:
# Partial dependence of the final step ("clf") on every transformed feature;
# kind="both" draws the individual curves together with their average.
# The result is deliberately not called `display`: that name would shadow
# IPython's display() function for the rest of the kernel session.
pdp_display = PartialDependenceDisplay.from_estimator(
    total_estimator.named_steps["clf"],
    X_transformed,
    features=range(len(X_transformed.columns)),
    kind="both",
)


# Redraw with a fixed y-range of 0.8 to 1 for the partial dependence curves.
pdp_display.plot(pdp_lim={1: (0.8, 1)})

# Using the model

In [None]:
def generate_all_taartpuzzels(
    words_path="../tweevoortwaalf/Data/suitable_9_letter_words.txt",
):
    """Enumerate every possible taartpuzzel for the words in a word list.

    Args:
        words_path: Text file with one word per line and no header. Defaults
            to the project's list of suitable 9-letter words.

    Returns:
        DataFrame with one row per combination of word, startpoint (0-8),
        direction (-1 or 1) and missing letter index (0-8), plus a
        ``start_time`` column holding the current timestamp and
        ``IsTaartpuzzel`` set to 1.
    """
    # squeeze("columns") always gives a Series. A bare squeeze() collapses a
    # one-word file to a plain string, and the product below would then
    # iterate over the letters of that word.
    words = pd.read_csv(words_path, header=None).squeeze("columns")

    startpoint = range(9)
    missing_letter_index = range(9)
    directions = [-1, 1]

    X_new = pd.DataFrame(
        itertools.product(words, startpoint, directions, missing_letter_index),
        columns=["answer", "startpoint", "direction", "missing_letter_index"],
    ).assign(start_time=pd.Timestamp.now(), IsTaartpuzzel=1)
    return X_new


def generate_all_paardensprongen(
    words_path="../tweevoortwaalf/Data/suitable_8_letter_words.txt",
):
    """Enumerate every possible paardensprong for the words in a word list.

    Args:
        words_path: Text file with one word per line and no header. Defaults
            to the project's list of suitable 8-letter words.

    Returns:
        DataFrame with one row per combination of word, startpoint (0-7) and
        direction (-1 or 1), plus a ``start_time`` column holding the current
        timestamp and ``IsTaartpuzzel`` set to 0.
    """
    # squeeze("columns") always gives a Series. A bare squeeze() collapses a
    # one-word file to a plain string, and the product below would then
    # iterate over the letters of that word.
    words = pd.read_csv(words_path, header=None).squeeze("columns")
    startpoint = range(8)
    directions = [-1, 1]

    # NOTE(review): unlike the training features there is no
    # missing_letter_index column here - confirm the fitted pipeline does not
    # need it for paardensprong rows.
    X_new = pd.DataFrame(
        itertools.product(words, startpoint, directions),
        columns=["answer", "startpoint", "direction"],
    ).assign(start_time=pd.Timestamp.now(), IsTaartpuzzel=0)
    return X_new

In [None]:
def create_puzzle_options(X_new, n_per_answer=4, seen_answers=None, random_state=None):
    """Attach the play history to candidate puzzles and sample a few per answer.

    Args:
        X_new: Candidate puzzles; must contain an ``answer`` column.
        n_per_answer: Number of candidates to keep for every answer. Each
            answer needs at least this many candidate rows.
        seen_answers: Answers of the puzzles played so far, one entry per
            played puzzle. Defaults to the ``answer`` column of the global
            training frame ``X``.
        random_state: Optional seed (or random state object) forwarded to the
            sampling step, for reproducible selections.

    Returns:
        DataFrame with ``n_per_answer`` rows per answer, the columns of
        ``X_new`` plus ``NTimesWordSeenBefore`` (0 for answers never played),
        and a fresh 0..n-1 index.
    """
    if seen_answers is None:
        seen_answers = X["answer"]

    # Name the index explicitly so the merge key does not depend on how the
    # installed pandas version labels the result of value_counts().
    times_seen = (
        pd.Series(seen_answers)
        .value_counts()
        .rename_axis("answer")
        .to_frame("NTimesWordSeenBefore")
        .reset_index()
    )

    return (
        X_new.merge(times_seen, how="left", on="answer")
        .fillna({"NTimesWordSeenBefore": 0})
        # GroupBy.sample keeps every column, including the grouping column;
        # groupby.apply over the grouping column is deprecated in recent pandas.
        .groupby("answer")
        .sample(n=n_per_answer, random_state=random_state)
        .reset_index(drop=True)
    )


# Candidate puzzles per game, reduced to a random handful per answer.
taartpuzzel_options = create_puzzle_options(generate_all_taartpuzzels())
paardensprong_options = create_puzzle_options(generate_all_paardensprongen())

In [None]:
# Score every candidate puzzle with the final model.
y_pred_tp = total_estimator.predict_proba(taartpuzzel_options)
y_pred_ps = total_estimator.predict_proba(paardensprong_options)

# The first predict_proba column is stored as the "probability" of an option.
taartpuzzel_options["probability"] = y_pred_tp[:, 0]
paardensprong_options["probability"] = y_pred_ps[:, 0]

# Compare the two probability distributions in one density plot.
taartpuzzel_options["probability"].plot.kde(label="Taartpuzzel")
paardensprong_options["probability"].plot.kde(label="Paardensprong")
plt.legend()
plt.show()

In [None]:
def write_puzzle_options(df, name, engine):
    """Store ``df`` as table ``puzzleoptions`` in schema ``name`` of ``engine``.

    An existing table is replaced. Rows are written without the index, using
    multi-row inserts in chunks of 4000, and the connection is committed
    afterwards.
    """
    write_settings = dict(
        schema=name,
        if_exists="replace",
        index=False,
        method="multi",
        chunksize=4000,
    )
    with engine.connect() as connection:
        df.to_sql("puzzleoptions", con=connection, **write_settings)
        connection.commit()


# Publish the scored options of each game, first to the development database
# and then to production.
for _options, _schema in (
    (taartpuzzel_options, "taartpuzzel"),
    (paardensprong_options, "paardensprong"),
):
    for _engine in (engine_dev, engine_prod):
        write_puzzle_options(_options, _schema, _engine)

In [None]:
# Open a new connection to the production database and roll it back.
# NOTE(review): looks like clean-up left over from an interactive session
# (e.g. after a failed write) - confirm whether this cell is still needed.
with engine_prod.connect() as conn:
    conn.rollback()

# A quick investigation into sampling methods
Which method strikes a good balance between exploitation and exploration?
It should explore enough that the served puzzles stay surprising and the model keeps learning new things, while still giving the user puzzles that are hard enough.

In [None]:
def probability_option(p, n):
    """Sampling weight ``(p - p**2) ** n`` for an option with probability ``p``.

    ``p - p**2`` is zero at ``p = 0`` and ``p = 1`` and largest at ``p = 0.5``;
    a larger exponent ``n`` concentrates the weight more strongly around 0.5.
    Works on scalars as well as on pandas Series.
    """
    return (p - p**2) ** n


def iterative_sampling(X_new, sample_size=250, n_to_sample=100, random_state=None):
    """Simulate serving puzzles while options are scored in batches.

    In every round up to ``sample_size`` not-yet-considered options are added
    to the pool, all pooled options are re-weighted with
    ``probability_option`` and one option is drawn from the pool by weight.
    The exponent is ``min(100, 5 * len(X_new) / pool size)``, so it shrinks as
    the pool grows.

    Args:
        X_new: Candidate options with a ``probability`` column and a unique index.
        sample_size: Number of options added to the pool per round.
        n_to_sample: Number of rounds, i.e. number of options served.
        random_state: Optional seed for reproducible draws. ``None`` keeps
            using NumPy's global random state.

    Returns:
        DataFrame with one row per served option, in serving order, carrying
        the weight the option had when it was served. Column dtypes of
        ``X_new`` are preserved (stacking row Series and transposing them
        would turn every column into object dtype).
    """
    # A single generator shared by all draws; passing the bare seed to every
    # sample() call would repeat the same draw in each round.
    rng = None if random_state is None else np.random.RandomState(random_state)

    served = []
    predicted = X_new.iloc[:0]
    for _ in range(n_to_sample):
        unpredicted = X_new.loc[~X_new.index.isin(predicted.index)]
        if not unpredicted.empty:
            if len(unpredicted) <= sample_size:
                batch = unpredicted
            else:
                batch = unpredicted.sample(sample_size, random_state=rng)
            # Skip the empty starting frame so it cannot influence result dtypes.
            pool_parts = [predicted, batch] if len(predicted) else [batch]
            predicted = pd.concat(pool_parts)
        exponent = min(100, 5 * len(X_new) / len(predicted))
        predicted["weight"] = probability_option(predicted["probability"], exponent)
        served.append(
            predicted.sample(n=1, weights=predicted["weight"], random_state=rng)
        )
    return pd.concat(served)


# Number of puzzles each sampling method gets to serve.
n_to_play = 100


def _power_sampler(power):
    """Sampler drawing n_to_play options weighted by probability_option(p, power)."""
    return lambda p: p.sample(
        n_to_play, weights=probability_option(p["probability"], power)
    )


# Sampling strategies under comparison; each maps the options frame to the
# rows that would be served.
sample_methods = {
    "random": lambda p: p.sample(n_to_play),
    "largest": lambda x: x.nlargest(n_to_play, "probability"),
    **{f"power={power}": _power_sampler(power) for power in (1, 5, 10)},
    "iterative100": lambda p: iterative_sampling(p, 100, n_to_play),
    "iterative250": lambda p: iterative_sampling(p, 250, n_to_play),
}

# One column per method holding the probabilities of the served puzzles.
probs_played = pd.DataFrame()
for method, func in sample_methods.items():
    served_options = func(taartpuzzel_options)
    probs_played[method] = served_options["probability"].reset_index(drop=True)

# Mean served probability per method, followed by the full distributions.
ax = probs_played.mean().sort_values().plot.barh()
ax.bar_label(ax.containers[0], fmt="{:.1%}")
probs_played.plot.kde()