# Analyse how well we are doing at guessing words
Which tactics work, am I improving over time, when is a puzzle difficult, etc.

In [None]:
import os
import random

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sklearn.linear_model import LinearRegression

In [None]:
# Let python-dotenv populate the process environment before reading settings.
load_dotenv()
PLAYERNAME = os.getenv("playername")
# Fail fast: with PLAYERNAME unset, the `playername == @PLAYERNAME` filter
# used when building the analysis frame matches no rows, so every later cell
# would silently run on an empty frame.
if not PLAYERNAME:
    raise RuntimeError(
        "Environment variable 'playername' is not set; "
        "define it in the environment or in the .env file."
    )

In [None]:
# SQLAlchemy selects the DBAPI driver from the URL scheme, so switch a plain
# "postgresql://" URL to the psycopg driver.
# Only the leading scheme is rewritten ("://" included, at most one
# replacement): a "postgresql" substring in the credentials, host or database
# name is left alone, and a URL that already names a driver is not mangled.
# os.environ[...] raises a KeyError naming the missing variable instead of an
# opaque AttributeError on None.
database_url = os.environ["PROD_DATABASE_URL"].replace(
    "postgresql://", "postgresql+psycopg://", 1
)

engine = create_engine(database_url)

# Data preparation

In [None]:
# (query, index column) for each table, in the order the frames are unpacked
# below.
table_queries = (
    ("SELECT * FROM woordrader.games", "game_id"),
    ("SELECT * FROM woordrader.shownletters", "letterplacement_id"),
    ("SELECT * FROM woordrader.boughtletters", "buyevent_id"),
    ("SELECT * FROM woordrader.guesses", "guess_id"),
)

# Read all four tables over one connection; the generator is consumed by the
# tuple unpacking while the connection is still open.
with engine.connect() as conn:
    games, positions, boughtletters, guesses = (
        pd.read_sql_query(query, con=conn, index_col=index_column)
        for query, index_column in table_queries
    )

In [None]:
# Per game: sum of the `correct` flags and the number of shown letters that
# equal "-".
position_relevant = positions.groupby("game_id").agg(
    LettersCorrect=("correct", "sum"),
    LettersUnknown=("shown_letter", lambda letters: letters.eq("-").sum()),
)
# Per game: number of buy events. Games without any buy event are absent here
# and are filled with 0 after the join.
buyevents_relevant = boughtletters.groupby("game_id").size().rename("LettersBought")
# Guess time and outcome, keyed by game.
guesses_relevant = guesses.set_index("game_id")[["guess_time", "correct"]].rename(
    columns={"correct": "GuessCorrect"}
)
# One row per (game, guess) for the configured player. Only games that have a
# guess survive the inner join.
df = (
    games.join(position_relevant)
    .join(buyevents_relevant)
    .join(guesses_relevant, how="inner")
    .query("playername == @PLAYERNAME")
    .fillna({"LettersBought": 0})
    .astype({"LettersBought": int, "LettersCorrect": int, "LettersUnknown": int})
    .assign(
        # Rows with an incorrect guess are assigned the largest LettersBought
        # value present in the frame.
        LettersBought=lambda frame: frame["LettersBought"].mask(
            ~frame["GuessCorrect"], frame["LettersBought"].max()
        ),
        # NOTE(review): 12 is presumably the number of letters per puzzle —
        # confirm.
        LettersWrong=lambda frame: (
            12 - frame["LettersCorrect"] - frame["LettersUnknown"]
        ),
    )
)
df

# Analysis
## Checking the puzzles

In [None]:
# Correlation matrix of the unknown-letter and wrong-letter counts.
df.loc[:, ["LettersUnknown", "LettersWrong"]].corr()

In [None]:
# Share of rows per number of correct letters, ordered by that number.
(
    df["LettersCorrect"]
    .value_counts(normalize=True)
    .sort_index()
)

In [None]:
# One million draws from Binomial(n=12, p=0.95), tabulated as shares per
# outcome — presumably a reference for the observed LettersCorrect shares.
(
    pd.Series([random.binomialvariate(12, 0.95) for _ in range(1_000_000)])
    .value_counts(normalize=True)
    .sort_index()
)

## Checking how well it is played

In [None]:
# Share of rows per guess outcome.
df.loc[:, "GuessCorrect"].value_counts(normalize=True)

In [None]:
# Histogram of letters bought, followed by its summary statistics.
letters_bought = df["LettersBought"]
letters_bought.hist()
display(letters_bought.describe())

## Explanation

In [None]:
# 20-row rolling mean of letters bought, plotted against the frame's index.
# The source tables are read without ORDER BY, so row order is not guaranteed.
# Sort by index first (stable, so rows sharing an index keep their relative
# order) so the window and the x-axis run in index order.
# NOTE(review): assumes index order reflects play order — confirm.
(
    df["LettersBought"]
    .sort_index(kind="stable")
    .rolling(20)
    .mean()
    .plot(ylim=(0, None))
)

In [None]:
# Summary statistics of letters bought for each number of correct letters.
df["LettersBought"].groupby(df["LettersCorrect"]).describe()

In [None]:
# For each number of correct letters (rows): the share of each LettersBought
# value (columns), shown as percentages. Combinations that never occur are 0.
(
    df.groupby("LettersCorrect")["LettersBought"]
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
    .style.format("{:.1%}")
)

In [None]:
# Linear regression of letters bought on game_id (taken from the index) and
# the unknown/wrong letter counts.
lr = LinearRegression()
lr.fit(
    X=df.reset_index().loc[:, ["game_id", "LettersUnknown", "LettersWrong"]],
    y=df["LettersBought"],
)

In [None]:
# Fitted intercept and coefficients of the regression model `lr`.
(lr.intercept_, lr.coef_)