In [None]:
import pickle as pkl

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the cleaned post-test data into `df` from a pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source.
with open("data/post_test.pkl", "rb") as f:
    df = pkl.load(f)

In [None]:
# Gender codes used as group keys in the gender plots; those plots label the
# four positions, in this order, as Male, Female, Other, No answer.
genders = [1, 2, 3, 4]

In [None]:
years = ["1st", "2nd", "3rd"]

def hexplot(df, coly, colx=(0, "year"), datax=years, bins=50, ax=None):
    """Draw a binned, honeycomb-style dot plot of ``df[coly]`` grouped by ``df[colx]``.

    Bin edges are computed once from all non-missing values of ``df[coly]`` so
    every group shares the same vertical levels. For each group (in the order
    given by ``datax``) every observation is drawn as a dot at the *left edge*
    of its bin; dots in the same bin are spread horizontally around the group's
    integer position, and alternate rows are shifted by half a dot spacing.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    coly : column key
        Column holding the values to bin (y axis).
    colx : column key, default ``(0, "year")``
        Column holding the group label of each row.
    datax : sequence, default ``years``
        Group labels to plot; group ``i`` is centred around x position ``i``.
    bins : int or sequence, default 50
        Passed to ``numpy.histogram`` to build the shared bin edges.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted a new 16x9 figure is created, which
        also becomes pyplot's current figure.

    Returns
    -------
    tuple
        The x-axis limits of the axes after plotting.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(16, 9))

    # Shared bin edges, so all groups use identical y levels.
    _, edges = np.histogram(df[coly].dropna(), bins=bins)

    for i, group in enumerate(datax):
        sub = df[df[colx] == group][coly].dropna()
        counts, _ = np.histogram(sub, bins=edges)

        xs, ys = [], []
        for j in np.flatnonzero(counts):
            total = int(counts[j])
            # Parity of (1-based bin number + dot count) decides whether this
            # row is shifted by half a dot spacing, giving the honeycomb look.
            odd = (int(j) + 1 + total) % 2
            # Dots are 1/40 apart and centred on the group position i.
            xs.append(i + np.arange(total) / 40 + odd / 80 - total / 80)
            ys.append(np.full(total, edges[j]))

        if xs:
            # One artist per group; "C<i>" picks the i-th colour of the active
            # property cycle and wraps around when there are more groups than colours.
            ax.plot(np.concatenate(xs), np.concatenate(ys), "o", c=f"C{i}")

    return ax.get_xlim()

In [None]:
# One point per "rel" column: a stored 0 becomes 1, any other value becomes 0,
# and None is passed through unchanged.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 (renamed DataFrame.map).
df_a = df[
    [col for col in df.columns if col[1] == "rel"]
].applymap(
    lambda x: None if x is None else (1 if x == 0 else 0)
)

In [None]:
# "score" columns whose first-level key is below 13, rescaled by a factor of 1/100.
df_b = df[
    [col for col in df.columns if col[1] == "score" and col[0] < 13]
].div(100)

In [None]:
# Row-wise total over the df_a points and the df_b scaled scores.
df_ab = pd.concat([df_a, df_b], axis=1).sum(axis=1)
# Gender code next to the total; the unnamed total Series ends up as column 0.
gscore = pd.concat([df[0]["gender"], df_ab], axis=1)
gscore

In [None]:
# Total score (column 0 of gscore) per gender code, saved to plots/score_by_gender.png.
hexplot(gscore, coly=0, colx="gender", datax=genders)

plt.xlabel("Gender")
plt.ylabel("Score of questions 2-7")
plt.xticks(ticks=range(4), labels=["Male", "Female", "Other", "No answer"], rotation=0)
plt.savefig("plots/score_by_gender.png")

In [None]:
# 2-D histogram: summed df_a points (x) against the row-wise sum of the
# r_concentration and r_width columns of key 13 (y).
df_u = df_a.sum(axis=1)
df_v = df[[(13, 'r_concentration'), (13, 'r_width')]].sum(axis=1)
# Label-based slice: keeps index labels up to and including 228.
# NOTE(review): the 228 cutoff is undocumented here - confirm why later rows are excluded.
df_uv = pd.concat([df_u, df_v], axis=1).loc[:228]

fig, ax = plt.subplots(figsize=(16, 9))
ax.hist2d(x=df_uv[0], y=df_uv[1])
ax.set_xlabel("Score for questions 2-5")
ax.set_ylabel("Understanding of proportionality")
fig.savefig("plots/q25_understanding.png")

In [None]:
# Histograms of the s_difficult and s_entertain columns under top-level key 19.
df[19][["s_difficult", "s_entertain"]].hist()

In [None]:
# 2-D histogram: summed df_b scores (x) against the r_wavelength column of key 13 (y).
# Note: this rebinds df_u, df_v and df_uv, which an earlier cell also assigns.
df_u = df_b.sum(axis=1)
df_v = df[[(13, 'r_wavelength')]].sum(axis=1)
# Label-based slice: keeps index labels up to and including 228.
# NOTE(review): the 228 cutoff is undocumented here - confirm why later rows are excluded.
df_uv = pd.concat([df_u, df_v], axis=1).loc[:228]

fig, ax = plt.subplots(figsize=(16, 9))
ax.hist2d(x=df_uv[0], y=df_uv[1])
ax.set_xlabel("Score for questions 6-7")
ax.set_ylabel("Understanding of color complementarity")
fig.savefig("plots/q67_understanding.png")

In [None]:
# Show the entries of df_v that are greater than 0. df_v is assigned in more
# than one cell, so the result depends on which of them ran last.
df_v[df_v > 0]

In [None]:
# List the columns whose second-level key is a string starting with "r_".
# isinstance also accepts str subclasses (e.g. numpy.str_), which an exact
# type comparison would silently skip.
[c for c in df.columns if isinstance(c[1], str) and c[1].startswith("r_")]

In [None]:

# Average confidence per study year, saved to plots/conf_by_year.png.
hexplot(df, coly=("conf", "avg"), colx=(0, "year"), datax=years, bins=50)

plt.xlabel("Study Year")
plt.ylabel("Average confidence")
plt.xticks(ticks=range(3), labels=years, rotation=0)
plt.savefig("plots/conf_by_year.png")

In [None]:

# Average confidence per gender code, saved to plots/conf_by_gender.png.
hexplot(df, ("conf", "avg"), colx=(0, "gender"), datax=genders, bins=50)

plt.xticks(range(4), ["Male", "Female", "Other", "No answer"], rotation=0)

# The groups on the x axis are genders, so the axis is labelled accordingly
# (it previously carried the "Study Year" label from the by-year plot).
plt.xlabel("Gender")
plt.ylabel("Average confidence")
plt.savefig("plots/conf_by_gender.png")

In [None]:
# Row-wise total over the df_a points and the df_b scaled scores, paired with
# the study year; the unnamed total Series ends up as column 0 of yscore.
df_ab = pd.concat([df_a, df_b], axis=1).sum(axis=1)
yscore = pd.concat([df[0]["year"], df_ab], axis=1)
hexplot(yscore, 0, colx="year", datax=years, bins=50)

plt.xticks(range(3), years, rotation=0)

plt.xlabel("Study Year")
plt.ylabel("Score of questions 2-7")
plt.savefig("plots/score_by_year.png")

In [None]:
# Quick look: the (8, "rel") column grouped by gender code.
hexplot(df, coly=(8, "rel"), colx=(0, "gender"), datax=genders)

In [None]:
# ("time", "total") with hexplot's default grouping; keep the returned x limits.
lims = hexplot(df, coly=("time", "total"))

In [None]:
# s_entertain per gender code using 20 bins, saved to plots/entertain_by_gender.png.
hexplot(df, coly=(19, "s_entertain"), colx=(0, "gender"), datax=genders, bins=20)

plt.xlabel("Gender")
plt.ylabel("How entertaining was it?")
plt.xticks(ticks=range(4), labels=["Male", "Female", "Other", "No answer"], rotation=0)
plt.savefig("plots/entertain_by_gender.png")

In [None]:
# s_entertain per study year using 20 bins, saved to plots/entertain_by_year.png.
hexplot(df, coly=(19, "s_entertain"), colx=(0, "year"), datax=years, bins=20)

plt.xlabel("Study Year")
plt.ylabel("How entertaining was it?")
plt.xticks(ticks=range(3), labels=years, rotation=0)
plt.savefig("plots/entertain_by_year.png")

In [None]:
# s_difficult per gender code using 20 bins, saved to plots/diff_by_gender.png.
hexplot(df, coly=(19, "s_difficult"), colx=(0, "gender"), datax=genders, bins=20)

plt.xlabel("Gender")
plt.ylabel("How hard was it?")
plt.xticks(ticks=range(4), labels=["Male", "Female", "Other", "No answer"], rotation=0)
plt.savefig("plots/diff_by_gender.png")

In [None]:
# s_difficult per study year using 20 bins, saved to plots/diff_by_year.png.
hexplot(df, coly=(19, "s_difficult"), colx=(0, "year"), datax=years, bins=20)

plt.xlabel("Study Year")
plt.ylabel("How hard was it?")
plt.xticks(ticks=range(3), labels=years, rotation=0)
plt.savefig("plots/diff_by_year.png")

In [None]:
# Display the s_entertain column under top-level key 19.
df[19]["s_entertain"]

In [None]:
# s_entertain with 10 bins and hexplot's default grouping. The bin count must be
# passed by keyword: hexplot's third positional parameter is `colx`, so a bare
# positional 10 would be used as the grouping column instead of the bin count.
hexplot(df, (19, "s_entertain"), bins=10)

In [None]:
# (11, "score") with hexplot's default grouping.
hexplot(df, coly=(11, "score"))

In [None]:
# (12, "score") with hexplot's default grouping.
hexplot(df, coly=(12, "score"))

In [None]:
# (11, "score") grouped by gender code.
hexplot(df, coly=(11, "score"), colx=(0, "gender"), datax=genders)

In [None]:
# (12, "score") grouped by gender code.
hexplot(df, coly=(12, "score"), colx=(0, "gender"), datax=genders)

In [None]:
# Display the column index of the loaded frame.
df.columns

In [None]:
# Display the "rel" column under top-level key 7.
df[7]["rel"]

In [None]:
# Count the entries equal to 0 in df["conf"], summed along axis 0 (down the rows).
(df["conf"] == 0).sum(axis=0)