In [22]:
import numpy as np
import numpy.random as rng 
import pandas as pd
import split_dataset

In [26]:
# Size of the synthetic dataset: every student answers one trial per skill.
n_students = 4000
n_trials_per_student = 50

# Seed NumPy's global generator so all draws below are reproducible.
rng.seed(41)

# Column layout of kc_probs (one row per core KC).
COL_LEARN, COL_FORGET, COL_CORRECT_UNKNOWN, COL_CORRECT_KNOWN, COL_INITIAL = range(5)

# Per-core-KC parameters:
# p(learn), p(forget), p(correct | not known), p(correct | known), p(known initially)
kc_probs = np.array([
    [0.2, 0.2, 0.2, 0.2, 0.2],
    [0.9, 0, 0.3, 0.95, 0.5],
    [0.05, 0.4, 0.05, 0.75, 0.1],
    [0.1, 0.1, 0.1, 0.8, 0.05],
    [0.4, 0.4, 0.2, 0.2, 0.5]
])
n_core_kcs = len(kc_probs)

# Assignment vector: A[k] is the core KC that drives observed skill k.
A = rng.choice(n_core_kcs, size=n_trials_per_student)


# Simulate every student trial by trial, collecting one record per answer.
rows = []
for student in range(n_students):

    # Sample the starting mastery (1 = known, 0 = not known) of each core KC.
    state = rng.binomial(1, kc_probs[:, COL_INITIAL])

    # Each student meets every skill exactly once, in a freshly shuffled order.
    for kc in rng.permutation(n_trials_per_student):
        core_kc = A[kc]
        knows_kc = state[core_kc]

        # Draw the answer using the correctness probability of the current state.
        correct_col = COL_CORRECT_KNOWN if knows_kc else COL_CORRECT_UNKNOWN
        ans = rng.binomial(1, kc_probs[core_kc, correct_col])

        rows.append({ "student" : student, "correct" : ans, "skill" : kc })

        # Update mastery of the practiced core KC: a known KC may be forgotten,
        # an unknown one may be learned.
        if knows_kc:
            state[core_kc] = 1 - rng.binomial(1, kc_probs[core_kc, COL_FORGET])
        else:
            state[core_kc] = rng.binomial(1, kc_probs[core_kc, COL_LEARN])

# One row per (student, trial) with columns student / correct / skill.
df = pd.DataFrame(rows)
df

Unnamed: 0,student,correct,skill
0,0,0,44
1,0,0,39
2,0,1,14
3,0,0,41
4,0,0,33
...,...,...,...
199995,3999,0,45
199996,3999,0,33
199997,3999,1,27
199998,3999,1,19


In [27]:
# Write the simulated trials to disk as CSV; index=False leaves the row index out of the file.
dataset_csv = "data/datasets/synthetic_ours.csv"
df.to_csv(dataset_csv, index=False)

In [28]:
# Run the project's split_dataset helper on the CSV saved for the synthetic dataset.
# NOTE(review): the two trailing 5s are passed positionally and their meaning is defined
# by split_dataset.main — presumably fold/repetition counts; confirm against that module.
dataset_csv = "data/datasets/synthetic_ours.csv"
splits_npy = "data/splits/synthetic_ours.npy"
split_dataset.main(dataset_csv, splits_npy, 5, 5)

[0.64 0.16 0.2 ]
[0.64 0.16 0.2 ]
[0.64 0.16 0.2 ]
[0.64 0.16 0.2 ]
[0.64 0.16 0.2 ]
