In [17]:
# Dataset switch: True loads the Duolingo results, False the Assistments 2009 ones.
duolingo = False
if duolingo:
    dataset, datapath = 'Duolingo', 'duolingo_train'
else:
    dataset, datapath = 'Assistments 2009', 'assistments2009_train'

# Order of the numeric columns that follow the model name on every line.
metric_names = ('RMSE', 'wRMSE', 'lwRMSE', 'AUC')

# Parse the whitespace-separated results file into
# {model name: {'RMSE': ..., 'wRMSE': ..., 'lwRMSE': ..., 'AUC': ...}}.
results = {}
with open(f'data/{datapath}/results') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for lineno, line in enumerate(f, start=1):
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record; skip them rather than fail on fields[0].
            continue
        name, values = fields[0], list(map(float, fields[1:]))
        if len(values) < len(metric_names):
            raise ValueError(
                f'data/{datapath}/results, line {lineno}: expected '
                f'{len(metric_names)} values after the model name, got {len(values)}'
            )
        # Columns beyond the four known metrics are ignored, as before.
        results[name] = dict(zip(metric_names, values))

results

{'drop0': {'RMSE': 0.0, 'wRMSE': 0.0, 'lwRMSE': 0.0, 'AUC': 0.913},
 'drop0.25': {'RMSE': 0.093, 'wRMSE': 0.035, 'lwRMSE': 0.073, 'AUC': 0.776},
 'drop0.5': {'RMSE': 0.147, 'wRMSE': 0.064, 'lwRMSE': 0.126, 'AUC': 0.68},
 'drop0.75': {'RMSE': 0.283, 'wRMSE': 0.105, 'lwRMSE': 0.234, 'AUC': 0.588},
 'drop0.99': {'RMSE': 0.719, 'wRMSE': 0.481, 'lwRMSE': 0.664, 'AUC': 0.497},
 'drop0.999': {'RMSE': 0.833, 'wRMSE': 0.692, 'lwRMSE': 0.797, 'AUC': 0.497},
 'rnn': {'RMSE': 0.213, 'wRMSE': 0.061, 'lwRMSE': 0.152, 'AUC': 0.508},
 'markov': {'RMSE': 0.245, 'wRMSE': 0.065, 'lwRMSE': 0.17, 'AUC': 0.495}}

In [9]:
import matplotlib.pyplot as plt
import matplotlib

# NOTE(review): y, z and n are not read anywhere in this cell; they are kept
# in case another cell relies on them — confirm and delete if unused.
y = [2.56422, 3.77284, 3.52623, 3.51468, 3.02199]
z = [0.15, 0.3, 0.45, 0.6, 0.75]
n = [58, 651, 393, 203, 123]

# Render through the PGF backend with a serif font family.
matplotlib.use('pgf')
pgf_with_rc_fonts = {
    'font.family': 'serif',
    'font.serif': [],
    # 'text.usetex': True,
}
matplotlib.rcParams.update(pgf_with_rc_fonts)

plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 22.

# A single panel, wrapped in a list so the loop below can index axes by metric.
fig, axs = plt.subplots(1, 1)
axs = [axs]
fig.suptitle(dataset)

# Label shown next to each point.
prettyname = {
    'drop0': 'Drop 0.000',
    'drop0.25': 'Drop 0.250',
    'drop0.5': 'Drop 0.500',
    'drop0.75': 'Drop 0.750',
    'drop0.99': 'Drop 0.990',
    'drop0.999': 'Drop 0.999',
    'rnn': 'RNN',
    'markov': 'Markov',
}
# The "drop" series is drawn in grey; the two other models get their own colour.
grey = '#555555'
color = {
    'drop0': grey,
    'drop0.25': grey,
    'drop0.5': grey,
    'drop0.75': grey,
    'drop0.99': grey,
    'drop0.999': grey,
    'rnn': 'r',
    'markov': 'b',
}

for i, rmse in enumerate(['wRMSE']):
    ax = axs[i]

    # Connect the "drop" models with a line, ordered by their (string-sorted) name.
    X = sorted(name for name in results if name.startswith('drop'))
    ax.plot([results[x]['AUC'] for x in X], [results[x][rmse] for x in X], c=grey)

    for model, res in results.items():
        auc, err = res['AUC'], res[rmse]
        ax.scatter([auc], [err], c=color[model])

        if model[:4] != 'drop' and i == 0:
            # Non-"drop" models on the first panel: the label is lowered by
            # 0.02. Names starting with 'r' are labelled to the right of the
            # point, all others to the left (offset depends on the dataset).
            if model[0] == 'r':
                dx = 0.01
            else:
                dx = -(0.1 if duolingo else 0.09)
            label_xy = (auc + dx, err - 0.02)
        else:
            label_xy = (auc + 0.01, err)
        ax.annotate(prettyname[model], label_xy, c=color[model])

    ax.set_xlabel('AUC')
    ax.set_ylabel(rmse)

    # Widen the axes so the text labels are not clipped at the right/top edge.
    xmin, xmax = ax.get_xlim()
    ax.set_xlim(xmin, xmax+0.13)
    ymin, ymax = ax.get_ylim()
    ax.set_ylim(ymin, ymax+0.05)

    # Drop ticks above 1 that the widened x-range would otherwise show.
    ticks = [tick for tick in ax.get_xticks() if tick <= 1]
    ax.set_xticks(ticks)

# Name the output after the selected dataset. The previous hard-coded
# 'auc-wrmse-duolingo.pdf' made the Assistments 2009 figure overwrite the
# Duolingo one; the Duolingo file name itself is unchanged.
figure_tag = 'duolingo' if duolingo else 'assistments2009'
fig.savefig(f'auc-wrmse-{figure_tag}.pdf')

In [20]:
# Emit the wRMSE of every model as one LaTeX table row fragment:
# three decimals per cell, each cell followed by " & " (including the last
# one), and no trailing newline.
row_cells = []
for model in ('drop0', 'drop0.25', 'drop0.5', 'drop0.75', 'drop0.99', 'drop0.999', 'markov', 'rnn'):
    wrmse = results[model]['wRMSE']
    row_cells.append(f"{wrmse:.3f} &")
print(' '.join(row_cells), end=' ')

0.000 & 0.035 & 0.064 & 0.105 & 0.481 & 0.692 & 0.065 & 0.061 & 