In [None]:
from __future__ import division

import random
import math
import copy

import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

from lentil import datatools
from lentil import datasynth
from lentil import evaluate
from lentil import models
from lentil import est

%matplotlib inline

In [None]:
import logging

# Lower the root logger's threshold to DEBUG for the rest of the notebook.
logging.getLogger(name=None).setLevel(level=logging.DEBUG)

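Generate a synthetic 1PL/2PL IRT model and sample an interaction history from it. Note that each assessment's `difficulty` is *added* to the logit (pass likelihood = sigmoid(discriminability * proficiency + difficulty)), so it acts as an assessment offset: larger values make an assessment easier to pass.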

In [None]:
# Size of the synthetic data set.
num_students = 2000
num_assessments = 3000
num_ixns_per_student = 1000

USING_2PL = False # False => using 1PL

# The notebook draws from both the stdlib `random` module and `np.random`.
# Seed both generators here so that Restart & Run All reproduces the same
# parameters, the same interaction history, and therefore the same plots.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

In [None]:
# Ground-truth parameters: one proficiency per student and one difficulty per
# assessment, each drawn from a standard normal distribution.
proficiencies = np.random.normal(0, 1, num_students)
difficulties = np.random.normal(0, 1, num_assessments)

# 2PL: every assessment also gets a standard-normal discriminability.
# 1PL: all discriminabilities are fixed at one.
if USING_2PL:
    discriminabilities = np.random.normal(0, 1, num_assessments)
else:
    discriminabilities = np.ones(num_assessments)

# IDs are the letter 'S' (student) or 'A' (assessment) followed by the index
# into the arrays above. `range` is used instead of the Python 2-only `xrange`
# so the cell runs unchanged on both Python 2 and Python 3.
student_ids = ['S' + str(idx) for idx in range(num_students)]
assessment_ids = ['A' + str(idx) for idx in range(num_assessments)]

In [None]:
# Simulate num_ixns_per_student assessment interactions for every student.
# The result list is preallocated; each student owns one contiguous slice.
ixns = [None] * (num_students * num_ixns_per_student)

# Materialised as a list so it has the same type on Python 2 and Python 3.
assessment_idxes = list(range(num_assessments))

for student_idx, student_id in enumerate(student_ids):
    first_slot = student_idx * num_ixns_per_student
    # `range` instead of the Python 2-only `xrange` keeps the cell portable.
    for t in range(num_ixns_per_student):
        # Pick an assessment uniformly at random (with replacement).
        module_idx = random.choice(assessment_idxes)

        # Logistic pass likelihood; note the difficulty is ADDED to the logit.
        logit = discriminabilities[module_idx]*proficiencies[student_idx] + difficulties[module_idx]
        pass_likelihood = 1 / (1 + math.exp(-logit))

        ixns[first_slot + t] = {
            'student_id' : student_id,
            'module_id' : assessment_ids[module_idx],
            'module_type' : datatools.AssessmentInteraction.MODULETYPE,
            # Bernoulli draw with success probability pass_likelihood.
            'outcome' : np.random.random() < pass_likelihood,
            # Timesteps are 1-based.
            'timestep' : t+1
        }

history = datatools.InteractionHistory(pd.DataFrame(ixns))

# Replace the history's ID -> index lookups with "strip the one-letter prefix",
# i.e. the index used to build the ID in the first place.
# NOTE(review): presumably this keeps fitted parameters aligned with the
# ground-truth arrays -- confirm against datatools.InteractionHistory.
history.idx_of_student_id = lambda x: int(x[1:])
history.idx_of_assessment_id = lambda x: int(x[1:])

In [None]:
# One-dimensional MIRT model over the synthetic history; assessment factors
# are only enabled when the data was generated from the 2PL model.
mirt_model = models.MIRTModel(
    history,
    dims=1,
    using_assessment_factors=USING_2PL)

# MAP estimator settings used for the fit.
estimator = est.MIRTMAPEstimator(
    regularization_constant=1e-3, ftol=1e-5, debug_mode_on=True)

mirt_model.fit(estimator)

In [None]:
# 1PL baseline, built from history.data with regularization-constant
# selection switched on, then fitted.
onepl_model = models.OneParameterLogisticModel(history.data, select_regularization_constant=True)
onepl_model.fit()

In [None]:
# 2PL baseline, built from history.data with regularization-constant
# selection switched on, then fitted.
twopl_model = models.TwoParameterLogisticModel(history.data, select_regularization_constant=True)
twopl_model.fit()

In [None]:
# Map every distinct ID that occurs in the history back to its integer index
# by dropping the one-letter prefix ('S' for students, 'A' for assessments).
student_idxes = [
    int(sid[1:])
    for sid in history.data['student_id'].unique()
]
assessment_idxes = [
    int(aid[1:])
    for aid in history.data['module_id'].unique()
]

Verify that `models.OneParameterLogisticModel` can recover parameters. We would only expect this to be possible when `USING_2PL = False`.

In [None]:
# This cell treats the 1PL coefficients from position num_students onward as
# the difficulty estimates and compares them with the true difficulties of the
# assessments listed in assessment_idxes.
onepl_difficulties = onepl_model.model.coef_[0, num_students:]
observed_difficulties = difficulties[assessment_idxes]

# Estimate against ground truth.
plt.scatter(observed_difficulties, onepl_difficulties)
plt.xlabel('True difficulties')
plt.ylabel('Estimated difficulties')
plt.show()

# Distribution of the estimation error.
plt.hist(onepl_difficulties - observed_difficulties, bins=20)
plt.xlabel('Estimated difficulty - true difficulty')
plt.ylabel('Frequency (number of assessments)')
plt.show()

In [None]:
# This cell treats the first num_students 1PL coefficients as the proficiency
# estimates and compares them with the true proficiencies of the students
# listed in student_idxes.
onepl_proficiencies = onepl_model.model.coef_[0, :num_students]
observed_proficiencies = proficiencies[student_idxes]

# Estimate against ground truth.
plt.scatter(observed_proficiencies, onepl_proficiencies)
plt.xlabel('True proficiencies')
plt.ylabel('Estimated proficiencies')
plt.show()

# Distribution of the estimation error.
plt.hist(onepl_proficiencies - observed_proficiencies, bins=20)
plt.xlabel('Estimated proficiency - true proficiency')
plt.ylabel('Frequency (number of students)')
plt.show()

Verify that `models.TwoParameterLogisticModel` can recover parameters. We would only expect this to be possible when `USING_2PL = True`.

In [None]:
# This cell treats the 2PL coefficients after the first
# num_students*num_assessments entries as the difficulty estimates.
num_pair_coefs = num_students*num_assessments
twopl_difficulties = twopl_model.model.coef_[0, num_pair_coefs:]
observed_difficulties = difficulties[assessment_idxes]

# Estimate against ground truth.
plt.scatter(observed_difficulties, twopl_difficulties)
plt.xlabel('True difficulties')
plt.ylabel('Estimated difficulties')
plt.show()

# Distribution of the estimation error.
plt.hist(twopl_difficulties - observed_difficulties, bins=20)
plt.xlabel('Estimated difficulty - true difficulty')
plt.ylabel('Frequency (number of assessments)')
plt.show()

In [None]:
# The first num_students*num_assessments 2PL coefficients are compared with
# the true discriminability*proficiency products.
num_pair_coefs = num_students*num_assessments
est_params = twopl_model.model.coef_[0, :num_pair_coefs]

# Outer product with one row per assessment and one column per student,
# flattened row by row.
true_params = np.outer(discriminabilities, proficiencies).ravel()

# Estimate against ground truth.
plt.scatter(true_params, est_params)
plt.xlabel('True proficiency*discriminability')
plt.ylabel('Estimated proficiency*discriminability')
plt.show()

# Distribution of the estimation error.
plt.hist(est_params - true_params, bins=20)
plt.xlabel('Estimated proficiency*discriminability - true proficiency*discriminability')
plt.ylabel('Frequency (number of student-assessment pairs)')
plt.show()

Verify that `models.MIRTModel` can recover parameters

In [None]:
# The MIRT model's assessment offsets are compared directly with the true
# difficulties (all assessments, in index order).
mirt_offsets = mirt_model.assessment_offsets

# Estimate against ground truth.
plt.scatter(difficulties, mirt_offsets)
plt.xlabel('True difficulties')
plt.ylabel('Estimated difficulties')
plt.show()

# Distribution of the estimation error.
plt.hist(mirt_offsets - difficulties, bins=20)
plt.xlabel('Estimated difficulty - true difficulty')
plt.ylabel('Frequency (number of assessments)')
plt.show()

In [None]:
# The model was built with dims=1, so column 0 of student_factors is the
# single estimated proficiency per student.
mirt_proficiencies = mirt_model.student_factors[:, 0]

# Estimate against ground truth.
plt.scatter(proficiencies, mirt_proficiencies)
plt.xlabel('True proficiencies')
plt.ylabel('Estimated proficiencies')
plt.show()

# Distribution of the estimation error.
plt.hist(mirt_proficiencies - proficiencies, bins=20)
plt.xlabel('Estimated proficiency - true proficiency')
plt.ylabel('Frequency (number of students)')
plt.show()

In [None]:
# Column 0 of assessment_factors is compared with the true discriminabilities.
# NOTE(review): mirt_model is created with using_assessment_factors=USING_2PL;
# confirm what assessment_factors holds when USING_2PL is False, since this
# cell indexes it unconditionally.
mirt_discriminabilities = mirt_model.assessment_factors[:, 0]

# Estimate against ground truth.
plt.scatter(discriminabilities, mirt_discriminabilities)
plt.xlabel('True discriminabilities')
plt.ylabel('Estimated discriminabilities')
plt.show()

# Distribution of the estimation error.
plt.hist(mirt_discriminabilities - discriminabilities, bins=20)
plt.xlabel('Estimated discriminability - true discriminability')
plt.ylabel('Frequency (number of assessments)')
plt.show()

Verify that all models achieve similar training AUCs

In [None]:
# Training AUC of the fitted models.OneParameterLogisticModel on the synthetic
# history. The call is the cell's last expression, so its return value is what
# the cell displays; plot_roc_curve=True presumably also draws the ROC curve.
evaluate.training_auc(onepl_model, history, plot_roc_curve=True)

In [None]:
# Training AUC of the fitted models.TwoParameterLogisticModel on the synthetic
# history. The call is the cell's last expression, so its return value is what
# the cell displays; plot_roc_curve=True presumably also draws the ROC curve.
evaluate.training_auc(twopl_model, history, plot_roc_curve=True)

In [None]:
# Training AUC of the fitted models.MIRTModel on the synthetic history.
# The call is the cell's last expression, so its return value is what the
# cell displays; plot_roc_curve=True presumably also draws the ROC curve.
evaluate.training_auc(mirt_model, history, plot_roc_curve=True)

In [None]:
# Reference point ("true model"): a deep copy of the fitted MIRT model whose
# parameters are replaced by the values the data was generated from. The
# fitted mirt_model itself is left untouched.
true_model = copy.deepcopy(mirt_model)

# The offsets attribute is rebound to the ground-truth array, while the two
# factor matrices are overwritten in place (column 0).
true_model.assessment_offsets = difficulties
true_model.assessment_factors[:, 0] = discriminabilities
true_model.student_factors[:, 0] = proficiencies

evaluate.training_auc(true_model, history, plot_roc_curve=True)