In [45]:
import requests
import zipfile
import subprocess
import os
import uuid

algorithm_lookup = {
    "bw": "1.1",
    "gd": "1.2",
    "cgd_pr": "1.3.1",
    "cgd_fr": "1.3.2",
    "cgd_hs": "1.3.3"
}

class BKT(object):
    def __init__(self, 
                 hmm_folder='hmm-scalable-818d905234a8600a8e3a65bb0f7aa4cf06423f1a', 
                 git_commit='818d905234a8600a8e3a65bb0f7aa4cf06423f1a'):
        
        # Git commit to download hmm-scalable
        self.git_commit = git_commit
        # Set HMM-scalable folder.
        self.hmm_folder = hmm_folder
    
    def download(self):
        """  This implementation is a wrapper around the 
        HMM-scalable tool ( http://yudelson.info/hmm-scalable).
        This function will download the original implementation."""
        
        # Download zipfile from GitHub
#         results = requests.get('https://github.com/myudelson/hmm-scalable/archive/master.zip')
        results = requests.get('https://github.com/myudelson/hmm-scalable/archive/%s.zip' % self.git_commit)
        with open('/tmp/hmm-scalable.zip', 'wb') as f:
            f.write(results.content)
            
        # Extract zipfile
        file = zipfile.ZipFile('/tmp/hmm-scalable.zip')
        file.extractall(path='.')
        
        # Install
        os.chdir(self.hmm_folder)
        status = subprocess.call("make all", shell=True)
        os.chdir('..')
        if status != 0:
            raise RuntimeError("Could not build HMM tool. Check if the the make "
                               "utility is installed and if the folder has appropriate permissions")
    
    def fit(self, data, q_matrix, solver='bw', iterations=200):
        """ Fit BKT model to data. 
        As of July 2019, just default parameters are allowed.
        
        Parameters
        ----------
        data : {array-like}, shape (n_steps, 3)
            Sequence of students steps. Each of the three dimensions are:
            Observed outcome: 0 for fail and 1 for success
            Student id: student unique identifier
            Question id: question id in q_matrix
            
        q_matrix: matrix, shape (n_questions, n_concepts)
            Each row is a question and each column a concept.
            If the concept is present in the question, the 
            correspondent cell should contain 1, otherwise, 0.
            
        solver: string, optional
            Algorithm used to fit the BKT model. Available solvers are:
            'bw': Baum-Welch (default)
            'gd': Gradient Descent
            'cgd_pr': Conjugate Gradient Descent (Polak-Ribiere)
            'cgd_fr': Conjugate Gradient Descent (Fletcher–Reeves)
            'cgd_hs': Conjugate Gradient Descent (Hestenes-Stiefel)
            
        iterations: integer, optional
            Maximum number of iterations
        
        Returns
        -------
        self : object
        
        Notes
        -----
        This is a wrapper around the HMM-scalable tool (http://yudelson.info/hmm-scalable)
        """
        filename = "hmm_files/%s.txt" % uuid.uuid4().hex
        with open(filename, "w") as step_file:
            for row in data:
                outcome, student_id, question_id = row
                skills = np.where(q_matrix[question_id] == 1)
                skills = "~".join(str(skill) for skill in skills[0])
                step_file.write("%s\t%s\t%s\t%s\n" % (outcome, student_id, question_id, skills))
                
        os.chdir(self.hmm_folder)
        command = "./trainhmm -s %s -m 1 ../%s ../%s_model.txt ../%s_predict.txt" % (
            algorithm_lookup[solver], filename, filename, filename)
        status = subprocess.check_output(command)
        os.chdir('..')
        print(status)

### Unit tests

In [21]:
import unittest
import os

class TestBKT(unittest.TestCase):
    def test_download(self):
        """ Testing HMM-scalable download """
        model = BKT()
        model.download()
        
        # Check if directory exists and it contains items
        self.assertGreater(len(os.listdir(model.hmm_folder)), 1)
        
    def test_fit(self):
        """ Testing if fit tool is able to run and fit data """
        
        # p(L0)
        pi = [0.26, 0.74]
        # p(T)
        A = [[1, 0], [0.17, 0.83]]
        # p(S) and p(G)
        B = [[0.7, 0.3], [0.13, 0.87]]
        
        data = []
        n_questions = 10
        for i in range(50):
            observations = SimulateStudent(pi, A, B).simulate(n_questions)[0]
            for question_id, outcome in enumerate(observations):
                data.append([outcome+1, i, question_id])
        q_matrix = np.array([[1,0,1]]*n_questions)
        model = BKT()
        model.fit(data, q_matrix)

In [22]:
TestBKT().test_download()

In [46]:
TestBKT().test_fit()

FileNotFoundError: [Errno 2] No such file or directory: './trainhmm -s 1.1 -m 1 ../hmm_files/43108ed2cada403baab5172fbc682332.txt ../hmm_files/43108ed2cada403baab5172fbc682332.txt_model.txt ../hmm_files/43108ed2cada403baab5172fbc682332.txt_predict.txt'

In [42]:
subprocess.check_output("pwd")

b'/home/lmoraes/Documentos/machine-teaching/machineteaching\n'

In [41]:
os.chdir('..')

### Generate sample data

In [17]:
import numpy as np
class SimulateStudent(object):
    def __init__(self, pi, A, B):
        self.priors = pi
        self.states_name = ["Learned", "Not learned"]
        self.transition = A
        self.emission = B
        
    def random_MN_draw(self, n, probs):
        """ get X random draws from the multinomial distribution whose probability is given by 'probs' """
        mn_draw = np.random.multinomial(n,probs) # do 1 multinomial experiment with the given probs with probs= [0.5,0.5], this is a coin-flip
#         print(mn_draw)
        return np.where(mn_draw == 1)[0][0] # get the index of the state drawn e.g. 0, 1, etc.

    def simulate(self, nSteps):
        """ given an HMM = (A, B, pi), simulate state and observation sequences """
        observations = np.zeros(nSteps, dtype=np.int) # array of zeros
        states = np.zeros(nSteps, dtype=np.int)
        states[0] = self.random_MN_draw(1, self.priors) # appoint the first state from the prior dist
        
#         print("Starting in state %s" % self.states_name[states[0]])
#         print("Emission with probability %s" % self.emission[states[0]])
        observations[0] = self.random_MN_draw(1, self.emission[states[0]]) # given current state t, pick what row of the B matrix to use
        
        for t in range(1, nSteps): # loop through t
#             print("Transitioning with probability %s" % self.transition[states[t-1]])
            states[t] = self.random_MN_draw(1, self.transition[states[t-1]]) # given prev state (t-1) pick what row of the A matrix to use
#             print("Going to state %s" % self.states_name[states[t]])
            observations[t] = self.random_MN_draw(1, self.emission[states[t]]) # given current state t, pick what row of the B matrix to use
#             print("Emission with probability %s" % self.emission[states[t]])

#         print()
        return observations,states

In [97]:
data = []
for i in range(50):
    observations = SimulateStudent().simulate(10)[0]
    for enumoutcome in observations:
        data.append([outcome+1, i, 0])

Starting in state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going to state Not learned
Emission with probability [0.13, 0.87]
Transitioning with probability [0.17, 0.83]
Going t

Emission with probability [0.7, 0.3]

Starting in state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]
Transitioning with probability [1, 0]
Going to state Learned
Emission with probability [0.7, 0.3]

Starting in stat