In [2]:
import numpy as np
import em_algorithm
from simulation_framework.simulate_competency import respondent_population
from simulation_framework.simulate_responses import response_simulation
from scipy.stats import multivariate_normal

C:\Users\Jesper_dfki\OneDrive - dfki.de\Dokumente\GitHub\Knowledge-Growth-Prediction\models


## Experiment 1: MIRT-2PL Parameter Recovery

In [8]:
from unittest import result


def mirt_param_recovery(sample_size) -> dict:
    latent_dimension = 2
    item_dimension = 6

    #Define Population
    cov = np.array([[1,0.2],
                    [0.2,1]])

    latent_distribution = multivariate_normal(mean=np.array([0,0]), cov=cov)
    population = respondent_population(latent_dimension=latent_dimension, latent_distribution=latent_distribution)

    #Define Test
    A = np.array([[0.5,0],
                  [2,0],
                  [0,0.4],
                  [0.7,0.6],
                  [2, 0.9],
                  [0, 1.5]])
    Q = np.array([[1,0],
                  [1,0],
                  [0,1],
                  [1,1],
                  [1,1],
                  [0,1]])
    delta = np.array([0, 0.5, 1, 0, 1.5, 0.9])
    early_item_parameters = {"A": A, "delta": delta, "q_matrix": Q, "item_dimension": item_dimension, "latent_dimension": latent_dimension}

    #Sample responses
    response_simulation_obj = response_simulation(population=population, early_item_params=early_item_parameters)
    sample = response_simulation_obj.sample(100)

    #Fit Parameters

    result_dict = {"sample": sample,
                   "early_item_parameters": early_item_parameters,
                   "latent_covariance": cov,
                   "estimated_parameters": None}
    return(result_dict)


In [5]:
import models
import em_algorithm
import pandas as pd

#sample data
exper_dict = mirt_param_recovery(30)
U = exper_dict["sample"]["early_responses"]
Q = exper_dict["early_item_parameters"]["q_matrix"]

#Initialize model
model = models.mirt_2pl(latent_dimension=2, item_dimension=6, A=Q, Q=Q, delta=np.ones(6), sigma=np.identity(2))

e_step = em_algorithm.e_step_ga_mml(incomplete_data=U, model=model)
m_step = em_algorithm.m_step_ga_mml(model)

em = em_algorithm.em_algo(e_step=e_step, m_step=m_step, model=model)
em.fit(U, max_iter=5)


EM Iteration 1
E-step
M-step
Maximize Q-0
Highest Current Fitness:
-281.1969722771756
Length of Population = 20
Highest Current Fitness:
-280.7564887285824
Length of Population = 30
Highest Current Fitness:
-280.7564887285824
Length of Population = 40
Maximize the Q_i's
Highest Current Fitness:
-83.77342931284132
Length of Population = 20
Highest Current Fitness:
-85.16031328419157
Length of Population = 20
Highest Current Fitness:
-70.8869502446766
Length of Population = 20
Highest Current Fitness:
-70.8869502446766
Length of Population = 30
Highest Current Fitness:
-71.92282646331968
Length of Population = 20
Highest Current Fitness:
-71.92282646331968
Length of Population = 30
Step: 1: current parameter_diff: 4.513768598866744, current data likelihood: 0.0
EM Iteration 2
E-step
M-step
Maximize Q-0
Highest Current Fitness:
-271.06937946266163
Length of Population = 20
Maximize the Q_i's
Highest Current Fitness:
-75.69543636690244
Length of Population = 20
Highest Current Fitness:
-73

  factor = np.log(self.model.latent_density(theta, sigma=sigma))


Highest Current Fitness:
-283.3868320657525
Length of Population = 20
Highest Current Fitness:
-283.3868320657525
Length of Population = 30
Maximize the Q_i's
Highest Current Fitness:
-71.94830255981982
Length of Population = 20
Highest Current Fitness:
-79.86424515933354
Length of Population = 20
Highest Current Fitness:
-79.86424515933354
Length of Population = 30
Highest Current Fitness:
-69.69373112538352
Length of Population = 20
Highest Current Fitness:
-67.53481746015434
Length of Population = 30
Highest Current Fitness:
-67.53481746015434
Length of Population = 40
Highest Current Fitness:
-74.11557743206858
Length of Population = 20
Step: 5: current parameter_diff: 4.9922571516043535, current data likelihood: 0.0


### Estimeted Parameters

In [19]:
def rmse(y_pred: np.array, y_true: np.array) -> float:
    MSE = np.square(np.subtract(y_pred.flatten(), y_true.flatten())).mean()
    RMSE = np.sqrt(MSE)
    return(float(RMSE))

def experiment_performance(result_dict, em):
    A_pred = em.model.item_parameters["discrimination_matrix"]
    delta_pred = em.model.item_parameters["intercept_vector"]
    sigma_pred = em.model.person_parameters["covariance"]

    A_true = result_dict["early_item_parameters"]["A"] # TODO: change key's
    delta_true = result_dict["early_item_parameters"]["delta"]
    sigma_true = result_dict["latent_covariance"]

    print("Absolute diff in A:")
    print(np.abs(A_true-A_pred))

    print("Absolute diff in delta:")
    print(np.abs(delta_true-delta_pred))

    print("Absolute diff in sigma:")
    print(np.abs(sigma_true-sigma_pred))    

    rmse_A = rmse(A_pred, A_true)
    rmse_delta = rmse(delta_true, delta_pred)
    rmse_sigma = rmse(sigma_true, sigma_pred) 

    return({"rmse_A": rmse_A, "rmse_delta": rmse_delta, "rmse_sigma": rmse_sigma})

In [20]:
experiment_performance(result_dict=exper_dict, em=em)

Absolute diff in A:
[[0.08534941 0.        ]
 [0.89466606 0.        ]
 [0.         0.5462264 ]
 [0.3758118  0.11807726]
 [1.         0.1       ]
 [0.         0.5       ]]
Absolute diff in delta:
[1.10062348 1.98836146 1.72083674 1.56568859 0.64138087 0.32523404]
Absolute diff in sigma:
[[0.         0.22172733]
 [0.22172733 0.        ]]


{'rmse_A': 0.45837138346516654,
 'rmse_delta': 1.3598259561775639,
 'rmse_sigma': 0.15678489631601697}

In [6]:
em.model.item_parameters

{'discrimination_matrix': array([[0.41465059, 0.        ],
        [1.10533394, 0.        ],
        [0.        , 0.9462264 ],
        [0.3241882 , 0.71807726],
        [1.        , 1.        ],
        [0.        , 1.        ]]),
 'intercept_vector': array([1.10062348, 2.48836146, 2.72083674, 1.56568859, 0.85861913,
        0.57476596]),
 'q_matrix': array([[1, 0],
        [1, 0],
        [0, 1],
        [1, 1],
        [1, 1],
        [0, 1]])}

In [7]:
em.model.person_parameters
#Die Kovarianz ist nicht symmetrisch

{'covariance': array([[1.        , 0.42172733],
        [0.42172733, 1.        ]])}