In [1]:
import numpy as np
from em_algorithm import em_algorithm
from simulation_framework.simulate_competency import respondent_population
from simulation_framework.simulate_responses import response_simulation
from scipy.stats import multivariate_normal

C:\Users\Jesper\Documents\GitHub\Knowledge-Growth-Prediction\models


## Experiment 1: MIRT-2PL Parameter Recovery

In [2]:
from unittest import result


def mirt_param_recovery(sample_size) -> dict:
    """Simulate response data for the MIRT-2PL parameter-recovery experiment.

    A two-dimensional latent population (multivariate normal with zero mean,
    unit variances and covariance 0.2) is combined with a four-item test
    described by the matrix ``A`` and the vector ``delta`` defined below, and
    responses are drawn via ``response_simulation``.

    Parameters
    ----------
    sample_size : int
        Number of respondents to draw; forwarded to
        ``response_simulation.sample``.

    Returns
    -------
    dict
        ``"sample"``: the object returned by ``response_simulation.sample``
        (the notebook reads its ``"early_responses"`` entry),
        ``"early_item_parameters"``: the item parameters used for simulation,
        ``"latent_covariance"``: the covariance of the latent distribution,
        ``"estimated_parameters"``: ``None`` (fitting is not done here yet).
    """
    latent_dimension = 2
    item_dimension = 4

    #Define Population
    cov = np.array([[1, 0.2],
                    [0.2, 1]])
    latent_distribution = multivariate_normal(mean=np.zeros(latent_dimension), cov=cov)
    population = respondent_population(latent_dimension=latent_dimension,
                                       latent_distribution=latent_distribution)

    #Define Test
    A = np.array([[1, 0],
                  [1, 0],
                  [0, 1],
                  [1, 1]])
    delta = np.array([0, 0.5, 1, 0])
    early_item_parameters = {"A": A,
                             "delta": delta,
                             "item_dimension": item_dimension,
                             "latent_dimension": latent_dimension}

    #Sample responses
    response_simulation_obj = response_simulation(population=population,
                                                  early_item_params=early_item_parameters)
    # Honour the requested sample size (was previously hard-coded to 100).
    sample = response_simulation_obj.sample(sample_size)

    #Fit Parameters
    # Placeholder: estimation happens outside this function for now.
    return {"sample": sample,
            "early_item_parameters": early_item_parameters,
            "latent_covariance": cov,
            "estimated_parameters": None}


In [3]:
import models
import em_algorithm
import pandas as pd

# Seed NumPy's global RNG so the simulated sample can be reproduced.
# NOTE(review): this only covers code that draws from NumPy's global state;
# if the genetic algorithm in the M-step uses another RNG it needs its own seed.
np.random.seed(0)

# Sample data. The recorded run below was produced with 100 respondents, so the
# sample size is stated explicitly instead of passing an unrelated value.
N_RESPONDENTS = 100
U = mirt_param_recovery(N_RESPONDENTS)["sample"]["early_responses"]

# Initialize model with all-ones item parameters and a symmetric starting covariance.
initial_sigma = np.array([[1, 0.2],
                          [0.2, 1]])
model = models.mirt_2pl(latent_dimension=2, item_dimension=4,
                        A=np.ones((4, 2)), delta=np.ones(4),
                        sigma=initial_sigma)

# Wire the E- and M-step objects into the EM driver and run two iterations.
e_step = em_algorithm.e_step_ga_mml(incomplete_data=U, model=model)
m_step = em_algorithm.m_step_ga_mml(model)

em = em_algorithm.em_algorithm.em_algorithm(e_step=e_step, m_step=m_step, model=model)
em.fit(U, max_iter=2)


EM Iteration 1
E-step
M-step
Maximize Q-0
Highest Current Fitness:
-263.0941639964238
Length of Population = 20
Highest Current Fitness:
-263.0941639964238
Length of Population = 30
Maximize the Q_i's
Highest Current Fitness:
-84.51012214667564
Length of Population = 20
Highest Current Fitness:
-91.71309004604373
Length of Population = 20
Highest Current Fitness:
-91.71309004604373
Length of Population = 30
Highest Current Fitness:
-85.82431741567623
Length of Population = 20
Highest Current Fitness:
-89.51867063587132
Length of Population = 20
Highest Current Fitness:
-89.51867063587132
Length of Population = 30
Step: 1: current parameter_diff: 9.124665985337366, current data likelihood: 0.0
EM Iteration 2
E-step


  out = random_state.multivariate_normal(mean, cov, size)


M-step
Maximize Q-0
Highest Current Fitness:
-281.4190533716478
Length of Population = 20
Maximize the Q_i's
Highest Current Fitness:
-77.51199061507559
Length of Population = 20
Highest Current Fitness:
-127.6293846886314
Length of Population = 20
Highest Current Fitness:
-125.48991146701093
Length of Population = 30
Highest Current Fitness:
-115.68856907098952
Length of Population = 40
Highest Current Fitness:
-84.11222915950123
Length of Population = 50
Highest Current Fitness:
-84.11222915950123
Length of Population = 60
Highest Current Fitness:
-84.69735600213079
Length of Population = 20
Highest Current Fitness:
-80.5292455857703
Length of Population = 30
Highest Current Fitness:
-79.9279642231494
Length of Population = 40
Highest Current Fitness:
-79.9279642231494
Length of Population = 50
Highest Current Fitness:
-107.39128385492326
Length of Population = 20
Highest Current Fitness:
-91.44014382627954
Length of Population = 30
Highest Current Fitness:
-91.44014382627954
Length 

  icc_value), r_0_theta) + np.multiply(np.subtract(r_0_theta, r_item_theta), np.log(1-icc_value))


Highest Current Fitness:
-123.5830370395717
Length of Population = 80
Highest Current Fitness:
-93.07414719475713
Length of Population = 20
Highest Current Fitness:
-87.64784587152998
Length of Population = 30
Highest Current Fitness:
-87.64784587152998
Length of Population = 40
Highest Current Fitness:
-103.52366953457441
Length of Population = 20
Highest Current Fitness:
-95.45017618367453
Length of Population = 30
Highest Current Fitness:
-95.45017618367453
Length of Population = 40
Step: 3: current parameter_diff: 14.496767398480065, current data likelihood: 0.0


### Estimated Parameters

In [6]:
# Show the item parameters stored on the model after the em.fit() call above.
em.model.item_parameters

{'discrimination_matrix': array([[ 0.08574951, -0.81711164],
        [-1.80637254,  8.39569307],
        [-3.82544171,  1.1451595 ],
        [-1.69488726,  2.75686369]]),
 'intercept_vector': array([ 1.44978961,  2.88774431, -1.59902474,  1.87326765])}

In [7]:
em.model.person_parameters
# The covariance is not symmetric.
# NOTE(review): in the output below only one off-diagonal entry differs from the
# initial 0.2, so the update presumably touches a single triangle of the matrix
# without mirroring it - TODO confirm in the M-step implementation.

{'covariance': array([[ 1.       , -1.7936944],
        [ 0.2      ,  1.       ]])}