In [11]:
import itertools
import omegaconf
import pytorch_lightning as pl
from typing import Callable, Union
import torch
import numpy as np
import pandas as pd

import data
import utils
import models

In [10]:
df = pd.read_csv('results/performance_metrics.csv')
perf_metric_names, perf_metric_names_short, concepts_list, Q_matrix, df_logs, know_graph_edges_df, know_graph_adj = data.get_data(df)
dataset = data.CDMLogsDataset(df_logs)
train_data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, drop_last=True, num_workers=4)
test_data_loader = torch.utils.data.DataLoader(dataset, batch_size=1024, num_workers=4)

(stu, exe), scores = next(iter(train_data_loader))
assert (stu.dtype, exe.dtype, scores.dtype) == (torch.int64, torch.int64, torch.float32)

  0%|          | 0/149 [00:00<?, ?it/s]

In [7]:
cfg = omegaconf.OmegaConf.create({
    'MIRT': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 1e-3
            }
        }
    },
    'NeuralCD': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 2e-3
            }
        },
        'itf_layer1_dim': 64,
        'itf_layer2_dim': 32
    },
    'KaNCD': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 2e-3
            }
        },
        'hidden_dim': 32,
        'itf_layer1_dim': 64,
        'itf_layer2_dim': 32,
        'itf_type': 'gmf'
    },
    'HierMIRT': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 5e-3
            }
        },
        'hidden_dim': 64,
        'itf_type': 'mirt',
        'loss_factor': 1e-3
    },
    'HierNCD': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 5e-3
            }
        },
        'hidden_dim': 64,
        'itf_type': 'ncd',
        'loss_factor': 1e-3
    },
    'QCCDM_small': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 2e-3
            }
        },
        'layer_num': 2,
        'hidden_dim': 64,
        'nonlinear_fn_type': 'sigmoid',
        'q_matrix_aug_enabled': True
    },
    'QCCDM': {
        'optimizer': {
            'name': 'torch.optim.Adam',
            'params': {
                'lr': 2e-3
            }
        },
        'layer_num': 2,
        'hidden_dim': 512,
        'nonlinear_fn_type': 'sigmoid',
        'q_matrix_aug_enabled': True
    }
})

student_num = df.shape[0]
exercice_num = len(perf_metric_names)
concept_num = len(concepts_list)
q_matrix = torch.FloatTensor(Q_matrix)
concept_graph = know_graph_edges_df
concept_graph_adj = torch.FloatTensor(know_graph_adj)

model_initializers: dict[str, Callable[[], Union[pl.LightningModule, models.CDMMixin]]] = {
    'MIRT': lambda: models.MIRT(
        cfg=cfg['MIRT'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix
    ),
    'NeuralCD': lambda: models.NeuralCD(
        cfg=cfg['NeuralCD'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix
    ),
    'KaNCD': lambda: models.KaNCD(
        cfg=cfg['KaNCD'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix
    ),
    'HierMIRT': lambda: models.HierCDF(
        cfg=cfg['HierMIRT'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix,
        concept_graph=concept_graph
    ),
    'HierNCD': lambda: models.HierCDF(
        cfg=cfg['HierNCD'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix,
        concept_graph=concept_graph
    ),
    'QCCDM_small': lambda: models.QCCDM(
        cfg=cfg['QCCDM_small'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix,
        concept_graph_adj=concept_graph_adj
    ),
    'QCCDM': lambda: models.QCCDM(
        cfg=cfg['QCCDM'],
        student_num=student_num,
        exercise_num=exercice_num,
        concept_num=concept_num,
        q_matrix=q_matrix,
        concept_graph_adj=concept_graph_adj
    )
}
    
model_names = ['MIRT', 'NeuralCD', 'KaNCD', 'HierMIRT', 'HierNCD', 'QCCDM_small', 'QCCDM']

results = { name: [] for name in model_names }

print('Start')

l = list(itertools.product(
    model_names,
    range(10)
))
for model_name, seed in l:
    utils.disable_pl_logger()
    pl.seed_everything(seed)

    model = model_initializers[model_name]()

    trainer = pl.Trainer(
        enable_progress_bar=False,
        max_epochs=100,
        deterministic=True
    )
    trainer.fit(
        model=model,
        train_dataloaders=train_data_loader,
        val_dataloaders=test_data_loader
    )

    validation = trainer.validate(
        model=model,
        dataloaders=test_data_loader,
        verbose=False
    )

    results[model_name].append({
        'mastery': model.get_mastery(),
        'diff': model.get_diff(),
        'disc': model.get_disc(),
        'r2': validation[0]['r2_epoch'],
        'DOA': validation[0]['DOA_epoch'],
        'DOC': validation[0]['DOC_epoch']
    })
    
    print(f"{model_name}_{seed}: r2={validation[0]['r2_epoch']:.3f}, DOA={validation[0]['DOA_epoch']:.3f} DOC={validation[0]['DOC_epoch']:.3f}")

Start
MIRT_0: r2=-0.770, DOA=0.686 DOC=0.689
MIRT_1: r2=-0.773, DOA=0.706 DOC=0.698
MIRT_2: r2=-0.762, DOA=0.696 DOC=0.698
MIRT_3: r2=-0.764, DOA=0.697 DOC=0.698
MIRT_4: r2=-0.771, DOA=0.694 DOC=0.696
MIRT_5: r2=-0.778, DOA=0.698 DOC=0.692
MIRT_6: r2=-0.773, DOA=0.698 DOC=0.705
MIRT_7: r2=-0.768, DOA=0.704 DOC=0.705
MIRT_8: r2=-0.770, DOA=0.707 DOC=0.702
MIRT_9: r2=-0.769, DOA=0.711 DOC=0.707
NeuralCD_0: r2=0.916, DOA=0.846 DOC=0.840
NeuralCD_1: r2=0.914, DOA=0.858 DOC=0.841
NeuralCD_2: r2=0.914, DOA=0.853 DOC=0.843
NeuralCD_3: r2=0.901, DOA=0.849 DOC=0.848
NeuralCD_4: r2=0.910, DOA=0.850 DOC=0.851
NeuralCD_5: r2=0.907, DOA=0.835 DOC=0.850
NeuralCD_6: r2=0.916, DOA=0.860 DOC=0.849
NeuralCD_7: r2=0.914, DOA=0.847 DOC=0.847
NeuralCD_8: r2=0.827, DOA=0.781 DOC=0.771
NeuralCD_9: r2=0.908, DOA=0.843 DOC=0.834
KaNCD_0: r2=0.912, DOA=0.850 DOC=0.843
KaNCD_1: r2=0.912, DOA=0.849 DOC=0.841
KaNCD_2: r2=0.907, DOA=0.850 DOC=0.848
KaNCD_3: r2=0.903, DOA=0.853 DOC=0.846
KaNCD_4: r2=0.910, DOA=0.858

In [9]:
np.save('results/cdm_results.npy', results)
print('Saved all')

Saved all


In [10]:
r2_data = []
doa_data = []
doc_data = []
for model_name in model_names:
    result = results[model_name]
    r2 = np.array([x['r2'] for x in result])
    doa = np.array([x['DOA'] for x in result])
    doc = np.array([x['DOC'] for x in result])
    r2_data.append(r2)
    doa_data.append(doa)
    doc_data.append(doa)

def display_confidence_interval(name, mean, offset):
    print(f"{name}: {mean:.3f} $\pm$ {offset:.3f}")
    
for i, model_name in enumerate(model_names):
    display_confidence_interval(f"{model_name} R2", *utils.compute_confidence_interval(r2_data[i]))
    display_confidence_interval(f"{model_name} DOA", *utils.compute_confidence_interval(doa_data[i]))    
    display_confidence_interval(f"{model_name} DOC", *utils.compute_confidence_interval(doc_data[i]))    

MIRT R2: -0.770 $\pm$ 0.003
MIRT DOA: 0.700 $\pm$ 0.005
MIRT DOC: 0.700 $\pm$ 0.005
NeuralCD R2: 0.903 $\pm$ 0.019
NeuralCD DOA: 0.842 $\pm$ 0.016
NeuralCD DOC: 0.842 $\pm$ 0.016
KaNCD R2: 0.908 $\pm$ 0.003
KaNCD DOA: 0.849 $\pm$ 0.005
KaNCD DOC: 0.849 $\pm$ 0.005
HierMIRT R2: 0.894 $\pm$ 0.008
HierMIRT DOA: 0.838 $\pm$ 0.003
HierMIRT DOC: 0.838 $\pm$ 0.003
HierNCD R2: 0.912 $\pm$ 0.005
HierNCD DOA: 0.853 $\pm$ 0.002
HierNCD DOC: 0.853 $\pm$ 0.002
QCCDM_small R2: 0.761 $\pm$ 0.177
QCCDM_small DOA: 0.683 $\pm$ 0.031
QCCDM_small DOC: 0.683 $\pm$ 0.031
QCCDM R2: 0.931 $\pm$ 0.041
QCCDM DOA: 0.671 $\pm$ 0.032
QCCDM DOC: 0.671 $\pm$ 0.032
