In [None]:
# Prefix shared by the dataset file names and the saved-model file name.
FILE_PREFIX = 'dataset_'

# Baseline configuration for a quick two-iteration run.
params = dict(
    # --- optimisation ---
    max_iter=2,                # number of iterations
    show=True,                 # print progress
    init_std=0.1,              # weight initialization std
    init_lr=0.01,              # initial learning rate
    lr_decay=0.75,             # learning rate decay
    final_lr=1E-5,             # learning rate will not decrease after hitting this threshold
    momentum=0.9,              # momentum rate
    maxgradnorm=50.0,          # maximum gradient norm
    # --- model shape ---
    final_fc_dim=50,           # hidden state dim for final fc layer
    key_embedding_dim=50,      # question embedding dimensions
    batch_size=64,             # the batch size
    value_embedding_dim=200,   # answer and question embedding dimensions
    memory_size=20,            # memory size
    # --- data ---
    n_question=9,              # the number of unique questions in the dataset
    seqlen=200,                # the allowed maximum length of a sequence
    data_dir='../',            # data directory
    data_name='',              # data set name
    # --- checkpoint (load and save point at the same file) ---
    load=FILE_PREFIX + 'dkvmn.params',   # model file to load
    save=FILE_PREFIX + 'dkvmn.params',   # path to save model
)

# Derived entries: the running learning rate starts at init_lr, and each
# memory state dim mirrors its embedding dim.
params.update(
    lr=params['init_lr'],
    key_memory_state_dim=params['key_embedding_dim'],
    value_memory_state_dim=params['value_embedding_dim'],
)

In [3]:
from load_data import Data

import os

# Loader configured with the question count and maximum sequence length from
# `params`; fields in the data files are comma-separated.
dat = Data(n_question=params['n_question'], seqlen=params['seqlen'], separate_char=',')

# Build the file paths with os.path.join so a trailing slash on data_dir
# (e.g. '../') does not produce a doubled separator such as '..//dataset_train.txt'.
# data_name is prepended to the file name without a separator, as before.
train_data_path = os.path.join(params['data_dir'], params['data_name'] + FILE_PREFIX + "train.txt")
test_data_path = os.path.join(params['data_dir'], params['data_name'] + FILE_PREFIX + "test.txt")

train_data = dat.load_data(train_data_path)
test_data = dat.load_data(test_data_path)


In [4]:
from EduKTM import DKVMN

# Every constructor argument is read from the shared `params` dict under the
# key of the same name, so the keyword set is built from a list of names.
model = DKVMN(**{
    arg_name: params[arg_name]
    for arg_name in (
        'n_question',
        'batch_size',
        'key_embedding_dim',
        'value_embedding_dim',
        'memory_size',
        'key_memory_state_dim',
        'value_memory_state_dim',
        'final_fc_dim',
    )
})

In [5]:
# Train the baseline model on the loaded training sequences with the settings
# in `params`. The recorded output below shows one progress bar and one
# "loss / auc / accuracy" line per epoch (params['max_iter'] == 2 epochs).
model.train(params, train_data)

Epoch 0: 100%|██████████| 227/227 [00:36<00:00,  6.29it/s]


Epoch 1/2, loss : 0.47460, auc : 0.67162, accuracy : 0.79589


Epoch 1: 100%|██████████| 227/227 [00:39<00:00,  5.73it/s]


Epoch 2/2, loss : 0.46579, auc : 0.69194, accuracy : 0.79639


In [9]:
# Write the trained model to the path configured under params['save'].
model.save(params['save'])

In [10]:
# Reload the model file named by params['load'] (the same file name as
# params['save'] in this notebook) and evaluate it on the test sequences.
# Per the recorded output below, eval returns a 3-tuple ordered
# (loss, accuracy, auc) -- note that accuracy comes before auc.
model.load(params['load'])
model.eval(params, test_data)

Evaluating: 100%|██████████| 96/96 [00:09<00:00, 10.14it/s]


valid auc : 0.69275, valid accuracy : 0.79490


(0.4670858650157849, 0.7949034598742039, np.float64(0.6927475560952349))

In [None]:
import optuna
from EduKTM import DKVMN

def objective(trial):
    """Optuna objective: train a DKVMN with sampled hyperparameters and score it.

    Args:
        trial: Optuna trial used to sample the hyperparameters below.

    Returns:
        The AUC reported by ``model.eval`` on ``test_data``.
    """
    params = {
        'max_iter': 10,  # number of iterations
        'show': False,  # print progress
        'init_std': trial.suggest_float('init_std', 0.01, 0.2),  # weight initialization std
        'init_lr': trial.suggest_float('init_lr', 0.001, 0.1),  # initial learning rate
        'lr_decay': trial.suggest_float('lr_decay', 0.5, 0.99),  # learning rate decay
        'final_lr': trial.suggest_float('final_lr', 1E-6, 1E-4),  # learning rate will not decrease after hitting this threshold
        'momentum': trial.suggest_float('momentum', 0.5, 0.99),  # momentum rate
        'maxgradnorm': trial.suggest_float('maxgradnorm', 10.0, 100.0),  # maximum gradient norm
        'batch_size': 64,  # the batch size
        'value_embedding_dim': trial.suggest_int('value_embedding_dim', 50, 500),  # answer and question embedding dimensions
        'memory_size': trial.suggest_int('memory_size', 10, 100),  # memory size
        # NOTE(review): train_data/test_data are loaded by an earlier cell using the
        # notebook-level params (n_question=9 there) -- confirm 123 matches that data.
        'n_question': 123,  # the number of unique questions in the dataset
        'seqlen': 200,  # the allowed maximum length of a sequence
        'data_dir': '../dkt',  # data directory
        'data_name': '',  # data set name
    }

    # The final fc layer and the key embedding share one sampled width, so only
    # a single 'final_fc_dim' parameter is registered with the study.
    shared_dim = trial.suggest_int('final_fc_dim', 10, 100)
    params['final_fc_dim'] = params['key_embedding_dim'] = shared_dim

    # Derived entries: running learning rate and memory state dims.
    params['lr'] = params['init_lr']
    params['key_memory_state_dim'] = params['key_embedding_dim']
    params['value_memory_state_dim'] = params['value_embedding_dim']

    # Create and train the model
    model = DKVMN(n_question=params['n_question'],
                  batch_size=params['batch_size'],
                  key_embedding_dim=params['key_embedding_dim'],
                  value_embedding_dim=params['value_embedding_dim'],
                  memory_size=params['memory_size'],
                  key_memory_state_dim=params['key_memory_state_dim'],
                  value_memory_state_dim=params['value_memory_state_dim'],
                  final_fc_dim=params['final_fc_dim'])

    model.train(params, train_data)

    # eval returns (loss, accuracy, auc): the recorded output of the earlier
    # evaluation cell prints "valid auc : 0.69275, valid accuracy : 0.79490" and
    # returns (0.467..., 0.7949..., 0.6927...). The previous unpacking order
    # (loss, auc, accuracy) therefore returned accuracy under the name `auc`.
    # Trials already stored under this study name were scored with that old
    # value; use a fresh study name (or delete them) before comparing results.
    # NOTE(review): the score is computed on test_data, the same set used for
    # the final evaluation -- consider a separate validation split for tuning.
    _loss, _accuracy, auc = model.eval(params, test_data)

    return auc  # Optimize for AUC


In [None]:
# Open the persistent hyperparameter-importance study (or resume it when a
# study with this name already exists in the SQLite file), then run the search.
study = optuna.create_study(
    study_name=FILE_PREFIX+"dkvmn_importances",
    direction='maximize',                 # the objective's return value is maximized
    storage="sqlite:///../studies.db",    # trials are persisted one directory up
    load_if_exists=True,                  # re-running this cell appends to the stored study
)

# 24 trials with up to 8 running concurrently; adjust as needed.
study.optimize(
    objective,
    n_trials=24,
    n_jobs=8,
)

In [None]:
# Summarise the best trial of the study. `trial` is kept as a notebook-level
# name because the final-configuration cell further down reads trial.params.
trial = study.best_trial

best_trial_report = [
    "Best trial:",
    f"  Value (AUC): {trial.value}",
    "  Params: ",
]
best_trial_report.extend(
    f"    {key}: {value}" for key, value in trial.params.items()
)
print("\n".join(best_trial_report))

In [None]:
# Rank the hyperparameters by their estimated importance for the study's
# objective value and print one line per parameter (4 decimal places).
param_importance = optuna.importance.get_param_importances(study)

importance_report = ["\nParameter importance:"]
importance_report += [
    f"  {param}: {importance:.4f}"
    for param, importance in param_importance.items()
]
print("\n".join(importance_report))

In [None]:
# Show the best trial's hyperparameters as a plain dict; these values are what
# the refined-study cell below hard-codes for the parameters it keeps fixed.
# NOTE(review): the hard-coded numbers there must be re-copied if this output changes.
print(study.best_params)

In [None]:
import optuna
from EduKTM import DKVMN

def refined_objective(trial):
    """Optuna objective for the second, narrower search.

    All hyperparameters are pinned to hard-coded values except ``init_lr``,
    ``maxgradnorm`` and ``momentum``, which are sampled from narrowed ranges.

    Args:
        trial: Optuna trial used to sample the three tuned hyperparameters.

    Returns:
        The AUC reported by ``model.eval`` on ``test_data``.
    """
    # Fixed settings (hard-coded from a previous study's best parameters).
    refined_params = {
        'max_iter': 10,
        'show': False,  # print progress
        'init_std': 0.1064392520682464,  # weight initialization std
        'lr_decay': 0.8769705774588257,  # learning rate decay
        'final_lr': 3.0584517647771867E-05,  # learning rate will not decrease after hitting this threshold
        'final_fc_dim': 76,  # hidden state dim for final fc layer
        'key_embedding_dim': 76,  # question embedding dimensions
        'batch_size': 64,  # the batch size
        'value_embedding_dim': 226,  # answer and question embedding dimensions
        'memory_size': 65,  # memory size
        'n_question': 123,  # the number of unique questions in the dataset
        'seqlen': 200,  # the allowed maximum length of a sequence
        'data_dir': '../dkt',  # data directory
        'data_name': '',  # data set name
    }

    # Only optimize the top 3 most important parameters, each over a narrowed range.
    refined_params['init_lr'] = trial.suggest_float('init_lr', 0.01, 0.05)
    refined_params['maxgradnorm'] = trial.suggest_float('maxgradnorm', 50.0, 100.0)
    refined_params['momentum'] = trial.suggest_float('momentum', 0.5, 0.8)

    # Derived entries: running learning rate and memory state dims.
    refined_params['lr'] = refined_params['init_lr']
    refined_params['key_memory_state_dim'] = refined_params['key_embedding_dim']
    refined_params['value_memory_state_dim'] = refined_params['value_embedding_dim']

    # Create and train the model
    model = DKVMN(n_question=refined_params['n_question'],
                  batch_size=refined_params['batch_size'],
                  key_embedding_dim=refined_params['key_embedding_dim'],
                  value_embedding_dim=refined_params['value_embedding_dim'],
                  memory_size=refined_params['memory_size'],
                  key_memory_state_dim=refined_params['key_memory_state_dim'],
                  value_memory_state_dim=refined_params['value_memory_state_dim'],
                  final_fc_dim=refined_params['final_fc_dim'])

    model.train(refined_params, train_data)

    # eval returns (loss, accuracy, auc): the recorded output of the earlier
    # evaluation cell prints "valid auc : 0.69275, valid accuracy : 0.79490" and
    # returns (0.467..., 0.7949..., 0.6927...). The previous unpacking order
    # (loss, auc, accuracy) therefore returned accuracy under the name `auc`.
    # Trials already stored under this study name were scored with that old
    # value; use a fresh study name (or delete them) before comparing results.
    # NOTE(review): the score is computed on test_data, the same set used for
    # the final evaluation -- consider a separate validation split for tuning.
    _loss, _accuracy, auc = model.eval(refined_params, test_data)

    return auc  # Optimize for AUC

# Open (or resume) the persistent refined study and run the narrowed search:
# 12 trials with up to 4 running concurrently.
refined_study = optuna.create_study(
    study_name=FILE_PREFIX+"dkvmn_refined",
    direction='maximize',
    storage="sqlite:///../studies.db",
    load_if_exists=True,
)
refined_study.optimize(
    refined_objective,
    n_trials=12,
    n_jobs=4,
)

# Summarise the best refined trial: its objective value and the three tuned parameters.
refined_trial = refined_study.best_trial

refined_report = [
    "Best refined trial:",
    f"  Value (AUC): {refined_trial.value}",
    "  Params: ",
]
refined_report.extend(
    f"    {key}: {value}" for key, value in refined_trial.params.items()
)
print("\n".join(refined_report))

In [None]:
# Final training configuration: 50 iterations using the hyperparameters of the
# first study's best trial (`trial`, bound by the "Print the results" cell).
# NOTE(review): the refined study's best trial (`refined_trial`) is not
# consulted here -- confirm that ignoring the refined search is intended.
best_params = trial.params

params = dict(
    max_iter=50,
    show=True,                                            # print progress
    init_std=best_params['init_std'],                     # weight initialization std
    init_lr=best_params['init_lr'],                       # initial learning rate
    lr_decay=best_params['lr_decay'],                     # learning rate decay
    final_lr=best_params['final_lr'],                     # learning rate will not decrease after hitting this threshold
    momentum=best_params['momentum'],                     # momentum rate
    maxgradnorm=best_params['maxgradnorm'],               # maximum gradient norm
    final_fc_dim=best_params['final_fc_dim'],             # hidden state dim for final fc layer
    key_embedding_dim=best_params['final_fc_dim'],        # question embedding dimensions (tuned jointly with final_fc_dim)
    batch_size=64,                                        # the batch size
    value_embedding_dim=best_params['value_embedding_dim'],  # answer and question embedding dimensions
    memory_size=best_params['memory_size'],               # memory size
    n_question=123,                                       # the number of unique questions in the dataset
    seqlen=200,                                           # the allowed maximum length of a sequence
    data_dir='../dkt',                                    # data directory
    data_name='',                                         # data set name
    load=FILE_PREFIX + 'dkvmn.params',                    # model file to load
    save=FILE_PREFIX + 'dkvmn.params',                    # path to save model
)

# Derived entries: the running learning rate starts at init_lr, and each
# memory state dim mirrors its embedding dim.
params.update(
    lr=params['init_lr'],
    key_memory_state_dim=params['key_embedding_dim'],
    value_memory_state_dim=params['value_embedding_dim'],
)

In [None]:
from EduKTM import DKVMN

# Build the final model: each constructor argument is read from `params`
# under the key of the same name.
dkvmn = DKVMN(**{
    arg_name: params[arg_name]
    for arg_name in (
        'n_question',
        'batch_size',
        'key_embedding_dim',
        'value_embedding_dim',
        'memory_size',
        'key_memory_state_dim',
        'value_memory_state_dim',
        'final_fc_dim',
    )
})

# Train with the final configuration, then persist the model to params['save'].
dkvmn.train(params, train_data)
dkvmn.save(params['save'])

In [None]:
# Reload the model file named by params['load'] (same file name as
# params['save']) and evaluate the final model on the test sequences.
# The earlier recorded run of model.eval in this notebook shows the result
# tuple ordered (loss, accuracy, auc).
dkvmn.load(params['load'])
dkvmn.eval(params, test_data)