# Model Training

In [None]:
from EduKTM import DKT
import torch
import torch.utils.data as Data
import numpy as np
from sklearn.metrics import roc_auc_score
import optuna

In [None]:
# Number of questions; passed to DKT as its first constructor argument.
# NOTE(review): presumably has to match the question count encoded in the
# .npy data files — confirm against the preprocessing step.
NUM_QUESTIONS = 9

def get_data_loader(data_path, batch_size, shuffle=False, data_percentage=1.0):
    """Load a ``.npy`` array and wrap all or part of it in a DataLoader.

    Args:
        data_path: Path of the ``.npy`` file, read with ``np.load``.
        batch_size: Batch size handed to the ``DataLoader``.
        shuffle: When true, the kept subset is drawn at random with
            ``torch.randperm`` and the loader is also created with
            ``shuffle=True``; otherwise the leading rows are kept in
            file order.
        data_percentage: Fraction (not a 0-100 percentage) of the rows along
            the first axis to keep. Values of 1.0 or more keep everything.

    Returns:
        A ``torch.utils.data.DataLoader`` over the resulting float tensor.

    Raises:
        ValueError: If ``data_percentage`` is negative or NaN.
    """
    # A negative fraction used to become a negative slice bound, which
    # silently kept *most* of the rows instead of a small subset. The
    # comparison is written so that NaN is rejected as well.
    if not data_percentage >= 0.0:
        raise ValueError(
            f"data_percentage must be a non-negative fraction, got {data_percentage!r}"
        )

    data = torch.FloatTensor(np.load(data_path))

    # Select only a fraction of the data
    if data_percentage < 1.0:
        total_samples = len(data)
        samples_to_keep = int(total_samples * data_percentage)
        if shuffle:
            # Random subset: take the first k entries of a random permutation.
            indices = torch.randperm(total_samples)[:samples_to_keep]
            data = data[indices]
        else:
            # Deterministic subset: keep the leading rows.
            data = data[:samples_to_keep]

    return Data.DataLoader(data, batch_size=batch_size, shuffle=shuffle)

In [None]:
def importance_objective(trial):
    """Optuna objective: train a DKT model on a 10% data subset and score it.

    Samples hidden size, number of layers, batch size and learning rate from
    ``trial``, trains for 10 epochs on a random 10% of the training file, and
    returns whatever ``DKT.eval`` reports on the first 10% of the test file
    (this notebook treats that value as the AUC to maximize).
    """
    # Search space — the suggest calls stay in this order.
    n_hidden = trial.suggest_categorical('hidden_size', [5, 10, 20, 50, 100])
    n_layers = trial.suggest_int('num_layers', 1, 3)
    n_batch = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)

    # Small subsets keep each trial cheap: shuffled for training,
    # in file order for evaluation.
    train_batches = get_data_loader(
        './train_data.npy', n_batch, shuffle=True, data_percentage=0.1
    )
    eval_batches = get_data_loader(
        './test_data.npy', n_batch, shuffle=False, data_percentage=0.1
    )

    # Build the model from the sampled architecture and fit it.
    model = DKT(NUM_QUESTIONS, n_hidden, n_layers)
    model.train(train_batches, eval_batches, epoch=10, lr=learning_rate)

    # Score on the evaluation subset; the study maximizes this value.
    score = model.eval(eval_batches)
    return score

In [None]:
# Create the study, or reopen it when one with this name already exists in
# the SQLite file, so re-running this cell adds trials to the stored ones.
# NOTE(review): "importancees" in the storage URL looks like a typo; it is
# kept as-is because renaming it would point at a different database file.
study = optuna.create_study(
    direction='maximize',
    study_name="importances",
    storage="sqlite:///importancees_study.db",
    load_if_exists=True,
)

# Run 30 trials of the objective with up to 4 parallel jobs.
study.optimize(
    importance_objective,
    n_jobs=4,
    n_trials=30,
)

In [None]:
# Report the best trial recorded in the study.
print(f"Best parameters: {study.best_params}")
print(f"Best AUC: {study.best_value}")

# Only the final expression of a notebook cell is rendered automatically, so
# the optimization-history figure used to be built and then discarded.
# Showing both figures explicitly makes each one appear.
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()

In [None]:
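# Final training on the full data set (currently disabled).
# NOTE: `best_params`, `HIDDEN_SIZE` and `NUM_LAYERS` are not defined in the
# cells shown here; set them (e.g. from `study.best_params`) before enabling.
# train_loader = get_data_loader('./train_data.npy', best_params['batch_size'], True)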
# test_loader = get_data_loader('./test_data.npy', best_params['batch_size'], False)

# dkt = DKT(NUM_QUESTIONS, HIDDEN_SIZE, NUM_LAYERS)
# dkt.train(train_loader, test_loader, epoch=20)
# dkt.save("dkt.params")


In [None]:
# dkt.load("dkt.params")
# auc = dkt.eval(test_loader)
# print("auc: %.6f" % auc)