# Model Training

In [None]:
import torch
import torch.utils.data as Data
import numpy as np
import optuna
from EduKTM import DKT

In [None]:
NUM_QUESTIONS = 9

def get_train_val_loaders(batch_size, val_split=0.2, shuffle=True, data_percentage=1.0,
                          data_path='train_data.npy', seed=None):
    """Load the training array and split it into train/validation loaders.

    Args:
        batch_size: Batch size used by both loaders.
        val_split: Fraction of the retained samples held out for validation;
            must lie in ``[0, 1]``.
        shuffle: If true, the retained samples and the split are drawn at
            random and the training loader reshuffles on every pass. If false,
            the file order is kept and the validation set is the tail of the
            retained samples.
        data_percentage: Fraction of the samples in the file to keep; must be
            positive. Values of 1.0 or more keep everything.
        data_path: Path of the ``.npy`` file to load.
        seed: Optional integer seed. When given, the random subset, the split
            and the training loader's shuffling order are reproducible; when
            ``None`` the global torch RNG is used.

    Returns:
        ``(train_loader, val_loader)``. The validation loader never shuffles.

    Raises:
        ValueError: If ``val_split`` is outside ``[0, 1]`` or
            ``data_percentage`` is not positive.
    """
    if not 0.0 <= val_split <= 1.0:
        raise ValueError(f"val_split must be within [0, 1], got {val_split}")
    if data_percentage <= 0.0:
        raise ValueError(f"data_percentage must be positive, got {data_percentage}")

    data = torch.FloatTensor(np.load(data_path))

    # A dedicated generator keeps seeded runs independent of the global RNG.
    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    # Work out how many samples survive the optional down-sampling and how
    # they are divided between the two splits.
    total_samples = len(data)
    if data_percentage < 1.0:
        dataset_size = int(total_samples * data_percentage)
    else:
        dataset_size = total_samples
    val_size = int(dataset_size * val_split)
    train_size = dataset_size - val_size

    if shuffle:
        # One permutation serves both purposes: its prefix is the random
        # subset, and cutting that prefix yields the random split.
        indices = torch.randperm(total_samples, generator=generator)[:dataset_size]
        train_data = data[indices[:train_size]]
        val_data = data[indices[train_size:]]
    else:
        train_data = data[:train_size]
        val_data = data[train_size:dataset_size]

    train_loader = Data.DataLoader(
        train_data, batch_size=batch_size, shuffle=shuffle, generator=generator
    )
    val_loader = Data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

    print(f"Training samples: {train_size}, Validation samples: {val_size}")

    return train_loader, val_loader

def get_test_data_loader(batch_size, shuffle=False, data_percentage=1.0,
                         data_path='test_data.npy', seed=None):
    """Load the test array and wrap it in a ``DataLoader``.

    Args:
        batch_size: Batch size of the returned loader.
        shuffle: If true, the optional subset is drawn at random and the
            loader shuffles on every pass; otherwise the file order is kept.
        data_percentage: Fraction of the samples in the file to keep; must be
            positive. Values of 1.0 or more keep everything.
        data_path: Path of the ``.npy`` file to load.
        seed: Optional integer seed that makes the random subset and the
            shuffling order reproducible; ``None`` uses the global torch RNG.

    Returns:
        A ``torch.utils.data.DataLoader`` over the (possibly reduced) data.

    Raises:
        ValueError: If ``data_percentage`` is not positive.
    """
    if data_percentage <= 0.0:
        raise ValueError(f"data_percentage must be positive, got {data_percentage}")

    data = torch.FloatTensor(np.load(data_path))

    # A dedicated generator keeps seeded runs independent of the global RNG.
    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    if data_percentage < 1.0:
        total_samples = len(data)
        samples_to_keep = int(total_samples * data_percentage)
        if shuffle:
            keep = torch.randperm(total_samples, generator=generator)[:samples_to_keep]
            data = data[keep]
        else:
            data = data[:samples_to_keep]

    return Data.DataLoader(data, batch_size=batch_size, shuffle=shuffle, generator=generator)

In [None]:
def importance_objective(trial):
    """Optuna objective for the hyperparameter-importance study.

    Samples a hidden size, layer count, batch size and learning rate, trains
    a DKT model for 10 epochs on a 30% subset of the training file, and
    returns whatever ``DKT.eval`` reports on the validation split. The
    notebook treats that value as an AUC and maximizes it.
    """
    # Search space; the string names are the keys stored in the study.
    width = trial.suggest_categorical('hidden_size', [5, 10, 20, 50, 100])
    depth = trial.suggest_int('num_layers', 1, 3)
    samples_per_batch = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    step_size = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)

    # Only 30% of the data is used so that each trial stays cheap.
    train_part, val_part = get_train_val_loaders(samples_per_batch, data_percentage=0.3)

    model = DKT(NUM_QUESTIONS, width, depth)
    model.train(train_part, val_part, epoch=10, lr=step_size)

    score = model.eval(val_part)
    return score

In [None]:
# Hyperparameter-importance study. It is persisted in the SQLite file one
# directory up and reloaded when a study with this name already exists, so
# re-running this cell appends further trials to the stored ones.
# NOTE(review): n_jobs=4 runs trials concurrently against SQLite storage;
# confirm this is reliable in your environment.
importance_study = optuna.create_study(
    study_name="dkt_importances",
    storage="sqlite:///../studies.db",
    load_if_exists=True,
    direction='maximize',
)
importance_study.optimize(
    importance_objective,
    n_trials=100,
    n_jobs=4,
)

In [None]:
# Summarize the importance study: best sampled configuration and its score.
print(f"Best parameters: {importance_study.best_params}")
print(f"Best AUC: {importance_study.best_value}")

# A notebook cell only auto-renders the value of its last expression, so the
# first figure would be silently discarded. Show both figures explicitly.
optuna.visualization.plot_optimization_history(importance_study).show()
optuna.visualization.plot_param_importances(importance_study).show()

In [None]:
# Snapshot of the best hyperparameters found by the importance study; the
# learning-rate objective and the test-evaluation cell read this dict.
best_params = importance_study.best_params

In [None]:
def learning_rate_objective(trial):
    """Optuna objective that tunes only the learning rate.

    The architecture and batch size are fixed to the values in the
    module-level ``best_params``. A learning rate is sampled log-uniformly
    from [1e-5, 1e-1], a DKT model is trained for 15 epochs on a 40% subset
    of the training file, and the value of ``DKT.eval`` on the validation
    split is returned for maximization.
    """
    # Fixed settings carried over from the importance study.
    width, depth, samples_per_batch = (
        best_params[key] for key in ('hidden_size', 'num_layers', 'batch_size')
    )

    # The single parameter explored by this study.
    step_size = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    # A 40% subset keeps each trial fast.
    train_part, val_part = get_train_val_loaders(samples_per_batch, data_percentage=0.4)

    model = DKT(NUM_QUESTIONS, width, depth)
    model.train(train_part, val_part, epoch=15, lr=step_size)

    score = model.eval(val_part)
    return score

# Second study, dedicated to the learning rate. It shares the SQLite file
# with the importance study and is reloaded if it already exists.
lr_study = optuna.create_study(
    study_name="dkt_learning_rate",
    storage="sqlite:///../studies.db",
    load_if_exists=True,
    direction='maximize',
)
lr_study.optimize(
    learning_rate_objective,
    n_trials=30,
    n_jobs=4,
)

In [None]:
# Report the best learning rate found by lr_study and the objective value
# (validation score returned by learning_rate_objective) it achieved.
print(f"Best learning rate: {lr_study.best_params['learning_rate']}")
print(f"Best AUC with optimized learning rate: {lr_study.best_value}")

In [None]:


# Train the final model on the full training file with the tuned settings:
# architecture and batch size from the importance study, learning rate from
# the dedicated learning-rate study. Previously these were hard-coded, so
# the search results never reached the final model.
train_loader, val_loader = get_train_val_loaders(batch_size=best_params['batch_size'])

dkt = DKT(
    NUM_QUESTIONS,
    hidden_size=best_params['hidden_size'],
    num_layers=best_params['num_layers'],
)
dkt.train(train_loader, val_loader, epoch=50, lr=lr_study.best_params['learning_rate'])

# Persist the trained weights; the evaluation cell reloads this file.
dkt.save("dkt.params")

In [None]:
# Evaluate on the test file, batching with the batch size chosen by the
# importance study.
test_loader = get_test_data_loader(batch_size=best_params['batch_size'])

# Reload the parameters written by dkt.save so the reported score reflects
# the persisted model file.
dkt.load("dkt.params")
auc = dkt.eval(test_loader)
print("auc: %.6f" % auc)