In [6]:
library("aldvmm")

"package 'aldvmm' was built under R version 4.4.2"


In [None]:
# The dataset must contain the following columns:
#   utility, age, gender, pcs, mcs, kdcs, effect, burden, symptom,
# plus the squared terms (suffix `_sq`, e.g. `pcs_sq`) and the pairwise
# interaction terms (e.g. `pcs_mcs`) used by the models fitted below.
data <- read.csv(file = "your_path.csv")

In [17]:
#' K-fold cross-validation of ALDVMMs with a fixed number of components.
#'
#' Six model specifications are fitted on every fold and scored on the
#' held-out rows:
#' \itemize{
#'   \item "5 variables": age, gender, pcs, mcs, kdcs
#'   \item "7 variables": age, gender, pcs, mcs, effect, burden, symptom
#' }
#' each with main effects only (`main`), main + squared terms (`sq`), and
#' main + squared + pairwise interaction terms (`inter`). The outcome column
#' is `utility`; squared terms are expected as `<var>_sq` columns and
#' interactions as `<var1>_<var2>` columns of `data`.
#'
#' The rows of `data` are shuffled with `sample()`, so call `set.seed()`
#' beforehand for reproducible folds. Every fold holds `floor(nrow(data) / k)`
#' rows; when `nrow(data)` is not a multiple of `k`, the trailing
#' `nrow(data) %% k` shuffled rows are used for training only and never
#' appear in a test fold.
#'
#' Predictions above 1 are truncated to 1 before the errors are computed.
#' Errors are computed as observed minus predicted.
#'
#' @param data the dataset (a data frame holding all columns listed above).
#' @param ncp the number of components of the ALDVMMs (passed as `ncmp`).
#'   When `ncp == 1` the prediction is computed as the design matrix
#'   (intercept + covariates) times the fitted coefficients with the last
#'   coefficient dropped.
#' @param k the number of folds of the cross-validation (at least 2).
#' @param thre tolerated number of failed folds. A fold counts as failed for
#'   a model when its mean error is NaN (e.g. because the ALDVMM did not
#'   converge). If more than `thre` of the `k` folds failed for a model, its
#'   ME, MAE and RMSE are reported as NaN; otherwise they are averaged over
#'   the folds that did not fail.
#' @param psiv an interval passed to `aldvmm()` as `psi`. Lower bound means
#'   the floor value of the value set. Upper bound means the second-highest
#'   value of the value set.
#' @return a data frame with columns `name` and `value` holding the ME, MAE
#'   and RMSE of the 6 ALDVMM models, i.e. 18 rows named
#'   `<metric>_<5|7>_<main|sq|inter>`.
#'
k_fold_cross_aldvmm <- function(data, ncp = 2, k = 10, thre = 3, psiv = c(-0.391, 0.9555)) {
    if (length(k) != 1 || is.na(k) || k < 2) {
        stop("`k` must be a single number >= 2.", call. = FALSE)
    }

    n <- nrow(data)
    fold_size <- floor(n / k)
    if (length(fold_size) != 1 || fold_size < 1) {
        # With fewer than k rows `start:end` would count downwards (1:0) and
        # silently select the wrong rows.
        stop("`data` must have at least `k` rows.", call. = FALSE)
    }

    # Building blocks of the six covariate sets.
    main_5 <- c("age", "gender", "pcs", "mcs", "kdcs")
    main_7 <- c("age", "gender", "pcs", "mcs", "effect", "burden", "symptom")
    sq_5 <- c("pcs_sq", "mcs_sq", "kdcs_sq")
    sq_7 <- c("pcs_sq", "mcs_sq", "effect_sq", "burden_sq", "symptom_sq")
    inter_5 <- c("pcs_kdcs", "mcs_kdcs", "pcs_mcs")
    inter_7 <- c(
        "pcs_mcs", "pcs_effect", "pcs_burden", "pcs_symptom",
        "mcs_effect", "mcs_burden", "mcs_symptom",
        "effect_burden", "effect_symptom", "burden_symptom"
    )

    # Covariates of each model, in the order they enter the formula. The
    # list names become the suffixes of the result names.
    specs <- list(
        "5_main" = main_5,
        "7_main" = main_7,
        "5_sq" = c(main_5, sq_5),
        "7_sq" = c(main_7, sq_7),
        "5_inter" = c(main_5, sq_5, inter_5),
        "7_inter" = c(
            "pcs", "mcs", "effect", "burden", "symptom", "age", "gender",
            sq_7, inter_7
        )
    )
    metrics <- c("me", "mae", "rmse")

    # fold x model x metric; NaN marks "no usable result for this fold".
    fold_errors <- array(
        NaN,
        dim = c(k, length(specs), length(metrics)),
        dimnames = list(NULL, names(specs), metrics)
    )

    data <- data[sample(n), ]

    for (i in seq_len(k)) {
        test_idx <- ((i - 1) * fold_size + 1):(i * fold_size)
        test_data <- data[test_idx, ]
        train_data <- data[-test_idx, ]

        for (tag in names(specs)) {
            fold_errors[i, tag, ] <- .aldvmm_fold_errors(
                vars = specs[[tag]],
                train_data = train_data,
                test_data = test_data,
                ncp = ncp,
                psiv = psiv
            )
        }
    }

    # One column per model holding (me, mae, rmse).
    summary_mat <- vapply(names(specs), function(tag) {
        n_failed <- sum(is.nan(fold_errors[, tag, "me"]))
        if (n_failed > thre) {
            return(rep(NaN, length(metrics)))
        }
        vapply(metrics, function(m) {
            vals <- fold_errors[, tag, m]
            mean(vals[!is.nan(vals)])
        }, numeric(1))
    }, numeric(length(metrics)))

    # Column-major flattening yields me/mae/rmse of model 1, then model 2, ...
    data.frame(
        name = as.vector(outer(metrics, names(specs), paste, sep = "_")),
        value = as.vector(summary_mat)
    )
}

# Fit one ALDVMM (`utility ~ <vars> | 1`) on `train_data` and return the
# named vector c(me, mae, rmse) of its prediction errors on `test_data`.
.aldvmm_fold_errors <- function(vars, train_data, test_data, ncp, psiv) {
    model_formula <- stats::as.formula(
        paste("utility ~", paste(vars, collapse = " + "), "| 1")
    )
    model <- aldvmm(model_formula,
        data = train_data,
        psi = psiv,
        ncmp = ncp
    )

    if (ncp == 1) {
        # Single-component model: predict from the linear predictor.
        # NOTE(review): the dropped last coefficient is presumably the
        # log-sigma parameter - confirm against the aldvmm documentation.
        design <- cbind(rep(1, nrow(test_data)), test_data[, vars])
        coe <- coef(model)
        coe <- coe[-length(coe)]
        predicted <- as.matrix(design) %*% coe
    } else {
        predicted <- predict(model, test_data)$yhat
    }
    predicted[predicted > 1] <- 1

    err <- test_data$utility - predicted
    c(me = mean(err), mae = mean(abs(err)), rmse = sqrt(mean(err^2)))
}
