In [4]:
library(caret)
set.seed(123)
data <- read.table("a24_clas_app.txt", header = TRUE, sep = " ")

cross_validate_linear_regression <- function(data, target) {
    cv_errors <- numeric()
    for (k in 3:20) {
        train_control <- trainControl(method = "cv", number = k)
        model <- train(as.formula(paste(target, "~ .")),
                                     data = data,
                                     method = "lm",
                                     trControl = train_control)
        cv_errors[k - 2] <- mean(model$resample$RMSE)
    }
    best_k <- which.min(cv_errors) + 2
    final_train_control <- trainControl(method = "cv", number = best_k)
    final_model <- train(as.formula(paste(target, "~ .")),
                                             data = data,
                                             method = "lm",
                                             trControl = final_train_control)
    
    return(list(model = final_model, best_k = best_k))
}
result <- cross_validate_linear_regression(data, "y")
print(result$best_k)
print(result$model)

[1] 11
Linear Regression 

500 samples
 50 predictor

No pre-processing
Resampling: Cross-Validated (11 fold) 
Summary of sample sizes: 454, 455, 455, 454, 455, 454, ... 
Resampling results:

  RMSE       Rsquared    MAE      
  0.7295132  0.02541618  0.6237255

Tuning parameter 'intercept' was held constant at a value of TRUE


In [1]:

library(caret)
library(MASS)       # For QDA and LDA
library(splines)    # For Splines
library(class)      # For KNN
library(glmnet)     # For Ridge, Lasso, and Elastic Net Regression
library(rpart)      # For Decision Trees
library(Matrix)
set.seed(123)
data <- read.table("a24_clas_app.txt", header = TRUE, sep = " ")

cross_validate_models <- function(data, target, model_type) {
    cv_errors <- numeric()
    best_k <- 0
    final_model <- NULL

    for (k in 3:20) {
        train_control <- trainControl(method = "cv", number = k)

        # Select method based on model_type
        if (model_type == "linear_regression") {
            model <- train(as.formula(paste(target, "~ .")),
                           data = data,
                           method = "lm",
                           trControl = train_control)

        } else if (model_type == "ridge_regression") {
            formula <- as.formula(paste(target, "~ ."))
            model <- train(formula,
                           data = data,
                           method = "glmnet",
                           trControl = train_control,
                           tuneGrid = expand.grid(alpha = 0, lambda = seq(0, 1, length = 100)))

        } else if (model_type == "lasso_regression") {
            formula <- as.formula(paste(target, "~ ."))
            model <- train(formula,
                           data = data,
                           method = "glmnet",
                           trControl = train_control,
                           tuneGrid = expand.grid(alpha = 1, lambda = seq(0, 1, length = 100)))

        } else if (model_type == "elastic_net_regression") {
            formula <- as.formula(paste(target, "~ ."))
            model <- train(formula,
                           data = data,
                           method = "glmnet",
                           trControl = train_control,
                           tuneGrid = expand.grid(alpha = seq(0, 1, length = 10), lambda = seq(0, 1, length = 100)))

        } else if (model_type == "splines") {
            model <- train(as.formula(paste(target, "~ bs(. , degree=3)")),
                           data = data,
                           method = "lm",
                           trControl = train_control)

        } else if (model_type == "knn") {
            model <- train(as.formula(paste(target, "~ .")),
                           data = data,
                           method = "knn",
                           trControl = train_control,
                           tuneGrid = expand.grid(k = k))  # k here is used in KNN

        } else {
            stop("Invalid model type. Choose from 'linear_regression', 'polynomial_regression', 'ridge_regression', 'lasso_regression', 'elastic_net_regression', 'splines', or 'knn'.")
        }

        # Calculate cross-validation error based on model type
        if (model_type %in% c("linear_regression", "polynomial_regression", "ridge_regression", "lasso_regression", "elastic_net_regression", "splines")) {
            cv_errors[k - 2] <- mean(model$resample$RMSE)
        } else if (model_type == "knn") {
            cv_errors[k - 2] <- 1 - mean(model$resample$Accuracy)
        }
    }

    # Find the value of k that minimizes the cross-validation error
    best_k <- which.min(cv_errors) + 2

    # Retrain the model with the optimal k
    final_train_control <- trainControl(method = "cv", number = best_k)

    if (model_type == "linear_regression") {
        final_model <- train(as.formula(paste(target, "~ .")),
                             data = data,
                             method = "lm",
                             trControl = final_train_control)

    } else if (model_type == "ridge_regression") {
        formula <- as.formula(paste(target, "~ ."))
        final_model <- train(formula,
                             data = data,
                             method = "glmnet",
                             trControl = final_train_control,
                             tuneGrid = expand.grid(alpha = 0, lambda = seq(0, 1, length = 100)))

    } else if (model_type == "lasso_regression") {
        formula <- as.formula(paste(target, "~ ."))
        final_model <- train(formula,
                             data = data,
                             method = "glmnet",
                             trControl = final_train_control,
                             tuneGrid = expand.grid(alpha = 1, lambda = seq(0, 1, length = 100)))

    } else if (model_type == "elastic_net_regression") {
        formula <- as.formula(paste(target, "~ ."))
        final_model <- train(formula,
                             data = data,
                             method = "glmnet",
                             trControl = final_train_control,
                             tuneGrid = expand.grid(alpha = seq(0, 1, length = 10), lambda = seq(0, 1, length = 100)))

    } else if (model_type == "splines") {
        final_model <- train(as.formula(paste(target, "~ bs(. , degree=3)")),
                             data = data,
                             method = "lm",
                             trControl = final_train_control)

    } else if (model_type == "knn") {
        final_model <- train(as.formula(paste(target, "~ .")),
                             data = data,
                             method = "knn",
                             trControl = final_train_control,
                             tuneGrid = expand.grid(k = best_k))
    }

    # Return the final model and the best k
    return(list(model = final_model, best_k = best_k))
}

# Example calls:
# For Linear Regression
result_lr <- cross_validate_models(data, "y", "linear_regression")
print(result_lr$best_k)
print(result_lr$model)

# For Ridge Regression
result_rr <- cross_validate_models(data, "y", "ridge_regression")
print(result_rr$best_k)
print(result_rr$model)

# For Lasso Regression
result_lar <- cross_validate_models(data, "y", "lasso_regression")
print(result_lar$best_k)
print(result_lar$model)

# For Elastic Net Regression
result_enr <- cross_validate_models(data, "y", "elastic_net_regression")
print(result_enr$best_k)
print(result_enr$model)

# For Splines
result_splines <- cross_validate_models(data, "y", "splines")
print(result_splines$best_k)
print(result_splines$model)

# For KNN
result_knn <- cross_validate_models(data, "y", "knn")
print(result_knn$best_k)
print(result_knn$model)

Le chargement a nécessité le package : ggplot2

Le chargement a nécessité le package : lattice

Le chargement a nécessité le package : Matrix

Loaded glmnet 4.1-8



[1] 11
Linear Regression 

500 samples
 50 predictor

No pre-processing
Resampling: Cross-Validated (11 fold) 
Summary of sample sizes: 454, 455, 455, 454, 455, 454, ... 
Resampling results:

  RMSE       Rsquared    MAE      
  0.7295132  0.02541618  0.6237255

Tuning parameter 'intercept' was held constant at a value of TRUE
[1] 6
glmnet 

500 samples
 50 predictor

No pre-processing
Resampling: Cross-Validated (6 fold) 
Summary of sample sizes: 416, 417, 417, 417, 416, 417, ... 
Resampling results across tuning parameters:

  lambda      RMSE       Rsquared    MAE      
  0.00000000  0.7350037  0.01479933  0.6279928
  0.01010101  0.7350034  0.01479924  0.6279927
  0.02020202  0.7338014  0.01468580  0.6273272
  0.03030303  0.7324389  0.01455935  0.6265442
  0.04040404  0.7311659  0.01444135  0.6258013
  0.05050505  0.7299728  0.01433127  0.6251221
  0.06060606  0.7288528  0.01422893  0.6244730
  0.07070707  0.7277993  0.01413354  0.6238507
  0.08080808  0.7268066  0.01404520  0.62324

"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance mea

[1] 20
glmnet 

500 samples
 50 predictor

No pre-processing
Resampling: Cross-Validated (20 fold) 
Summary of sample sizes: 476, 476, 475, 475, 476, 476, ... 
Resampling results across tuning parameters:

  lambda      RMSE       Rsquared    MAE      
  0.00000000  0.7244437  0.06254667  0.6247353
  0.01010101  0.7129744  0.06579675  0.6170971
  0.02020202  0.7088486  0.06617849  0.6141602
  0.03030303  0.7058542  0.06045724  0.6114457
  0.04040404  0.7034212  0.05908947  0.6081487
  0.05050505  0.6994629  0.06958997  0.6032743
  0.06060606  0.6982015  0.07835674  0.6006974
  0.07070707  0.6991259  0.08275922  0.6001442
  0.08080808  0.7005777  0.08663625  0.6001628
  0.09090909  0.7025751  0.08527043  0.6006741
  0.10101010  0.7037351  0.07220860  0.6002159
  0.11111111  0.7035386  0.05701579  0.5989757
  0.12121212  0.7029630  0.05402080  0.5977495
  0.13131313  0.7025306  0.12849127  0.5972319
  0.14141414  0.7024411         NaN  0.5971462
  0.15151515  0.7024411         NaN  0.597

"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance measures."
"There were missing values in resampled performance mea

[1] 18
glmnet 

500 samples
 50 predictor

No pre-processing
Resampling: Cross-Validated (18 fold) 
Summary of sample sizes: 472, 472, 473, 473, 472, 473, ... 
Resampling results across tuning parameters:

  alpha      lambda      RMSE       Rsquared     MAE      
  0.0000000  0.00000000  0.7294614  0.050597204  0.6293045
  0.0000000  0.01010101  0.7294614  0.050597204  0.6293045
  0.0000000  0.02020202  0.7282052  0.050508682  0.6284437
  0.0000000  0.03030303  0.7268117  0.050413746  0.6274504
  0.0000000  0.04040404  0.7255132  0.050322494  0.6264943
  0.0000000  0.05050505  0.7243002  0.050235776  0.6255908
  0.0000000  0.06060606  0.7231653  0.050153669  0.6247519
  0.0000000  0.07070707  0.7221014  0.050075944  0.6239569
  0.0000000  0.08080808  0.7211021  0.050002872  0.6231872
  0.0000000  0.09090909  0.7201617  0.049933876  0.6224408
  0.0000000  0.10101010  0.7192763  0.049868170  0.6217321
  0.0000000  0.11111111  0.7184408  0.049806115  0.6210489
  0.0000000  0.12121212  0.

ERROR: Error in eval(predvars, data, env): objet '.' introuvable
