# Data Splitting

In [7]:
# Run this cell before continuing.
library(tidyverse)
library(repr)
library(infer)
library(gridExtra)
library(caret)
library(pROC)
library(boot)
library(glmnet)
library(broom)
library(leaps)
library(repr)
library(faraway)
library(mltools)

In [107]:
set.seed(123)
employee_dat <- read_csv("https://raw.githubusercontent.com/jtan29/stat-301-project/main/Employee.csv")
employee_dat <- employee_dat %>%
mutate(PaymentTier = as.factor(PaymentTier)) %>%

head(employee_dat)

[1mRows: [22m[34m4653[39m [1mColumns: [22m[34m9[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): Education, City, Gender, EverBenched
[32mdbl[39m (5): JoiningYear, PaymentTier, Age, ExperienceInCurrentDomain, LeaveOrNot

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


ERROR: Error in head.data.frame(., employee_dat): invalid 'n' - must be numeric, possibly NA.


In [108]:
employee_dat$id <- 1:nrow(employee_dat) # add a new ID column

# 70% into train split
employee_train <- 
    employee_dat %>% 
    slice_sample(prop = 0.7)

# 30% into test split
employee_test <- 
    employee_dat %>% 
    anti_join(employee_train, by = "id") 

# drop the ID column
employee_train <- 
    employee_train %>% 
    select(-id)

employee_test <-
    employee_test %>%
    select(-id)

head(employee_train, 5)
head(employee_test, 5)

Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>
Bachelors,2015,Pune,3,28,Male,No,1,0
Bachelors,2014,Bangalore,3,29,Male,No,1,0
Masters,2017,New Delhi,3,26,Male,No,4,1
Bachelors,2015,Bangalore,3,25,Male,No,3,0
Masters,2017,New Delhi,2,31,Male,No,4,0


Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>
Bachelors,2014,New Delhi,3,38,Female,No,2,0
Bachelors,2016,Bangalore,3,22,Male,No,0,0
Bachelors,2016,Bangalore,3,34,Female,No,2,1
Masters,2017,New Delhi,2,37,Male,No,2,0
Bachelors,2016,Bangalore,3,39,Male,No,2,0


In [109]:
model_matrix_X_train <- 
    model.matrix(LeaveOrNot ~ ., employee_train)
model_matrix_X_train <- model_matrix_X_train[,-1]

matrix_Y_train <- 
    as.matrix(employee_train %>% select(LeaveOrNot), ncol = 1)

## LASSO Model

In [110]:
lasso_model = cv.glmnet(x = model_matrix_X_train,
                        y = matrix_Y_train,
                         alpha = 1)
lasso_model


Call:  cv.glmnet(x = model_matrix_X_train, y = matrix_Y_train, alpha = 1) 

Measure: Mean-Squared Error 

      Lambda Index Measure       SE Nonzero
min 0.002201    42  0.1947 0.003260      10
1se 0.018705    19  0.1976 0.002917       8

In [111]:
selected_variables <- as_tibble(as.matrix(coef(lasso_model, s = "lambda.1se")),
                                rownames='covariate')
selected_variables

covariate,s1
<chr>,<dbl>
(Intercept),-62.674739661
EducationMasters,0.082279905
EducationPHD,0.0
JoiningYear,0.031393651
CityNew Delhi,-0.011940981
CityPune,0.129756203
PaymentTier,-0.054620585
Age,-0.002283375
GenderMale,-0.141466955
EverBenchedYes,0.061964429


In [112]:
data_train <- employee_train |>
            select(-EverBenched, -ExperienceInCurrentDomain)
data_test <- employee_test |>
            select(-EverBenched, -ExperienceInCurrentDomain)
head(data_train)

Education,JoiningYear,City,PaymentTier,Age,Gender,LeaveOrNot
<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>
Bachelors,2015,Pune,3,28,Male,0
Bachelors,2014,Bangalore,3,29,Male,0
Masters,2017,New Delhi,3,26,Male,1
Bachelors,2015,Bangalore,3,25,Male,0
Masters,2017,New Delhi,2,31,Male,0
Bachelors,2013,Bangalore,3,26,Male,0


In [113]:
final_model <- glm(LeaveOrNot ~ ., family = "binomial", data = data_train)
tidy(final_model)

term,estimate,std.error,statistic,p.value
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
(Intercept),-431.62752711,45.529545719,-9.4801633,2.538855e-21
EducationMasters,0.68164208,0.111253509,6.1269265,8.959283e-10
EducationPHD,0.02633311,0.220966229,0.1191725,0.9051387
JoiningYear,0.21490525,0.022599751,9.5091867,1.92159e-21
CityNew Delhi,-0.44891707,0.114270522,-3.9285466,8.54608e-05
CityPune,0.64668123,0.097890352,6.6061794,3.943649e-11
PaymentTier,-0.31417348,0.07201015,-4.3629056,1.283464e-05
Age,-0.03334676,0.008361416,-3.9881719,6.658441e-05
GenderMale,-0.89654767,0.083683085,-10.7136068,8.787023e-27


In [114]:
prediction = round(predict(final_model, newdata = data_test |> select(-LeaveOrNot), type = "response"), 0)
head(prediction)

In [115]:
conf_matrix <- 
    confusionMatrix(
    data = as.factor(prediction),
    reference = data_test |> select(LeaveOrNot) |> unlist() |> as.factor(),
    positive = "1"
)
conf_matrix

Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 824 291
         1  83 198
                                         
               Accuracy : 0.7321         
                 95% CI : (0.708, 0.7552)
    No Information Rate : 0.6497         
    P-Value [Acc > NIR] : 2.698e-11      
                                         
                  Kappa : 0.3475         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      
                                         
            Sensitivity : 0.4049         
            Specificity : 0.9085         
         Pos Pred Value : 0.7046         
         Neg Pred Value : 0.7390         
             Prevalence : 0.3503         
         Detection Rate : 0.1418         
   Detection Prevalence : 0.2013         
      Balanced Accuracy : 0.6567         
                                         
       'Positive' Class : 1              
                                         

In [116]:
lasso_acc <- conf_matrix$overall['Accuracy']
lasso_acc

## OLS with Forward Selection Model

In [117]:
##Since some categorical variables are present numeric ways,use factor() to transform them into categorical form.
ly_employee_train <- data.frame(employee_train)
ly_employee_test <- data.frame(employee_test)
ly_employee_train$PaymentTier<-factor(ly_employee_train$PaymentTier, levels = c(1, 2, 3), labels = c("Low", "Median", "High"),ordered = TRUE)
ly_employee_train$LeaveOrNot<-factor(ly_employee_train$LeaveOrNot, levels = c(0, 1), labels = c("Not Leave", "Leave"), ordered = TRUE)
head(ly_employee_train)
ly_employee_test$PaymentTier<-factor(ly_employee_test$PaymentTier, levels = c(1, 2, 3), labels = c("Low", "Median", "High"),ordered = TRUE)
ly_employee_test$LeaveOrNot<-factor(ly_employee_test$LeaveOrNot, levels = c(0, 1), labels = c("Not Leave", "Leave"), ordered = TRUE)
head(ly_employee_test)


Unnamed: 0_level_0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<ord>,<dbl>,<chr>,<chr>,<dbl>,<ord>
1,Bachelors,2015,Pune,High,28,Male,No,1,Not Leave
2,Bachelors,2014,Bangalore,High,29,Male,No,1,Not Leave
3,Masters,2017,New Delhi,High,26,Male,No,4,Leave
4,Bachelors,2015,Bangalore,High,25,Male,No,3,Not Leave
5,Masters,2017,New Delhi,Median,31,Male,No,4,Not Leave
6,Bachelors,2013,Bangalore,High,26,Male,No,4,Not Leave


Unnamed: 0_level_0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<ord>,<dbl>,<chr>,<chr>,<dbl>,<ord>
1,Bachelors,2014,New Delhi,High,38,Female,No,2,Not Leave
2,Bachelors,2016,Bangalore,High,22,Male,No,0,Not Leave
3,Bachelors,2016,Bangalore,High,34,Female,No,2,Leave
4,Masters,2017,New Delhi,Median,37,Male,No,2,Not Leave
5,Bachelors,2016,Bangalore,High,39,Male,No,2,Not Leave
6,Bachelors,2012,Bangalore,High,37,Male,No,4,Not Leave


In [118]:
###using forward selection to find the variables to best fit predict modeling
employee_forward_sel <- regsubsets(x = LeaveOrNot ~ ., nvmax = NULL,
                                  data = ly_employee_train,
                                  method = "forward")

employee_forward_summary <- summary(employee_forward_sel)
employee_forward_summary

Subset selection object
Call: regsubsets.formula(x = LeaveOrNot ~ ., nvmax = NULL, data = ly_employee_train, 
    method = "forward")
11 Variables  (and intercept)
                          Forced in Forced out
EducationMasters              FALSE      FALSE
EducationPHD                  FALSE      FALSE
JoiningYear                   FALSE      FALSE
CityNew Delhi                 FALSE      FALSE
CityPune                      FALSE      FALSE
PaymentTier.L                 FALSE      FALSE
PaymentTier.Q                 FALSE      FALSE
Age                           FALSE      FALSE
GenderMale                    FALSE      FALSE
EverBenchedYes                FALSE      FALSE
ExperienceInCurrentDomain     FALSE      FALSE
1 subsets of each size up to 11
Selection Algorithm: forward
          EducationMasters EducationPHD JoiningYear CityNew Delhi CityPune
1  ( 1 )  " "              " "          " "         " "           " "     
2  ( 1 )  " "              " "          " "         " "      

In [119]:
##store and examine different evaluation metrics to determine the best one in terms of its goodness of fit.
employee_forward_summary_df <- tibble(
    n_input_variables = 1:11,
    RSQ = employee_forward_summary$rsq,
    RSS = employee_forward_summary$rss,
    ADJ_R2 = employee_forward_summary$adjr2,
    Cp = employee_forward_summary$cp,
    BIC = employee_forward_summary$bic,
)
employee_forward_summary_df

n_input_variables,RSQ,RSS,ADJ_R2,Cp,BIC
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.06544388,684.1186,0.06515677,313.01063,-204.2684
2,0.08796898,667.6297,0.08740842,229.06103,-275.643
3,0.10757788,653.2755,0.10675486,156.23883,-338.3445
4,0.12504012,640.4927,0.12396391,91.6077,-394.6183
5,0.1309682,636.1532,0.12963164,70.98776,-408.6718
6,0.13700853,631.7315,0.13541531,49.93951,-423.3005
7,0.14322972,627.1774,0.1413838,28.20113,-438.7763
8,0.14711021,624.3368,0.1450095,15.39425,-445.4729
9,0.14899282,622.9587,0.14663401,10.21071,-444.5816
10,0.1494145,622.65,0.14679409,10.60171,-438.1073


In [120]:
#select the model that minimizes the Cp and present its predictor variables
cp_min = which.min(employee_forward_summary$cp) 

selected_var <- names(coef(employee_forward_sel, cp_min))[-1]
selected_var

In [121]:
##rearrange the training dataset to ensure it contain the best fiting variables
rearrange_training <- ly_employee_train |>
mutate(EducationMasters = ifelse(Education == "Masters", "Yes", "No"))|>
mutate(CityNewDelhi = ifelse(City == "New Delhi", "Yes", "No"))|>
mutate(CityPune = ifelse(City == "Pune", "Yes", "No"))|>
mutate(PaymentTier.Q = ifelse(PaymentTier == "Median", "Yes", "No"))
head(rearrange_training)

Unnamed: 0_level_0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot,EducationMasters,CityNewDelhi,CityPune,PaymentTier.Q
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<ord>,<dbl>,<chr>,<chr>,<dbl>,<ord>,<chr>,<chr>,<chr>,<chr>
1,Bachelors,2015,Pune,High,28,Male,No,1,Not Leave,No,No,Yes,No
2,Bachelors,2014,Bangalore,High,29,Male,No,1,Not Leave,No,No,No,No
3,Masters,2017,New Delhi,High,26,Male,No,4,Leave,Yes,Yes,No,No
4,Bachelors,2015,Bangalore,High,25,Male,No,3,Not Leave,No,No,No,No
5,Masters,2017,New Delhi,Median,31,Male,No,4,Not Leave,Yes,Yes,No,Yes
6,Bachelors,2013,Bangalore,High,26,Male,No,4,Not Leave,No,No,No,No


In [122]:
##employing logistic regression, using binomial distribution
rearrange_training_log <- 
    glm(formula = LeaveOrNot ~ EducationMasters+JoiningYear+CityNewDelhi+
        CityPune+PaymentTier.Q+Age+Gender+EverBenched+ExperienceInCurrentDomain,
        data = rearrange_training,
        family = binomial)

summary(rearrange_training_log)


Call:
glm(formula = LeaveOrNot ~ EducationMasters + JoiningYear + CityNewDelhi + 
    CityPune + PaymentTier.Q + Age + Gender + EverBenched + ExperienceInCurrentDomain, 
    family = binomial, data = rearrange_training)

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -3.871e+02  4.613e+01  -8.392  < 2e-16 ***
EducationMastersYes        6.539e-01  1.112e-01   5.879 4.13e-09 ***
JoiningYear                1.925e-01  2.290e-02   8.404  < 2e-16 ***
CityNewDelhiYes           -5.292e-01  1.154e-01  -4.585 4.53e-06 ***
CityPuneYes                5.235e-01  1.019e-01   5.137 2.79e-07 ***
PaymentTier.QYes           7.146e-01  1.097e-01   6.516 7.23e-11 ***
Age                       -3.566e-02  8.533e-03  -4.180 2.92e-05 ***
GenderMale                -8.732e-01  8.438e-02 -10.349  < 2e-16 ***
EverBenchedYes             5.765e-01  1.255e-01   4.595 4.32e-06 ***
ExperienceInCurrentDomain -7.030e-02  2.616e-02  -2.687  0.00721 ** 
---
S

In [123]:
##rearrange the testing dataset to ensure it contain the best fiting variables
rearrange_testing<-ly_employee_test|>
mutate(EducationMasters = ifelse(Education == "Masters", "Yes", "No"))|>
mutate(CityNewDelhi = ifelse(City == "New Delhi", "Yes", "No"))|>
mutate(CityPune = ifelse(City == "Pune", "Yes", "No"))|>
mutate(PaymentTier.Q = ifelse(PaymentTier == "Median", "Yes", "No"))

                              

In [124]:
##Use the resulting predictive values to compute the error and the RMSE of the predictive values based on training set
predicted_probabilities_training <- predict(rearrange_training_log, 
                                   newdata=rearrange_training,
                                   type = "response")


rearrange_training<-mutate(rearrange_training,
                          LeaveOrNot_P = ifelse(LeaveOrNot == "Leave", 1, 0))

#true probability of leave
p_true_training <- rearrange_training$LeaveOrNot_P

#calculate residuals
residuals_training <- p_true_training - predicted_probabilities_training

rmse_red_glm_training<-sqrt(mean(residuals_training^2))

rmse_red_glm_training

In [125]:
prediction = round(predict(rearrange_training_log, newdata = rearrange_testing, type = "response"), 0)
head(prediction)

In [126]:
conf_matrix <- 
    confusionMatrix(
    data = as.factor(prediction),
    reference = as.factor(employee_test$LeaveOrNot),
    positive = "1"
)
conf_matrix

Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 825 289
         1  82 200
                                          
               Accuracy : 0.7342          
                 95% CI : (0.7102, 0.7573)
    No Information Rate : 0.6497          
    P-Value [Acc > NIR] : 8.142e-12       
                                          
                  Kappa : 0.353           
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.4090          
            Specificity : 0.9096          
         Pos Pred Value : 0.7092          
         Neg Pred Value : 0.7406          
             Prevalence : 0.3503          
         Detection Rate : 0.1433          
   Detection Prevalence : 0.2020          
      Balanced Accuracy : 0.6593          
                                          
       'Positive' Class : 1               
                              

In [127]:
ols_forward_acc <- conf_matrix$overall['Accuracy']
ols_forward_acc

## Ridge Model

In [128]:
employee_cv_lambda_ridge <- 
  cv.glmnet(
       x = model_matrix_X_train, 
       y = matrix_Y_train,
       alpha = 0,
       family = "binomial",
       type.measure = "auc",
       nfolds = 10)

employee_cv_lambda_ridge


Call:  cv.glmnet(x = model_matrix_X_train, y = matrix_Y_train, type.measure = "auc",      nfolds = 10, alpha = 0, family = "binomial") 

Measure: AUC 

    Lambda Index Measure       SE Nonzero
min   0.01    98  0.7257 0.007160      10
1se  90.95     2  0.7213 0.008386      10

In [129]:
employee_ridge_max_AUC <- 
  glmnet(
  x = model_matrix_X_train, y = matrix_Y_train,
  alpha = 0,
  family = "binomial",
  lambda = employee_cv_lambda_ridge$lambda.min
)

coef(employee_ridge_max_AUC)

11 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
(Intercept)               -392.06706230
EducationMasters             0.64572996
EducationPHD                 0.04765214
JoiningYear                  0.19531327
CityNew Delhi               -0.38822958
CityPune                     0.61892205
PaymentTier                 -0.32029380
Age                         -0.03266386
GenderMale                  -0.84277777
EverBenchedYes               0.54724008
ExperienceInCurrentDomain   -0.05936960

In [130]:
employee_class <- 
  predict(employee_ridge_max_AUC, model_matrix_X_train, type = "response") %>% round(0)
head(employee_class, 5)

Unnamed: 0,s0
1,0
2,0
3,0
4,0
5,0


In [131]:
employee_confusion_matrix <- 
    confusionMatrix(
    data = as.factor(employee_class),
    reference = as.factor(employee_train$LeaveOrNot),
    positive = "1"
)

employee_confusion_matrix

Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 1932  682
         1  214  429
                                          
               Accuracy : 0.7249          
                 95% CI : (0.7092, 0.7402)
    No Information Rate : 0.6589          
    P-Value [Acc > NIR] : 3.571e-16       
                                          
                  Kappa : 0.3188          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.3861          
            Specificity : 0.9003          
         Pos Pred Value : 0.6672          
         Neg Pred Value : 0.7391          
             Prevalence : 0.3411          
         Detection Rate : 0.1317          
   Detection Prevalence : 0.1974          
      Balanced Accuracy : 0.6432          
                                          
       'Positive' Class : 1               
                        

In [132]:
options(repr.plot.width = 8, repr.plot.height = 8)

ROC_ridge <- roc(
  response = employee_train$LeaveOrNot,
  predictor = predict(employee_ridge_max_AUC, newx = model_matrix_X_train)[,"s0"])

best_threshold <- coords(ROC_ridge, "best")["threshold"] %>% pull()
best_threshold

Setting levels: control = 0, case = 1

Setting direction: controls < cases



In [133]:
train_pred <- as.integer(predict(employee_ridge_max_AUC, newx = model_matrix_X_train, newy = matrix_Y_train) > best_threshold)

employee_confusion_matrix_adj <- 
    confusionMatrix(
    data = as.factor(train_pred),
    reference = as.factor(employee_train$LeaveOrNot),
    positive = "1"
)
employee_confusion_matrix_adj

Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 1755  505
         1  391  606
                                          
               Accuracy : 0.7249          
                 95% CI : (0.7092, 0.7402)
    No Information Rate : 0.6589          
    P-Value [Acc > NIR] : 3.571e-16       
                                          
                  Kappa : 0.3725          
                                          
 Mcnemar's Test P-Value : 0.00016         
                                          
            Sensitivity : 0.5455          
            Specificity : 0.8178          
         Pos Pred Value : 0.6078          
         Neg Pred Value : 0.7765          
             Prevalence : 0.3411          
         Detection Rate : 0.1861          
   Detection Prevalence : 0.3061          
      Balanced Accuracy : 0.6816          
                                          
       'Positive' Class : 1               
                        

In [134]:
# preparing the test matrices
model_matrix_X_test <- 
    model.matrix(LeaveOrNot ~ ., employee_test)
model_matrix_X_test <- model_matrix_X_test[,-1]

matrix_Y_test <- 
    as.matrix(employee_test %>% select(LeaveOrNot), ncol = 1)

In [135]:
test_pred <- as.integer(predict(employee_ridge_max_AUC, newx = model_matrix_X_test, newy = matrix_Y_test) > best_threshold)

employee_confusion_matrix_test <- 
    confusionMatrix(
    data = as.factor(test_pred),
    reference = as.factor(employee_test$LeaveOrNot),
    positive = "1"
)
employee_confusion_matrix_test

Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 753 215
         1 154 274
                                          
               Accuracy : 0.7357          
                 95% CI : (0.7117, 0.7586)
    No Information Rate : 0.6497          
    P-Value [Acc > NIR] : 3.597e-12       
                                          
                  Kappa : 0.4021          
                                          
 Mcnemar's Test P-Value : 0.001787        
                                          
            Sensitivity : 0.5603          
            Specificity : 0.8302          
         Pos Pred Value : 0.6402          
         Neg Pred Value : 0.7779          
             Prevalence : 0.3503          
         Detection Rate : 0.1963          
   Detection Prevalence : 0.3066          
      Balanced Accuracy : 0.6953          
                                          
       'Positive' Class : 1               
                              

In [136]:
test_pred <- as.integer(predict(employee_ridge_max_AUC, newx = model_matrix_X_test, newy = matrix_Y_test) > best_threshold)

employee_confusion_matrix_test <- 
    confusionMatrix(
    data = as.factor(test_pred),
    reference = as.factor(employee_test$LeaveOrNot),
    positive = "1"
)
employee_confusion_matrix_test

Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 753 215
         1 154 274
                                          
               Accuracy : 0.7357          
                 95% CI : (0.7117, 0.7586)
    No Information Rate : 0.6497          
    P-Value [Acc > NIR] : 3.597e-12       
                                          
                  Kappa : 0.4021          
                                          
 Mcnemar's Test P-Value : 0.001787        
                                          
            Sensitivity : 0.5603          
            Specificity : 0.8302          
         Pos Pred Value : 0.6402          
         Neg Pred Value : 0.7779          
             Prevalence : 0.3503          
         Detection Rate : 0.1963          
   Detection Prevalence : 0.3066          
      Balanced Accuracy : 0.6953          
                                          
       'Positive' Class : 1               
                              

In [137]:
ridge_acc <- employee_confusion_matrix_test$overall['Accuracy']
ridge_acc

# Comparison

In [138]:
data.frame(x = c("LASSO", "OLS with Forward", "Ridge"),  y = c(lasso_acc, ols_forward_acc, ridge_acc))

x,y
<chr>,<dbl>
LASSO,0.7320917
OLS with Forward,0.7342407
Ridge,0.7356734


The performance of the model is fairly consistent with all approaches.