In [None]:
library(tidyverse)
library(repr)
library(tidymodels)
library(RColorBrewer)
options(repr.matrix.max.rows = 5)
set.seed(69)

In [None]:
url <- "https://raw.githubusercontent.com/2shar86/DSCI-100/main/nba_2022-23_all_stats_with_salary.csv"
nba_salaries <- read_csv(url)

nba_salaries

colnames(nba_salaries)

In [None]:
clean_nba_salaries <- nba_salaries |>
    select(-Position, -Age, -Team, -'Player Name', -'...1', -'USG%', -OWS, -DWS, -WS, -"WS/48", -OBPM, -DBPM, -BPM, -VORP) |>
    na.omit()
clean_nba_salaries
sum(map_df(clean_nba_salaries, is.na))
str(clean_nba_salaries)

In [None]:
salaries_split <- initial_split(clean_nba_salaries, prop = 0.75, strata = Salary)

salaries_training <- training(salaries_split)
salaries_testing <- testing(salaries_split)

head(salaries_training)

nrow(salaries_training)
nrow(salaries_testing)

In [36]:
# create an empty tibble to store the results
accuracies <- tibble(size = integer(),
                     model_string = character(),
                     accuracy = numeric())

# create a model specification
knn_spec <- nearest_neighbor(weight_func = "rectangular",
                             neighbors = tune()) |>
     set_engine("kknn") |>
     set_mode("regression")

# create a 5-fold cross-validation object
salary_vfold <- vfold_cv(salaries_training, v = 5, strata = Salary)

# store the total number of predictors
n_total <- length(names)

# stores selected predictors
selected <- c()

# for every size from 1 to the total number of predictors
for (i in 1:n_total) {
    # for every predictor still not added yet
    accs <- list()
    models <- list()
    for (j in 1:length(names)) {
        # create a model string for this combination of predictors
        preds_new <- c(selected, names[[j]])
        model_string <- paste("Class", "~", paste(preds_new, collapse="+"))

        # create a recipe from the model string
        salary_recipe <- recipe(as.formula(model_string),
                                data = salaries_training) |>
                          step_scale(all_predictors()) |>
                          step_center(all_predictors())

        # tune the K-NN classifier with these predictors,
        # and collect the accuracy for the best K
        acc <- workflow() |>
          add_recipe(salary_recipe) |>
          add_model(knn_spec) |>
          tune_grid(resamples = salary_vfold, grid = 10) |>
          collect_metrics() |>
          filter(.metric == "accuracy") |>
          summarize(mx = max(mean))
        acc <- acc$mx |> unlist()

        # add this result to the dataframe
        accs[[j]] <- acc
        models[[j]] <- model_string
    }
    jstar <- which.max(unlist(accs))
    accuracies <- accuracies |>
      add_row(size = i,
              model_string = models[[jstar]],
              accuracy = accs[[jstar]])
    selected <- c(selected, names[[jstar]])
    names <- names[-jstar]
}
accuracies

ERROR: Error in names[[j]]: object of type 'builtin' is not subsettable
