In [None]:
# Classification template (K-nearest neighbours)
#
# Fill in every `...` placeholder before running. The template assumes the
# tidyverse and tidymodels packages are already attached (TODO confirm for
# the notebook this is pasted into).
# Steps: split the data, tune `neighbors` by cross-validation on the
# training set, refit with the best value, then evaluate on the test set.

set.seed(...)
df <- read_csv("...")
df_split <- initial_split(df, prop = ..., strata = ...)
df_training <- training(df_split)
df_testing <- testing(df_split)

## Model specification with the number of neighbours left open for tuning
knn_spec_tune <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = tune()
) |>
  set_engine("kknn") |>
  set_mode("classification")

## Recipe defined on the training data: centre and scale all predictors
df_recipe <- recipe(... ~ ..., data = df_training) |>
  step_center(all_predictors()) |>
  step_scale(all_predictors())

## Cross-validation folds drawn from the training set
df_vfold <- vfold_cv(df_training, v = ..., strata = ...)

## Candidate values for the number of neighbours
gridvals <- tibble(neighbors = seq(from = ..., to = ..., by = ...))

## Collect the cross-validated accuracy for every candidate number of
## neighbours, sorted so the highest mean accuracy comes first.
## The workflow must use the tunable spec (`knn_spec_tune`), since that is
## the model whose `neighbors` is marked with tune().
knn_tune_result <- workflow() |>
  add_recipe(df_recipe) |>
  add_model(knn_spec_tune) |>
  tune_grid(resamples = df_vfold, grid = gridvals) |>
  collect_metrics() |>
  filter(.metric == "accuracy") |>
  arrange(desc(mean))

## Pull the best number of neighbours (first row after sorting).
## The column is named `neighbors`, matching the grid defined above.
best_k <- knn_tune_result |>
  slice(1) |>
  pull(neighbors)

## Final model specification using the selected number of neighbours
knn_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = best_k
) |>
  set_engine("kknn") |>
  set_mode("classification")

## Fit the final model on the full training set
df_fit <- workflow() |>
  add_recipe(df_recipe) |>
  add_model(knn_spec) |>
  fit(data = df_training)

## Predict the test set and keep the predictions next to the true labels
result <- df_fit |>
  predict(df_testing) |>
  bind_cols(df_testing)

## Test-set accuracy (comparison needs `==`, not `=`)
result_metrics <- result |>
  metrics(truth = ..., estimate = .pred_class) |>
  filter(.metric == "accuracy")

## Confusion matrix of true versus predicted classes on the test set
result_confusion <- result |>
  conf_mat(truth = ..., estimate = .pred_class)

In [2]:
# ##regression (numeric prediction) template

# set.seed(....)
# df <- read_csv("...")
# df_split <- initial_split(df,prop=...,strata = ...)
# df_training <- training(df_split)
# df_testing <- testing(df_split)

# ##testing the number of neighbors 
# knn_tune <- nearest_neighbor(weight_func = "rectangular",neighbors = tune())|>
#   set_engine("kknn")|>
#   set_mode("regression")

# ##recipe of data 
# df_recipe <- recipe(...~... ,data=df_training)

# ##cross-validation fold
# df_vfold <- vfold_cv(df_training, v = ..., strata = ...)

# gridvals = tibble(neighbors = seq(from=...,to=..., by=...))


# ##produce the metrics of cross validation sets with different number of neighbours
# tune_result <- workflow()|>
#   add_model(knn_tune)|>
#   add_recipe(df_recipe)|>
#   tune_grid(resamples=df_vfold,grid=gridvals)|>
#   collect_metrics()

# ##produce the rmse of cross validation sets
# tune_rmse <- tune_result|>
#     filter(.metric=="rmse")

# ##pick the number of neighbors that has the minimal rmse
# best_k <- tune_rmse|>
#   arrange(mean)|>
#   slice(1)|>
#   pull(neighbors)

# knn_spec <- nearest_neighbor(weight_func="rectangular",neighbors = best_k)|>
#   set_engine("kknn")|>
#   set_mode("regression")

# best_fit <- workflow()|>
#   add_recipe(df_recipe)|>
#   add_model(knn_spec)|>
#   fit(data=df_training)

# prediction_result <- best_fit|>
#   predict(df_testing)|>
#   bind_cols(df_testing)

# ##produce the RMSPE (RMSE of the predictions on the test set)
# prediction_rmspe <- prediction_result|>
#   metrics(truth=... ,estimate=.pred)|>
#   filter(.metric=="rmse")|>
#   pull(.estimate)

# training_rmse <- best_fit |>
#   predict(df_training) |>
#   bind_cols(df_training)|>
#   metrics(truth=... ,estimate=.pred)|>
#   filter(.metric=="rmse")|>
#   pull(.estimate)

# ##regression plot 

# training_pred <- best_fit|>
#     predict(df_training)|>
#     bind_cols(df_training)

# prediction_plot <- training_pred|>
#   ggplot(aes(x=...,y=...))+
#   geom_point()+
#   geom_line(data=training_pred,aes(x=...,y=.pred),col="blue")