In [1]:
library(tidyverse)
library(repr)
library(tidymodels)
library(rvest)
library(readxl)
library(RColorBrewer)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.2
✔ tidyr   1.1.2     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0

“package ‘ggplot2’ was built under R version 4.0.1”
“package ‘tibble’ was built under R version 4.0.2”
“package ‘tidyr’ was built under R version 4.0.2”
“package ‘dplyr’ was built under R version 4.0.2”
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

“package ‘tidymodels’ was built under R version 4.0.2”
── Attaching packages ────────────────────────────────────── tidymodels 0.1.1 ──

[32m✔

In [41]:
# Fetch the workbook from the UCI repository into the working directory.
# mode = "wb" forces a binary write: without it, download.file() may write
# the .xls in text mode on Windows and corrupt the file.
data_url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/00257/Data_User_Modeling_Dataset_Hamdi%20Tolga%20KAHRAMAN.xls"
data_file <- "user_knowledge.xls"
download.file(url = data_url, destfile = data_file, mode = "wb")

# Read one worksheet of the workbook and return the five modelling columns
# under descriptive names, with `knowledge_level` converted to a number.
#
# path:  path to the downloaded .xls file.
# sheet: worksheet index passed on to read_excel().
#
# The lowest knowledge label is spelled "very_low" in sheet 2 and "Very Low"
# in sheet 3, so both spellings are mapped to 1 here; recode() ignores a
# mapping whose label does not occur in the column.
read_knowledge_sheet <- function(path, sheet) {
  read_excel(path, sheet = sheet) %>%
    # select() renames while selecting; the column order matches the
    # original select(STG, LPR, PEG, UNS, STR).
    select(
      study_time_goal = STG,
      exam_performance_related = LPR,
      exam_performance_goal = PEG,
      knowledge_level = UNS,
      study_time_related = STR
    ) %>%
    mutate(
      knowledge_level = recode(
        knowledge_level,
        very_low = "1",
        `Very Low` = "1",
        Low = "2",
        Middle = "3",
        High = "4"
      ),
      # The labels are recoded to digit strings first, then parsed to double.
      knowledge_level = as.numeric(knowledge_level)
    )
}

# Sheet 2 is used as the training set, sheet 3 as the testing set.
user_training <- read_knowledge_sheet(data_file, sheet = 2)
user_training

user_testing <- read_knowledge_sheet(data_file, sheet = 3)
user_testing

New names:
* `` -> ...7
* `` -> ...8



study_time_goal,exam_performance_related,exam_performance_goal,knowledge_level,study_time_related
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.00,0.00,0.00,1,0.00
0.08,0.24,0.90,4,0.10
0.06,0.25,0.33,2,0.05
0.10,0.65,0.30,3,0.15
0.08,0.98,0.24,2,0.08
0.09,0.10,0.66,3,0.40
0.10,0.29,0.56,3,0.43
0.15,0.40,0.01,1,0.34
0.20,0.72,0.25,2,0.35
0.00,0.20,0.85,4,0.50


New names:
* `` -> ...7
* `` -> ...8



study_time_goal,exam_performance_related,exam_performance_goal,knowledge_level,study_time_related
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0.00,0.26,0.05,1,0.50
0.05,0.60,0.14,2,0.55
0.08,0.60,0.85,4,0.63
0.20,0.67,0.85,4,0.68
0.22,0.30,0.90,4,0.90
0.14,0.50,0.30,2,0.70
0.16,0.50,0.50,3,0.80
0.12,0.68,0.15,2,0.75
0.20,0.77,0.80,4,0.88
0.16,0.10,0.07,1,0.01


In [54]:
# Fix the RNG state so the cross-validation split below is reproducible.
set.seed(2357)

# Regress exam_performance_goal on three predictors. The recipe carries no
# preprocessing steps.
# NOTE(review): knowledge_level is coded 1-4 while the other predictors print
# in the [0, 1] range; no centering/scaling step is added here -- confirm that
# the kknn engine's own scaling is what is intended.
knowledge_recipe <- recipe(
  exam_performance_goal ~ study_time_goal + exam_performance_related + knowledge_level,
  data = user_training
)

# Unweighted ("rectangular") K-nearest-neighbours regression; the number of
# neighbours is left as a tuning parameter.
training_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = tune()
) %>%
  set_engine("kknn") %>%
  set_mode("regression")

# 5-fold cross-validation on the training data, stratified on the response.
training_vfold <- vfold_cv(
  user_training,
  v = 5,
  strata = exam_performance_goal
)

# Candidate values of K: 1 through 20.
neighbors <- tibble(neighbors = seq_len(20))

# Evaluate every candidate K on the folds and collect the resampled metrics.
tuning_workflow <- workflow() %>%
  add_recipe(knowledge_recipe) %>%
  add_model(training_spec)

tuning_metrics <- tuning_workflow %>%
  tune_grid(resamples = training_vfold, grid = neighbors) %>%
  collect_metrics()

# Keep only the RMSE rows and retain the single row with the smallest mean
# RMSE. Note that `training_fit` is a one-row metrics tibble, not a fitted
# model.
training_fit <- tuning_metrics %>%
  filter(.metric == "rmse") %>%
  arrange(mean) %>%
  slice(1)
training_fit

neighbors,.metric,.estimator,mean,n,std_err,.config
<int>,<chr>,<chr>,<dbl>,<int>,<dbl>,<chr>
4,rmse,standard,0.06705298,5,0.005270866,Model04


In [55]:
# Number of neighbours chosen by cross-validation: `training_fit` is the
# one-row tibble holding the lowest-RMSE candidate. Reading K from it keeps
# this cell in sync with the tuning result instead of hard-coding the value.
best_k <- training_fit %>%
  pull(neighbors)

# Final model specification with the selected K. This is assigned to
# `knowledge_spec` only, so the tunable `training_spec` is not overwritten.
knowledge_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = best_k
) %>%
  set_engine("kknn") %>%
  set_mode("regression")

# Fit the recipe + model workflow on the full training set.
knowledge_fit <- workflow() %>%
  add_recipe(knowledge_recipe) %>%
  add_model(knowledge_spec) %>%
  fit(user_training)

# Predict on the testing set, attach the predictions to the observed values,
# and report the test RMSE.
knowledge_results <- predict(knowledge_fit, user_testing) %>%
  bind_cols(user_testing) %>%
  metrics(truth = exam_performance_goal, estimate = .pred) %>%
  filter(.metric == "rmse")
knowledge_results

.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
rmse,standard,0.1055438
