### Importing Libraries

In [None]:
library(tidyverse)
library(repr)
library(tidymodels)
library(cowplot)
options(repr.matrix.max.rows = 6)

### Loading and Cleaning Data

In [None]:
# Load the raw player table from disk.
player_data_raw <- read_csv("data/players.csv")

# Keep only the two modelling columns (renaming `Age` to `age`) and only then
# drop incomplete rows. Selecting first means a missing value in a column the
# analysis never uses cannot discard an otherwise usable observation.
player_data_clean <- player_data_raw |>
  select(age = Age, played_hours) |>
  drop_na()

### Regression

# Seed the RNG before splitting so that KNN regression and linear regression
# are compared on exactly the same train/test partition.
set.seed(2000)

# 75% of the rows go to training, stratified on the response variable.
player_split <- player_data_clean |>
  initial_split(prop = 0.75, strata = played_hours)
player_training <- player_split |> training()
player_testing <- player_split |> testing()

In [None]:
# KNN regression: predict played_hours from age.
# The predictor is scaled and then centred before distances are computed.
knn_player_recipe <- recipe(played_hours ~ age, data = player_training) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# Unweighted ("rectangular") neighbours. `neighbors` is not set here, so the
# default number of neighbours is used rather than a tuned value.
knn_player_spec <- nearest_neighbor(
  mode = "regression",
  weight_func = "rectangular"
) |>
  set_engine("kknn")

# Bundle the preprocessing and the model, then fit on the training set.
knn_player_fit <- workflow() |>
  add_model(knn_player_spec) |>
  add_recipe(knn_player_recipe) |>
  fit(data = player_training)


# Linear regression: predict played_hours from age using ordinary `lm`.
# No preprocessing steps are applied in this recipe.
lin_player_recipe <- recipe(played_hours ~ age, data = player_training)

lin_player_spec <- linear_reg(mode = "regression") |>
  set_engine("lm")

# Bundle the recipe and the model, then fit on the training set.
lin_player_fit <- workflow() |>
  add_model(lin_player_spec) |>
  add_recipe(lin_player_recipe) |>
  fit(data = player_training)

### Displaying the Linear Regression Fit

In [None]:
# Size of the figure rendered in the notebook.
options(repr.plot.width = 8, repr.plot.height = 7)

# Attach the fitted model's predictions (.pred) to the training rows.
lin_player_preds <- bind_cols(
  predict(lin_player_fit, player_training),
  player_training
)

# Scatter plot of the training data with the fitted regression line in blue.
lin_player_predictions <- ggplot(
  lin_player_preds,
  aes(x = age, y = played_hours)
) +
  geom_point(alpha = 0.4) +
  geom_line(aes(y = .pred), colour = "blue") +
  labs(x = "Age (yrs)", y = "Total Playtime (hrs)") +
  theme(text = element_text(size = 20))

# Render the plot as the cell output.
lin_player_predictions



### Checking Which Model Is Better

 # Score the linear regression model on the held-out test set.
lin_player_test_results <- lin_player_fit |>
  predict(player_testing) |>
  bind_cols(player_testing) |>
  metrics(truth = played_hours, estimate = .pred)

# RMSE computed on test data, i.e. the RMSPE. pull() extracts the column as a
# plain numeric vector instead of coercing a one-cell tibble with as.numeric().
lin_player_rmspe <- lin_player_test_results |>
  filter(.metric == "rmse") |>
  pull(.estimate)

# Display the full metrics table and the extracted RMSPE.
lin_player_test_results
lin_player_rmspe


In [None]:
 # Score the KNN regression model on the held-out test set.
knn_player_test_results <- knn_player_fit |>
  predict(player_testing) |>
  bind_cols(player_testing) |>
  metrics(truth = played_hours, estimate = .pred)

# RMSE computed on test data, i.e. the RMSPE. pull() extracts the column as a
# plain numeric vector instead of coercing a one-cell tibble with as.numeric().
knn_player_rmspe <- knn_player_test_results |>
  filter(.metric == "rmse") |>
  pull(.estimate)

# Display the full metrics table and the extracted RMSPE.
knn_player_test_results
knn_player_rmspe

Note: I'm currently working in VS Code, which isn't cooperating with R, but I have already run this notebook in Jupyter and everything should work. Jupyter may try to treat the code as plain text, so if a cell doesn't run, double-check that it is recognized as R code. I also found linear regression to be more accurate than KNN regression; I evaluated both models to be safe.