In [None]:
# Packages for this notebook: tidyverse supplies read_csv(), the dplyr verbs
# and ggplot2; tidymodels supplies the splitting, recipe, model and workflow
# functions used further down.
library(tidyverse)
# repr is loaded so the display option set below is available.
library(repr)
# NOTE(review): infer and cowplot are not used by any cell visible here --
# confirm whether they are still needed.
library(infer)
library(cowplot)
library(tidymodels)
# Show at most 6 rows when a data frame is displayed in a cell output.
options(repr.matrix.max.rows = 6)

**DATA WRANGLING**

In [None]:
# Load the player table, store `subscribe` as a factor (the class label for
# the classifier), then drop rows where Age is missing.
players <- read_csv("data/players.csv")
players <- mutate(players, subscribe = as_factor(subscribe))
players <- filter(players, !is.na(Age))
players

In [None]:
# Keep only the three columns used below: subscribe, played_hours and Age.
players_wrangled <- select(players, subscribe, played_hours, Age)

In [None]:
# Rows for players whose `subscribe` value is TRUE.
players_wrangled_true <- filter(players_wrangled, subscribe == TRUE)
players_wrangled_true

In [None]:
# Rows for players whose `subscribe` value is FALSE.
players_wrangled_false <- filter(players_wrangled, subscribe == FALSE)
players_wrangled_false

In [None]:
# Mean of played_hours among subscribed players, as a one-row tibble.
players_wrangled_true_avghours <- summarise(
  players_wrangled_true,
  mean_hours = mean(played_hours)
)
players_wrangled_true_avghours

In [None]:
# Mean of played_hours among non-subscribed players, as a one-row tibble.
players_wrangled_false_avghours <- summarise(
  players_wrangled_false,
  mean_hours = mean(played_hours)
)
players_wrangled_false_avghours

In [None]:
# Number of subscribed players (row count of the TRUE subset).
subscribed_player_count <- nrow(players_wrangled_true)
subscribed_player_count

In [None]:
# Number of non-subscribed players (row count of the FALSE subset).
nonsubscribed_player_count <- nrow(players_wrangled_false)
nonsubscribed_player_count

In [None]:
# Split the data 75% / 25% into training and test sets, stratified on
# `subscribe`.
#
# initial_split() assigns rows at random, so the seed is fixed first.
# Without it every run yields a different split and therefore different
# fitted models and predictions in the cells below.
set.seed(2024)
players_split <- initial_split(
  players_wrangled,
  prop = 0.75,
  strata = subscribe
)
players_train <- training(players_split)
players_test <- testing(players_split)
players_train
players_test

In [None]:
# Preprocessing: centre and scale both predictors so that played_hours and
# Age contribute on a comparable scale to the K-NN distance.
# The recipe is declared from the training set only, matching the data the
# workflow is fitted on, so the preprocessing specification never refers to
# the held-out test rows.
subscribe_recipe <- recipe(
  subscribe ~ played_hours + Age,
  data = players_train
) |>
  step_center(all_predictors()) |>
  step_scale(all_predictors())

# Model: K-nearest neighbours classifier with K = 5 and unweighted
# ("rectangular") votes, using the kknn engine.
subscribe_model <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = 5
) |>
  set_engine("kknn") |>
  set_mode("classification")

# Bundle recipe and model into a workflow and fit it on the training data.
subscribe_fit <- workflow() |>
  add_recipe(subscribe_recipe) |>
  add_model(subscribe_model) |>
  fit(data = players_train)

In [None]:
# Predict a class for every test-set player, then place the predictions
# next to the original test rows for side-by-side inspection.
players_predictions <- subscribe_fit |> predict(new_data = players_test)
players_test_predictions <- players_test |> bind_cols(players_predictions)

players_test_predictions

In [None]:
# Display the test rows and the predictions made for them, one after the other.
players_test
players_predictions

In [None]:
# Stacked histogram of hours played (1-hour bins), filled by subscription
# status, zoomed to the -1..20 hour range.
players_dist <- players_wrangled |>
  ggplot(aes(x = played_hours, fill = subscribe)) +
  # `color` is the bar outline and must be a valid colour name; the previous
  # value "fill" is not one and made the plot fail when drawn.
  geom_histogram(binwidth = 1, color = "white") +
  # Zoom the view instead of setting scale limits: scale limits discard
  # rows (and boundary bins) outside the range before counting, with a
  # warning, whereas coord_cartesian() only crops what is shown.
  coord_cartesian(xlim = c(-1, 20)) +
  labs(
    x = "Number of Hours Played",
    y = "Number of Players",
    fill = "Subscribed",
    title = "Distribution of Players"
  )

players_dist

In [None]:
# Classify one hypothetical player (1000 hours played, Age 90) with the
# fitted workflow.
new_obs <- tibble(played_hours = 1000, Age = 90)
class_prediction <- subscribe_fit |> predict(new_data = new_obs)
class_prediction