In [None]:
# all libraries needed
library(tidyverse)
library(tidymodels)
library(forcats)
library(repr)
library(dplyr)

In [None]:
# Wrangling ----
# Read the raw player and session tables.
player_data <- read_csv('data/players.csv')
session_data <- read_csv('data/sessions.csv')

# Row-level join of players to their sessions, exported for inspection.
# Players with no sessions still appear once here (with NA session columns).
merged_player_data <- left_join(player_data, session_data, by = 'hashedEmail')

write.csv(merged_player_data, "merged_file.csv", row.names = FALSE)

# Count sessions from the session table itself. Counting rows of the
# left-joined table with n() would report 1 session for players who have
# none, because the join keeps a single NA-filled row for them.
session_counts <- session_data |>
  count(hashedEmail, name = "num_sessions")

# One row per player: session count, total hours, age bracket and
# experience level (the class label used by the plot and the model below).
final_data_players <- player_data |>
  distinct(hashedEmail, .keep_all = TRUE) |>
  left_join(session_counts, by = "hashedEmail") |>
  arrange(hashedEmail) |>
  mutate(
    # Players absent from the session table have played zero sessions.
    num_sessions = replace_na(num_sessions, 0L),
    # cut() already returns a factor; intervals are right-closed,
    # so e.g. Age 17 falls in "11-17".
    age_group = cut(Age,
                    breaks = c(-Inf, 10, 17, 25, Inf),
                    labels = c("0-10", "11-17", "18-25", "26+"),
                    right = TRUE),
    # NOTE(review): assumes players.csv has an `experience` column holding
    # the self-reported experience level -- confirm against the data file.
    # It must be a factor for stratified splitting and classification.
    experience_group = as_factor(experience)
  ) |>
  select(hashedEmail, num_sessions, played_hours, age_group, experience_group)

final_data_players

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)
# First visualization: sessions played vs. total hours played, coloured by
# experience level, to look for a relationship between the two predictors.

session_to_hours_plot <- final_data_players |>
  ggplot(aes(x = num_sessions, y = played_hours, color = experience_group)) +
  geom_point() +
  labs(x = 'number of sessions played',
       y = 'total hours played',
       color = 'Minecraft experience') +
  # Zoom in on players with at most 50 sessions; points outside this range
  # are dropped from the plot (ggplot2 warns about removed rows).
  xlim(0, 50) +
  # ylim() needs both limits; NA lets ggplot2 pick the upper one from the data.
  ylim(0, NA)
session_to_hours_plot

# If the experience levels are ordered amateur, beginner, regular, pro, and then veteran, this data actually shows that experienced players play less.

In [None]:
# Partition the players into training (75%) and testing (25%) sets,
# stratified on experience_group. The seed makes the split reproducible.
set.seed(3456)

player_split <- final_data_players |>
  initial_split(prop = 0.75, strata = experience_group)

player_train <- player_split |> training()
player_test <- player_split |> testing()

player_train
player_test

In [None]:
# Build the k-NN workflow and estimate its performance by cross-validation.

# Model: 3-nearest-neighbours classifier with unweighted ("rectangular") votes.
knn_spec <- nearest_neighbor(neighbors = 3, weight_func = "rectangular") |>
  set_engine("kknn") |>
  set_mode("classification")

# Preprocessing: predict experience_group from the two numeric predictors,
# scaling and then centring them so neither dominates the distance metric.
player_recipe <- recipe(
  experience_group ~ num_sessions + played_hours,
  data = player_train
) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# 8-fold cross-validation on the training set, stratified by the label.
player_vfold <- player_train |>
  vfold_cv(v = 8, strata = experience_group)

# Fit the recipe + model on every fold.
player_resample_fit <- fit_resamples(
  workflow() |>
    add_recipe(player_recipe) |>
    add_model(knn_spec),
  resamples = player_vfold
)

# Metrics aggregated across the folds.
player_metrics <- player_resample_fit |> collect_metrics()
player_metrics

In [None]:
# Evaluate on the held-out test set ----

# Fit the workflow once on the full training set. fit_resamples() only
# returns per-fold metrics, not a fitted model that predict() can use.
player_fit <- workflow() |>
  add_recipe(player_recipe) |>
  add_model(knn_spec) |>
  fit(data = player_train)

# Predicted class (.pred_class) alongside the original test rows.
player_test_predictions <- predict(player_fit, player_test) |>
  bind_cols(player_test)

player_test_predictions

# Test-set performance: compare predictions against the true labels.
player_test_metrics <- player_test_predictions |>
  metrics(truth = experience_group, estimate = .pred_class)
player_test_metrics

# Cross-validation metrics again, for comparison with the test-set result.
player_metrics <- collect_metrics(player_resample_fit)
player_metrics