In [1]:
# Packages attached for this notebook.
library(tidyverse)
library(repr)
library(tidymodels)
library(GGally)
library(ISLR)
# NOTE(review): dplyr is already attached by library(tidyverse) above, so this
# call looks redundant -- confirm before removing.
library(dplyr)
# Sets the repr option `repr.matrix.max.rows` to 6; presumably this caps how
# many rows of a table are rendered in notebook output -- confirm against the
# repr documentation.
options(repr.matrix.max.rows = 6)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.1.1 ──

[32m✔[39m [34mbroom       [39m 1.0.6     [32m✔[39m [34mrsample     [39

In [2]:
# Read the players CSV from the URL below (column-type message suppressed)
# and keep it as a tibble.
players <- as_tibble(
  read_csv(
    file = "https://raw.githubusercontent.com/Chalkkk/dsci-final-group-project/refs/heads/main/players.csv",
    show_col_types = FALSE
  )
)
players

experience,subscribe,hashedEmail,played_hours,name,gender,Age
<chr>,<lgl>,<chr>,<dbl>,<chr>,<chr>,<dbl>
Pro,TRUE,f6daba428a5e19a3d47574858c13550499be23603422e6a0ee9728f8b53e192d,30.3,Morgan,Male,9
Veteran,TRUE,f3c813577c458ba0dfef80996f8f32c93b6e8af1fa939732842f2312358a88e9,3.8,Christian,Male,17
Veteran,FALSE,b674dd7ee0d24096d1c019615ce4d12b20fcbff12d79d3c5a9d2118eb7ccbb28,0.0,Blake,Male,17
⋮,⋮,⋮,⋮,⋮,⋮,⋮
Amateur,FALSE,d572f391d452b76ea2d7e5e53a3d38bfd7499c7399db299bd4fedb06a46ad5bb,0.0,Dylan,Prefer not to say,17
Amateur,FALSE,f19e136ddde68f365afc860c725ccff54307dedd13968e896a9f890c40aea436,2.3,Harlow,Male,17
Pro,TRUE,d9473710057f7d42f36570f0be83817a4eea614029ff90cf50d8889cdd729d11,0.2,Ahmed,Other,


In [3]:
# Keep only the three columns used below, renaming `Age` to lower-case `age`
# in the same step.
players <- select(players, experience, played_hours, age = Age)
players

experience,played_hours,age
<chr>,<dbl>,<dbl>
Pro,30.3,9
Veteran,3.8,17
Veteran,0.0,17
⋮,⋮,⋮
Amateur,0.0,17
Amateur,2.3,17
Pro,0.2,


In [4]:
# Summary of played hours over all rows: max, min, total, mean, and median,
# each computed with missing values removed.
hours_played_sum <- summarise(
  players,
  across(
    played_hours,
    list(
      max = \(h) max(h, na.rm = TRUE),
      min = \(h) min(h, na.rm = TRUE),
      total = \(h) sum(h, na.rm = TRUE),
      average = \(h) mean(h, na.rm = TRUE),
      median = \(h) median(h, na.rm = TRUE)
    ),
    # Yields max_played_hours, min_played_hours, ... in the order listed above.
    .names = "{.fn}_{.col}"
  )
)
hours_played_sum

max_played_hours,min_played_hours,total_played_hours,average_played_hours,median_played_hours
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
223.1,0,1145.8,5.845918,0.1


In [5]:
# Total, average, and median played hours per experience level
# (missing values removed before aggregating).
hours_experience_sum <- summarise(
  group_by(players, experience),
  across(
    played_hours,
    list(
      total = \(h) sum(h, na.rm = TRUE),
      average = \(h) mean(h, na.rm = TRUE),
      median = \(h) median(h, na.rm = TRUE)
    ),
    # Yields total_played_hours, average_played_hours, median_played_hours.
    .names = "{.fn}_{.col}"
  )
)
hours_experience_sum

experience,total_played_hours,average_played_hours,median_played_hours
<chr>,<dbl>,<dbl>,<dbl>
Amateur,379.1,6.0174603,0.1
Beginner,43.7,1.2485714,0.1
Pro,36.4,2.6,0.3
Regular,655.5,18.2083333,0.1
Veteran,31.1,0.6479167,0.1


In [6]:
# Largest and smallest played-hours value within each experience level.
hours_experience_range <- summarise(
  group_by(players, experience),
  max_played_hours = max(played_hours, na.rm = TRUE),
  min_played_hours = min(played_hours, na.rm = TRUE)
)
hours_experience_range

experience,max_played_hours,min_played_hours
<chr>,<dbl>,<dbl>
Amateur,150.0,0
Beginner,23.7,0
Pro,30.3,0
Regular,223.1,0
Veteran,12.5,0


In [7]:
# Total, average, and median played hours for each distinct age value.
# Rows with a missing age form their own group.
hours_age_sum <- summarise(
  group_by(players, age),
  total_played_hours = sum(played_hours, na.rm = TRUE),
  average_played_hours = mean(played_hours, na.rm = TRUE),
  median_played_hours = median(played_hours, na.rm = TRUE)
)
hours_age_sum

age,total_played_hours,average_played_hours,median_played_hours
<dbl>,<dbl>,<dbl>,<dbl>
8,0.3,0.3,0.3
9,30.3,30.3,30.3
10,3.6,3.6,3.6
⋮,⋮,⋮,⋮
49,18.5,18.50,18.50
50,0.0,0.00,0.00
,0.3,0.15,0.15


In [8]:
# Largest and smallest played-hours value for each distinct age value.
# Rows with a missing age form their own group.
hours_age_range <- summarise(
  group_by(players, age),
  across(
    played_hours,
    list(
      max = \(h) max(h, na.rm = TRUE),
      min = \(h) min(h, na.rm = TRUE)
    ),
    # Yields max_played_hours and min_played_hours.
    .names = "{.fn}_{.col}"
  )
)
hours_age_range

age,max_played_hours,min_played_hours
<dbl>,<dbl>,<dbl>
8,0.3,0.3
9,30.3,30.3
10,3.6,3.6
⋮,⋮,⋮
49,18.5,18.5
50,0.0,0.0
,0.2,0.1


In [9]:
# Total, average, and median played hours for every (age, experience) pair,
# with missing played-hours values removed before aggregating.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `age` (so later verbs would silently run per age) and dplyr
# prints a "grouped output" message.
hours_age_exp_sum <- players |>
  group_by(age, experience) |>
  summarize(
    total_played_hours = sum(played_hours, na.rm = TRUE),
    average_played_hours = mean(played_hours, na.rm = TRUE),
    median_played_hours = median(played_hours, na.rm = TRUE),
    .groups = "drop"
  )
hours_age_exp_sum

[1m[22m`summarise()` has grouped output by 'age'. You can override using the `.groups`
argument.


age,experience,total_played_hours,average_played_hours,median_played_hours
<dbl>,<chr>,<dbl>,<dbl>,<dbl>
8,Regular,0.3,0.3,0.3
9,Pro,30.3,30.3,30.3
10,Regular,3.6,3.6,3.6
⋮,⋮,⋮,⋮,⋮
50,Regular,0.0,0.0,0.0
,Pro,0.2,0.2,0.2
,Regular,0.1,0.1,0.1


In [10]:
# Maximum and minimum played hours for every (age, experience) pair,
# with missing played-hours values removed before aggregating.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `age` and dplyr prints a "grouped output" message.
hours_age_exp_range <- players |>
  group_by(age, experience) |>
  summarize(
    max_played_hours = max(played_hours, na.rm = TRUE),
    min_played_hours = min(played_hours, na.rm = TRUE),
    .groups = "drop"
  )
hours_age_exp_range

[1m[22m`summarise()` has grouped output by 'age'. You can override using the `.groups`
argument.


age,experience,max_played_hours,min_played_hours
<dbl>,<chr>,<dbl>,<dbl>
8,Regular,0.3,0.3
9,Pro,30.3,30.3
10,Regular,3.6,3.6
⋮,⋮,⋮,⋮
50,Regular,0.0,0.0
,Pro,0.2,0.2
,Regular,0.1,0.1


In [11]:
# Total, average, and median played hours for every (experience, age) pair,
# ordered by experience level first and age second.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `experience` and dplyr prints a "grouped output" message.
hours_exp_age_sum <- players |>
  group_by(experience, age) |>
  summarize(
    total_played_hours = sum(played_hours, na.rm = TRUE),
    average_played_hours = mean(played_hours, na.rm = TRUE),
    median_played_hours = median(played_hours, na.rm = TRUE),
    .groups = "drop"
  )
hours_exp_age_sum

[1m[22m`summarise()` has grouped output by 'experience'. You can override using the
`.groups` argument.


experience,age,total_played_hours,average_played_hours,median_played_hours
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Amateur,11,2.9,2.9,2.9
Amateur,14,17.2,17.2,17.2
Amateur,16,150.0,75.0,75.0
⋮,⋮,⋮,⋮,⋮
Veteran,38,0.2,0.2,0.2
Veteran,44,0.1,0.1,0.1
Veteran,46,0.0,0.0,0.0


In [13]:
# Maximum and minimum played hours for every (experience, age) pair,
# ordered by experience level first and age second.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `experience` and dplyr prints a "grouped output" message.
hours_exp_age_range <- players |>
  group_by(experience, age) |>
  summarize(
    max_played_hours = max(played_hours, na.rm = TRUE),
    min_played_hours = min(played_hours, na.rm = TRUE),
    .groups = "drop"
  )
hours_exp_age_range

[1m[22m`summarise()` has grouped output by 'experience'. You can override using the
`.groups` argument.


experience,age,max_played_hours,min_played_hours
<chr>,<dbl>,<dbl>,<dbl>
Amateur,11,2.9,2.9
Amateur,14,17.2,17.2
Amateur,16,150.0,0.0
⋮,⋮,⋮,⋮
Veteran,38,0.2,0.2
Veteran,44,0.1,0.1
Veteran,46,0.0,0.0
