In [None]:
library(tidyverse)
library(tidymodels)

In [None]:
# Read the players table and, in the same pipeline, store subscribe, gender
# and experience as factors so they behave as discrete groups downstream.
player_data <- read_csv(
  "https://raw.githubusercontent.com/PraiseBeTheSun/DSCI-100-GROUP-PROJECT/refs/heads/main/data/players.csv"
) |>
  mutate(across(c(subscribe, gender, experience), as.factor))

# Read the sessions table as-is (no type conversion is applied here).
session_data <- read_csv(
  "https://raw.githubusercontent.com/PraiseBeTheSun/DSCI-100-GROUP-PROJECT/refs/heads/main/data/sessions.csv"
)

# Overall mean of hours played and of age across all players; missing values
# are dropped before averaging.
player_means <- summarize(
  player_data,
  mean_played_hours = mean(played_hours, na.rm = TRUE),
  mean_age = mean(Age, na.rm = TRUE)
)

# Show the two means in long form: one row per statistic, with the statistic
# name in `variable` and its value in `mean_value`.
pivot_longer(
  player_means,
  cols = c(mean_played_hours, mean_age),
  names_to = "variable",
  values_to = "mean_value"
)

In [None]:
# Set the repr.plot.* size options (presumably read by the notebook's R kernel
# when rendering figures - confirm). This is a global option change, so it
# also applies to every later plot in the session.
options(repr.plot.width = 12, repr.plot.height = 8)

# One bar per experience level, filled by subscribe. position = "fill"
# rescales every bar to height 1 so the segments show proportions, not counts.
# NOTE(review): the y label starts lowercase, unlike the other labels; it is
# left unchanged here.
player_data |>
  ggplot(aes(x = experience, fill = subscribe)) +
  geom_bar(position = "fill") +
  labs(
    title = "Proportion of Subscribers by Experience",
    x = "Experience Level",
    y = "proportion of Players"
  )

Summary Table

In [None]:
# One-row summary of the player data: player count, age statistics, the
# share of subscribers, and mean hours played split by subscription status.
#
# `subscribe` is compared against the level labels "TRUE" / "FALSE" on every
# line. It is stored as a factor, so comparing it with the logical constants
# TRUE / FALSE only works through implicit logical -> character coercion;
# the string comparison is explicit and also works if the column is logical.
summary_table <- player_data |>
  summarize(
    n_players = n(),
    mean_age = mean(Age, na.rm = TRUE),
    median_age = median(Age, na.rm = TRUE),
    # Mean of a logical vector is the proportion of TRUEs. na.rm = TRUE keeps
    # the rate defined when some subscribe values are missing, matching the
    # NA handling of the other statistics.
    subscribe_rate = mean(subscribe == "TRUE", na.rm = TRUE),
    mean_hours_sub = mean(played_hours[subscribe == "TRUE"], na.rm = TRUE),
    mean_hours_nosub = mean(played_hours[subscribe == "FALSE"], na.rm = TRUE)
  )

# Print the summary as the cell output.
summary_table

Age vs Subscription

In [None]:
# Side-by-side boxplots of Age, one box per subscribe level; the fill colour
# repeats the grouping shown on the x axis.
player_data |>
  ggplot(aes(x = subscribe, y = Age, fill = subscribe)) +
  geom_boxplot() +
  labs(
    title = "Age Distribution: Subscribers vs Non-Subscribers",
    x = "Subscription Status",
    y = "Age"
  )

Experience vs Subscription — Count Plot

In [None]:
# Player counts per experience level, with one bar per subscribe level drawn
# side by side (position = "dodge") rather than stacked.
player_data |>
  ggplot(aes(x = experience, fill = subscribe)) +
  geom_bar(position = "dodge") +
  ggtitle("Subscription Count by Experience Level") +
  xlab("Experience Level") +
  ylab("Number of Players")