In [None]:

library(tidyverse)

# Load the raw inputs ----
players <- read_csv("players.csv")
sessions <- read_csv("sessions.csv")

# Session lengths ----
# Timestamps are parsed as day-month-year hour:minute; the length of each
# session is then expressed as a plain number of minutes.
sessions <- sessions %>%
  mutate(
    start = lubridate::dmy_hm(start_time),
    end = lubridate::dmy_hm(end_time)
  ) %>%
  mutate(duration = as.numeric(difftime(end, start, units = "mins")))

# Per-player usage ----
# One row per hashedEmail; sessions whose duration is NA are left out of
# the sum rather than turning the whole total into NA.
usage <- sessions %>%
  group_by(hashedEmail) %>%
  summarise(total_minutes = sum(duration, na.rm = TRUE), .groups = "drop")

# Analysis table ----
# Every player is kept by the left join; players without any matching
# session rows end up with 0 minutes. `subscribe` is cast to numeric
# (presumably TRUE/FALSE -> 1/0; confirm against players.csv), and rows
# with a missing Age are removed.
df <- left_join(players, usage, by = "hashedEmail") %>%
  mutate(
    total_minutes = coalesce(total_minutes, 0),
    subscribe = as.numeric(subscribe)
  ) %>%
  filter(!is.na(Age))


In [None]:

# Figure 1: boxplots of total play time, one box per experience level,
# with each box filled by that same level.
ggplot(
  data = df,
  mapping = aes(x = experience, y = total_minutes, fill = experience)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Figure 1: Experience vs Total Play Time (mins)")


In [None]:

# Figure 2: scatter of total play time against age, overlaid with a
# least-squares trend line and its confidence band.
ggplot(df, aes(x = Age, y = total_minutes)) +
  geom_point() +
  # Spell out the default formula so ggplot2 does not print its
  # "using formula = 'y ~ x'" message on every render; the fit is unchanged.
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(title = "Figure 2: Age vs Total Play Time (mins)") +
  theme_minimal()


In [None]:

# Ordinary least squares: total play time (minutes) regressed on age,
# the numeric subscription flag, and played_hours.
# NOTE(review): played_hours is not created in this script (presumably a
# column of players.csv). If it measures the same play time that
# total_minutes sums up from the session log, it will dominate the fit and
# mask the other predictors -- confirm it belongs in the model.
model <- lm(
  formula = total_minutes ~ Age + subscribe + played_hours,
  data = df
)

# Coefficient table, residual summary and fit statistics.
summary(model)
