In [None]:
library(tidyverse)
library(knitr)

In [None]:
# Load the player table; `experience` and `gender` are stored as factors so
# they behave as categorical variables in summaries and plots.
players <- read_csv("players.csv") |>
    mutate(across(c(experience, gender), factor))

# Load the session table and derive two per-session features:
#   duration_min - (original_end_time - original_start_time) scaled by
#                  1000 * 60; assumes both columns are millisecond
#                  timestamps - TODO confirm against the data dictionary.
#   hour         - characters 12-13 of `start_time` as a number; assumes a
#                  fixed-width timestamp with the hour in that position -
#                  TODO confirm.
sessions <- mutate(
    read_csv("sessions.csv"),
    duration_min = (original_end_time - original_start_time) / (1000 * 60),
    hour = as.numeric(substr(start_time, 12, 13))
)

# Quick structural check of both tables.
glimpse(players)
glimpse(sessions)

In [None]:
# One-row summary of the player table: mean total played hours and mean age,
# ignoring missing values in either column.
players_means <- summarise(
    players,
    mean_played_hours = mean(played_hours, na.rm = TRUE),
    mean_age = mean(Age, na.rm = TRUE)
)
players_means

In [None]:
# Session table with the start hour as a numeric column, used by the
# sessions-by-hour plot. The hour is read from characters 12-13 of
# `start_time` (assumes a fixed-width timestamp with the hour in that
# position - TODO confirm).
# Fix: the conversion function is `as.numeric`; the previous `as.numberic`
# does not exist, so this statement errored and stopped the cell.
sessions_hour <- sessions |>
    mutate(hour = as.numeric(substr(start_time, 12, 13)))
# Stacked bars normalised to 1 (position = "fill"): within each experience
# level, the share of players in each `subscribe` group.
players |>
    ggplot(aes(x = experience, fill = subscribe)) +
    geom_bar(position = "fill") +
    labs(
        x = "Experience level",
        y = "Proportion subscribed",
        title = "Newsletter subscription by experience"
    )

# Boxplots of total played hours, one box per `subscribe` group.
ggplot(data = players) +
    geom_boxplot(mapping = aes(x = subscribe, y = played_hours)) +
    labs(
        x = "Subscribed",
        y = "Total played hours",
        title = "Played hours vs subscription"
    )

# Count of sessions starting in each hour of the day.
sessions_hour |>
    ggplot(aes(x = hour)) +
    geom_bar() +
    xlab("Hour of day") +
    ylab("Number of sessions") +
    ggtitle("Session starts by hour (UTC)")

# Distribution of session duration (minutes).
# The bin count is set explicitly: 30 is what geom_histogram() falls back to
# when nothing is given, so the picture is unchanged, but stating it removes
# the "using `bins = 30`" message and makes the choice visible to tune.
ggplot(sessions, aes(x = duration_min)) +
    geom_histogram(bins = 30) +
    labs(x = "Session length (minutes)", y = "Count", title = "Distribution of session duration")

# Per-player session features.
# One row per `hashedEmail`: number of sessions and mean session length in
# minutes (missing durations are ignored in the mean).
per_player <- sessions |>
    group_by(hashedEmail) |>
    summarize(n_sessions = n(),
              mean_session_min = mean(duration_min, na.rm = TRUE))

# Attach the session features to every player. A left join keeps players that
# have no rows in `sessions`; for them the join yields NA, which would drop
# them from any plot of `n_sessions`. A player with no recorded sessions has
# zero sessions, so the count is filled with 0L (n() is integer).
# `mean_session_min` is left NA for those players because a mean session
# length is undefined without sessions.
player_df <- players |>
    left_join(per_player, by = "hashedEmail") |>
    mutate(n_sessions = coalesce(n_sessions, 0L))

# Boxplots of sessions per player, one box per `subscribe` group.
player_df |>
    ggplot(aes(x = subscribe, y = n_sessions)) +
    geom_boxplot() +
    labs(
        x = "Subscribed",
        y = "Number of sessions",
        title = "Sessions per player vs subscription"
    )

# Boxplots of total played hours, one box per experience level.
ggplot(data = players, mapping = aes(x = experience, y = played_hours)) +
    geom_boxplot() +
    xlab("Experience level") +
    ylab("Total played hours") +
    ggtitle("Played hours by experience level")