In [None]:
#load libraries
library(tidyverse)
library(dplyr)
library(ggplot2)

In [None]:
# Load the two project CSV files into tibbles:
# - players:  read from project_data/players.csv
# - sessions: read from project_data/sessions.csv
players <- read_csv(file = "project_data/players.csv")
sessions <- read_csv(file = "project_data/sessions.csv")

In [None]:
# Print a compact overview (column names, types, first values) of each table
players |> glimpse()
sessions |> glimpse()

In [None]:
library(lubridate)
# Derive the duration of every session, in minutes.
# Step 1: parse start_time and end_time into date-times with ymd_hms().
# Step 2: store end_time - start_time, expressed in minutes, as the numeric
#         column session_length.
#
# NOTE(review): ymd_hms() expects "year-month-day hour:minute:second" input and
# returns NA (with a warning) for values in any other layout -- confirm the raw
# columns in sessions.csv actually use this format.

# Step 1: convert both timestamp columns in place
sessions <- mutate(sessions, across(c(start_time, end_time), ymd_hms))

# Step 2: elapsed time between the two timestamps, as plain minutes
sessions <- mutate(
  sessions,
  session_length = as.numeric(difftime(end_time, start_time, units = "mins"))
)

In [None]:
# Collapse the session log to one row per player (keyed by hashedEmail):
# - total_sessions:     number of session rows recorded for the player
# - total_playtime:     sum of session_length (minutes) over those rows
# - avg_session_length: mean of session_length (minutes) over those rows
# Rows whose session_length is NA still count towards total_sessions, but are
# dropped from the sum and the mean because of na.rm = TRUE.

player_summary <- summarise(
  group_by(sessions, hashedEmail),
  total_sessions = n(),
  total_playtime = sum(session_length, na.rm = TRUE),
  avg_session_length = mean(session_length, na.rm = TRUE)
)

# Preview the first five rows of the new player_summary table
head(player_summary, n = 5)