In [None]:
import pandas as pd
import glob
import matplotlib.pyplot as plt

from src.visual_utils import plot_data_bar, plot_data_line, plot_data_line_multiple, plot_by_segment
from src.stats_utils import calculate_retention, print_basic_stats

In [None]:
# Collect the raw gzip-compressed CSV exports. glob returns matches in
# arbitrary (OS-dependent) order, so sort them to keep the row order of the
# combined frame reproducible between runs.
files = sorted(glob.glob("data/raw/*.csv.gz"))
if not files:
    # Fail early with a message that names the pattern, instead of the opaque
    # "No objects to concatenate" error pd.concat raises on an empty list.
    raise FileNotFoundError("No files matching 'data/raw/*.csv.gz' were found.")

# Import data
dfs = [pd.read_csv(f, compression="gzip") for f in files]
# ignore_index=True rebuilds a unique 0..n-1 index; without it every file
# contributes its own row labels and the combined index contains duplicates.
df = pd.concat(dfs, ignore_index=True)

# Convert date values
df["install_date"] = pd.to_datetime(df["install_date"])
df["event_date"] = pd.to_datetime(df["event_date"])

# Add a new column for later use: whole days elapsed between install and event.
df["days_after_install"] = (df["event_date"] - df["install_date"]).dt.days

In [None]:
# Distinct values of the "platform" column, as a plain Python list.
platforms = pd.unique(df["platform"]).tolist()

In [None]:
def total_revenue(segment_df: pd.DataFrame) -> pd.Series:
    """Return ad plus IAP revenue summed per event date.

    Args:
        segment_df: Frame with "event_date", "ad_revenue" and "iap_revenue"
            columns.

    Returns:
        Series indexed by "event_date" whose values are the per-date sum of
        "ad_revenue" added to the per-date sum of "iap_revenue".
    """
    by_day = segment_df.groupby("event_date")
    return by_day["ad_revenue"].sum() + by_day["iap_revenue"].sum()

# Pass the full frame, the platform list, the "platform" column name and the
# total_revenue aggregator to the project plotting helper.
# NOTE(review): presumably the helper applies the aggregator once per platform
# value and draws one series each — confirm in src.visual_utils.
plot_by_segment(df, platforms, "platform", total_revenue)

In [None]:
def match_count(segment_df: pd.DataFrame) -> pd.Series:
    """Return the sum of "match_start_count" per event date.

    Args:
        segment_df: Frame with "event_date" and "match_start_count" columns.

    Returns:
        Series indexed by "event_date" holding the summed
        "match_start_count" for each date.
    """
    by_day = segment_df.groupby("event_date")
    return by_day["match_start_count"].sum()

# Pass the full frame, the platform list, the "platform" column name and the
# match_count aggregator to the project plotting helper.
# NOTE(review): presumably the helper applies the aggregator once per platform
# value and draws one series each — confirm in src.visual_utils.
plot_by_segment(df, platforms, "platform", match_count)