In [None]:
import argparse
import os
from datetime import date, datetime, timedelta
from pprint import pprint

import dash_auth
import mpld3
import numpy as np
import pandas as pd
import plotly.express as px
import pytz
from dash import Dash, dcc, html
from plotnine import *
from pymongo import MongoClient

import sys
sys.path.append("./app/")
from log import get_logger

In [None]:
# Analysis parameters. Only LOOKBACK_PERIOD_DAYS is active; the commented-out
# thresholds are kept for reference and are not read anywhere in this cell.
# ACTIVITY_COOLOFF_MINS = 10
# NUM_SESSIONS_THRESHOLD = 5
LOOKBACK_PERIOD_DAYS = 20
# SESSION_COUNT_THRESHOLDS = [1, 3, 5]

# MongoDB replica-set URI; the {user} and {password} placeholders are filled
# in later with str.format.
CONNECTION_TEMPLATE = """mongodb://{user}:{password}@cluster0-shard-00-00.dbkij.mongodb.net:27017,cluster0-shard-00-01.dbkij.mongodb.net:27017,cluster0-shard-00-02.dbkij.mongodb.net:27017/myFirstDatabase?authSource=admin&replicaSet=atlas-xn7hxv-shard-0&w=majority&readPreference=primary&appname=MongoDB%20Compass&retryWrites=true&ssl=true"""
logger = get_logger(__name__)

# Credentials come from the environment so they never live in the notebook.
mongo_user = os.getenv("MONGO_USER")
mongo_password = os.getenv("MONGO_PASSWORD")
if not mongo_user or not mongo_password:
    logger.fatal("MONGO_USER or MONGO_PASSWORD not set!")
    # sys.exit raises SystemExit, which aborts this cell (and a "Run All")
    # in a notebook and exits with status 1 in a plain script. The bare
    # `exit` helper is injected by `site` for interactive use only and is
    # rebound by Jupyter kernels, so it is not a reliable way to stop here.
    sys.exit(1)


In [None]:
# Today's calendar date (system-local clock) as an ISO "YYYY-MM-DD" string,
# plus the lookback window length in days.
window = LOOKBACK_PERIOD_DAYS
dt = date.today().isoformat()

In [None]:
# Open the MongoDB connection using the credentials read from the environment.
# NOTE(review): user/password are interpolated into the URI verbatim; if they
# can contain reserved characters (":", "/", "@", "%") they presumably need to
# be percent-escaped before reaching this point. Confirm how the env vars are
# provisioned.
client = MongoClient(
    CONNECTION_TEMPLATE.format(user=mongo_user, password=mongo_password),
    unicode_decode_error_handler='ignore',
)

# Handles to the database and the two collections used by the analysis.
main_db = client.main
events_collection = main_db.log_events
user_collection = main_db.users


# Upper bound of the query window: midnight of `dt` on the US/Pacific wall
# clock. The previous `strptime(...).astimezone(tz)` treated the naive midnight
# as *system-local* time before converting, so on any non-Pacific host `end`
# (and even `end.date()`) drifted by the UTC-offset difference. `localize`
# attaches the zone to the naive value without shifting it.
end = pytz.timezone("US/Pacific").localize(datetime.strptime(dt, "%Y-%m-%d"))

In [None]:
# Echo the window's end date and its length in days.
print(f"{end.date()} {window}")

In [None]:
# Load all user documents and reduce them to the identifying columns.

cursor = user_collection.find()
df_users = (
    pd.DataFrame(list(cursor))
    # errors="raise" makes a missing "_id" column fail loudly.
    .rename(columns={"_id": "user_id"}, errors="raise")
    .loc[:, ["user_id", "email", "name"]]
    # Cast ids to str so they can be joined against the events' user_id.
    .assign(user_id=lambda users: users["user_id"].astype(str))
)

print(df_users.shape)
df_users.head()

In [None]:
# Build the event-level frame: one row per logged event, joined to its user.

# Fetch events created strictly inside (end - window days, end).
date_filter = {
    "created_at": {"$gt": end - timedelta(days=window), "$lt": end},
}
cursor = events_collection.find(date_filter)
events_df = pd.DataFrame(list(cursor)).assign(
    user_id=lambda events: events["user_id"].astype(str)
)
print(events_df.shape)

# Outer join: users with no event in the window are kept (their event columns
# are NaN), as are events whose user_id has no match in df_users.
events_df = pd.merge(events_df, df_users, on="user_id", how="outer")
print(events_df.shape)

# # add PST timestamps and sort by user and timestamps
# events_df = events_df.rename(columns={"_id": "event_id"}, errors="raise")
# events_df["time_since_previous_event_this_day"] = (
#     events_df
#     .sort_values(by=["user_id", "created_at"])
#     .groupby(by='user_id')["created_at"]
#     .diff()
# )
# events_df["ts_pst"] = events_df.created_at.dt.tz_localize(
#     pytz.utc).dt.tz_convert('US/Pacific')
# events_df["dt"] = events_df.ts_pst.dt.date  # date in PST

# print(events_df.shape)
# events_df.head(3)

In [None]:
# Rows of the merged frame with no event_type. After the outer merge these are
# presumably the users who logged no event inside the window.
churned_users = events_df.loc[lambda merged: merged.event_type.isna()]
churned_users

In [None]:
# One row per user_id that has at least one row with an event_type.
# groupby(...).first() takes the first non-null value of each column within
# the group, and user_id becomes the index of the result.
has_event = events_df.event_type.notna()
active_users = events_df[has_event].groupby("user_id").first()
active_users

In [None]:
# Keep only churned rows whose `name` does not also appear among the active
# users, i.e. drop people who are active under another account with that name.
active_names = set(active_users.name)
shares_active_name = churned_users.name.isin(active_names)
churned_without_active_account = churned_users[~shares_active_name]
churned_without_active_account

In [None]:
# Re-display the frame built in the previous cell; no new computation happens here.
churned_without_active_account

In [None]:
# Write name, email and user_id of the churned users to a CSV file next to the
# notebook (the DataFrame index is written as the first column).
# NOTE(review): the file name is hard-coded and does not follow `dt` or
# `window`; confirm it still matches the run being exported.
export_columns = ['name', 'email', 'user_id']
churned_export = churned_without_active_account.loc[:, export_columns]
churned_export.to_csv("./churned_users_dec_1_20_day_lookback.csv")

In [None]:
# Display the user frame (user_id, email, name) loaded earlier in the notebook.
df_users