In [1]:
import argparse
import os
from datetime import date, datetime, timedelta
from pprint import pprint

import dash_auth
import mpld3
import numpy as np
import pandas as pd
import plotly.express as px
import pytz
from dash import Dash, dcc, html
from plotnine import *
from pymongo import MongoClient

import sys
sys.path.append("./app/")
from log import get_logger

The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html
The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc


In [2]:
# Notebook configuration and credential check.
# ACTIVITY_COOLOFF_MINS = 10
# NUM_SESSIONS_THRESHOLD = 5
# Number of days before the window end in which user events are looked up.
LOOKBACK_PERIOD_DAYS = 10
# SESSION_COUNT_THRESHOLDS = [1, 3, 5]
# MongoDB connection URI; {user} and {password} are filled in when connecting.
CONNECTION_TEMPLATE = """mongodb://{user}:{password}@cluster0-shard-00-00.dbkij.mongodb.net:27017,cluster0-shard-00-01.dbkij.mongodb.net:27017,cluster0-shard-00-02.dbkij.mongodb.net:27017/myFirstDatabase?authSource=admin&replicaSet=atlas-xn7hxv-shard-0&w=majority&readPreference=primary&appname=MongoDB%20Compass&retryWrites=true&ssl=true"""
logger = get_logger(__name__)

# Credentials are read from the environment rather than stored in the notebook.
mongo_user = os.getenv("MONGO_USER")
mongo_password = os.getenv("MONGO_PASSWORD")
if not mongo_user or not mongo_password:
    logger.fatal("MONGO_USER or MONGO_PASSWORD not set!")
    # `exit()` is the interactive-shell helper from `site` and is not meant
    # for programs; raising SystemExit stops this cell with an explicit
    # message and still yields exit status 1 when run as a script.
    raise SystemExit("MONGO_USER or MONGO_PASSWORD not set!")


In [3]:
# Anchor date of the analysis as a YYYY-MM-DD string (today, machine-local).
dt = date.today().isoformat()
# Length of the lookback window, in days.
window = LOOKBACK_PERIOD_DAYS

In [4]:
# Connect to MongoDB and grab handles on the collections used below.
# NOTE(review): the credentials are interpolated into the URI as-is. PyMongo
# expects the username/password in a URI to be percent-escaped — confirm the
# environment values are already escaped (or contain no reserved characters).
client = MongoClient(
    CONNECTION_TEMPLATE.format(user=mongo_user, password=mongo_password),
    unicode_decode_error_handler='ignore',
)

main_db = client.main
events_collection = main_db.log_events
user_collection = main_db.users


# End of the lookback window: midnight US/Pacific on the date held in `dt`.
# `localize()` attaches the zone to the naive value. Calling `astimezone()`
# on a naive datetime would instead interpret it in the machine's local zone
# and convert, which can move `end` onto a different calendar day.
end = pytz.timezone("US/Pacific").localize(
    datetime.strptime(dt, "%Y-%m-%d"))

In [5]:
# Show the window end date and the lookback length (in days) on one line.
print(f"{end.date()} {window}")

2022-11-25 10


In [6]:
# Load every user document and keep only the identifying columns.

cursor = user_collection.find()
df_users = (
    pd.DataFrame(list(cursor))
    # Mongo's primary key becomes the join key used later; fail if it is absent.
    .rename(columns={"_id": "user_id"}, errors="raise")
    .loc[:, ["user_id", "email", "name"]]
    # Store ids as plain strings.
    .assign(user_id=lambda users: users["user_id"].astype(str))
)

print(df_users.shape)
df_users.head()

(746, 3)


Unnamed: 0,user_id,email,name
0,61463e014cf3dfd537c399fb,john@generaltask.com,John Reinstra
1,6146889f4cf3dfd537c8d987,jack_hamilton@me.com,Jack Hamilton
2,616cd20a2dbdcd0a7ba1642c,jreinstra@gmail.com,John Reinstra
3,61722cb1e1abefac8feddc31,scottmai702@gmail.com,Scott Mai
4,617343f4e1abefac8f00ab42,nolan1299@gmail.com,Nolan Jimenez


In [11]:
# generate event level data

# query events table: events created strictly inside (end - window days, end)
date_filter = {"created_at": {
    "$gt": end - timedelta(days=window), "$lt": end}}
cursor = events_collection.find(date_filter)
events_df = pd.DataFrame(list(cursor))
if events_df.empty:
    # No events in the window: an empty frame has no columns, so the cast and
    # merge below would raise KeyError. Fall back to the event columns so the
    # outer merge still yields one row per user.
    events_df = pd.DataFrame(
        columns=["_id", "user_id", "event_type", "created_at"])
# Cast ids to str so they compare equal to df_users["user_id"] in the merge.
events_df["user_id"] = events_df["user_id"].astype(str)
print(events_df.shape)

# merge with users; the outer join keeps users that have no event rows
# (their event columns are left empty)
events_df = events_df.merge(df_users, on="user_id", how="outer")
print(events_df.shape)

# NOTE: the enrichment below is currently disabled.
# # add PST timestamps and sort by user and timestamps
# events_df = events_df.rename(columns={"_id": "event_id"}, errors="raise")
# events_df["time_since_previous_event_this_day"] = (
#     events_df
#     .sort_values(by=["user_id", "created_at"])
#     .groupby(by='user_id')["created_at"]
#     .diff()
# )
# events_df["ts_pst"] = events_df.created_at.dt.tz_localize(
#     pytz.utc).dt.tz_convert('US/Pacific')
# events_df["dt"] = events_df.ts_pst.dt.date  # date in PST

# print(events_df.shape)
# events_df.head(3)

(2348687, 4)
(2348925, 6)


In [16]:
# Rows of the merged frame that carry no event_type. After the outer merge
# these include the users that had no event rows in the lookback window.
# NOTE(review): an event document that itself lacks event_type would also be
# selected here — confirm that cannot happen.
has_no_event_type = events_df["event_type"].isna()
churned_users = events_df.loc[has_no_event_type]

In [17]:
# Display the churned-user rows (the rendered table contains names and emails).
churned_users

Unnamed: 0,_id,user_id,event_type,created_at,email,name
2348687,,616cd20a2dbdcd0a7ba1642c,,NaT,jreinstra@gmail.com,John Reinstra
2348688,,61722cb1e1abefac8feddc31,,NaT,scottmai702@gmail.com,Scott Mai
2348689,,617343f4e1abefac8f00ab42,,NaT,nolan1299@gmail.com,Nolan Jimenez
2348690,,6194444356e4da3c4754c8b4,,NaT,najimene@usc.edu,Nolan Jimenez
2348691,,61a9106e56e4da3c47f36b80,,NaT,mlandolfi99@gmail.com,Michael l
...,...,...,...,...,...,...
2348920,,637388525b2a48f782b884ed,,NaT,abhi010196@gmail.com,Abhishek Singh
2348921,,6373cf155b2a48f782d27462,,NaT,rubenaramirez5@gmail.com,Ruben Ramirez (RubesWorld)
2348922,,6373d70c5b2a48f782d78679,,NaT,alexandrajdebow@gmail.com,Ali Debow
2348923,,6382069d77a071471dd255a0,,NaT,danvernon@gmail.com,Dan


In [19]:
# Export name, email and id of the churned users to CSV.
# The file name is derived from the run's anchor date (`dt`, YYYY-MM-DD) and
# lookback length (`window`) so a re-run on another day or with another
# window does not write a file carrying a stale, hard-coded label.
churned_csv_path = f"./churned_users_{dt}_{window}_day_lookback.csv"
churned_users[['name', 'email', 'user_id']].to_csv(churned_csv_path)

In [10]:
# Display the user table (the rendered table contains names and emails).
df_users

Unnamed: 0,user_id,email,name
0,61463e014cf3dfd537c399fb,john@generaltask.com,John Reinstra
1,6146889f4cf3dfd537c8d987,jack_hamilton@me.com,Jack Hamilton
2,616cd20a2dbdcd0a7ba1642c,jreinstra@gmail.com,John Reinstra
3,61722cb1e1abefac8feddc31,scottmai702@gmail.com,Scott Mai
4,617343f4e1abefac8f00ab42,nolan1299@gmail.com,Nolan Jimenez
...,...,...,...
741,63807541f4cf4fb4804652b0,tilmannb@gmail.com,Tilmann Böhme
742,6380d68ef4cf4fb480774eb9,pjones@tangamgaming.com,Phil Jones
743,638158b577a071471d8fdcd2,afroman144@gmail.com,Jon Doe
744,6382069d77a071471dd255a0,danvernon@gmail.com,Dan
