In [3]:
import json

import pandas as pd

# Load the raw event log. The path is relative, so it is resolved against the
# kernel's current working directory.
events_df = pd.read_csv(filepath_or_buffer='simple_interview_events.csv')

Unnamed: 0,user_id,event_type,event_time,event_params
0,32001,onboarding_start,2024-01-01T00:01:40,"{""funnel_type"": ""female""}"
1,99564,onboarding_start,2024-01-01T00:01:53,"{""funnel_type"": ""male""}"
2,32001,profile_start,2024-01-01T00:01:58,{}
3,99564,profile_start,2024-01-01T00:02:07,{}
4,71575,onboarding_start,2024-01-01T00:02:18,"{""funnel_type"": ""female""}"


In [6]:
# Distinct event types present in the log, in order of first appearance.
# `.unique` has to be *called*: without the parentheses this name would be
# bound to the method object itself instead of the array of values.
unique_event_types = events_df['event_type'].unique()

unique_event_types

array(['onboarding_start', 'profile_start', 'email_submit',
       'paywall_show', 'payment_done', 'experiment_exposure'],
      dtype=object)

In [7]:
# Stages of the conversion funnel, listed in the order used for reporting.
funnel_stages = [
    'onboarding_start',
    'profile_start',
    'email_submit',
    'paywall_show',
    'payment_done',
]

# Keep only rows whose event type is one of the funnel stages.
is_funnel_event = events_df['event_type'].isin(funnel_stages)
funnel_df = events_df[is_funnel_event]

# Number of distinct users seen at each stage; `reindex` puts the result in
# funnel order instead of the alphabetical order `groupby` produces.
users_per_stage = funnel_df.groupby('event_type')['user_id'].nunique()
user_funnel_counts = users_per_stage.reindex(funnel_stages)

user_funnel_counts

event_type
onboarding_start    100000
profile_start        83051
email_submit         65227
paywall_show         67478
payment_done          5430
Name: user_id, dtype: int64

In [9]:
# Express every stage as a percentage of the users who fired the first
# funnel event (`onboarding_start`), which therefore reads as 100%.
initial_users = user_funnel_counts.loc['onboarding_start']
user_funnel_percentage = user_funnel_counts.div(initial_users).mul(100)

user_funnel_percentage

event_type
onboarding_start    100.000
profile_start        83.051
email_submit         65.227
paywall_show         67.478
payment_done          5.430
Name: user_id, dtype: float64

In [10]:
# Stage-over-stage change in the share of users, in percentage points.
# Negative values are users lost relative to the previous stage. The first
# stage has no predecessor, so it keeps its own percentage as the baseline.
stage_change = user_funnel_percentage.diff()
user_funnel_dropoff = stage_change.where(stage_change.notna(), user_funnel_percentage)

user_funnel_dropoff

event_type
onboarding_start    100.000
profile_start       -16.949
email_submit        -17.824
paywall_show          2.251
payment_done        -62.048
Name: user_id, dtype: float64

In [15]:
# Funnel events of users who were shown the paywall without ever submitting
# an email.
saw_paywall = funnel_df['event_type'] == 'paywall_show'
submitted_email = funnel_df['event_type'] == 'email_submit'

paywall_users = funnel_df.loc[saw_paywall, 'user_id'].unique()

email_submit_users = funnel_df.loc[submitted_email, 'user_id'].unique()

# Users with a paywall_show event and no email_submit event.
users_paywall_no_email = set(paywall_users).difference(email_submit_users)

in_cohort = funnel_df['user_id'].isin(users_paywall_no_email)
funnel_no_email_df = funnel_df[in_cohort]

# Order each user's events chronologically so their path can be read row by row.
funnel_no_email_sorted = funnel_no_email_df.sort_values(['user_id', 'event_time'])

funnel_no_email_sorted.head(30)

Unnamed: 0,user_id,event_type,event_time,event_params
317565,1,onboarding_start,2024-03-24T07:48:11,"{""funnel_type"": ""main""}"
317566,1,profile_start,2024-03-24T07:48:21,{}
317606,1,paywall_show,2024-03-24T07:58:24,{}
115356,26,onboarding_start,2024-01-30T16:03:55,"{""funnel_type"": ""main""}"
115357,26,profile_start,2024-01-30T16:04:05,{}
115386,26,paywall_show,2024-01-30T16:14:09,{}
152509,34,onboarding_start,2024-02-09T15:28:29,"{""funnel_type"": ""main""}"
152510,34,profile_start,2024-02-09T15:28:39,{}
152533,34,paywall_show,2024-02-09T15:38:42,{}
242662,35,onboarding_start,2024-03-04T21:36:49,"{""funnel_type"": ""main""}"


In [18]:
# Take an explicit copy so that adding a column below does not write into a
# slice of `events_df`.
experiment_df = events_df[events_df['event_type'] == 'experiment_exposure'].copy()

# The sample rows show `event_params` as JSON text, so parse it with
# `json.loads` instead of `eval`: `eval` would execute any expression found in
# the data file and also fails on JSON-only literals (`true`, `false`, `null`).
# Rows whose params have no `experiment_name` key get an empty string.
experiment_df['experiment_name'] = experiment_df['event_params'].apply(
    lambda raw_params: json.loads(raw_params).get('experiment_name', '')
)

In [20]:
# Distinct exposed users per experiment, largest first.
users_per_experiment = experiment_df.groupby('experiment_name')['user_id'].nunique()
experiment_counts = users_per_experiment.sort_values(ascending=False)

# The three experiments with the most exposed users.
top_3_experiments = experiment_counts.iloc[:3]
top_3_experiments

experiment_name
exp_2    5731
exp_8    4586
exp_1    4173
Name: user_id, dtype: int64