## Relax Inc. Take Home Challenge

**Objective**: Identify the factors that predict future user adoption.

An adopted user is  a user who has logged into the product on three separate days in at least one seven day period.


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from scipy.stats import ks_2samp, kstest



In [213]:

users = pd.read_csv("takehome_users.csv", encoding='latin-1')
users.head()

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1398139000.0,1,0,11,10803.0
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1396238000.0,0,0,1,316.0
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1363735000.0,0,0,94,1525.0
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1369210000.0,0,0,1,5151.0
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1358850000.0,0,0,193,5240.0


In [214]:
users.isna().sum()

object_id                        0
creation_time                    0
name                             0
email                            0
creation_source                  0
last_session_creation_time    3177
opted_in_to_mailing_list         0
enabled_for_marketing_drip       0
org_id                           0
invited_by_user_id            5583
dtype: int64

In [215]:
users.creation_time.min(), users.creation_time.max()

('2012-05-31 00:43:27', '2014-05-30 23:59:19')

In [216]:

engagement = pd.read_csv("takehome_user_engagement.csv")
engagement.head()


Unnamed: 0,time_stamp,user_id,visited
0,2014-04-22 03:53:30,1,1
1,2013-11-15 03:45:04,2,1
2,2013-11-29 03:45:04,2,1
3,2013-12-09 03:45:04,2,1
4,2013-12-25 03:45:04,2,1


In [217]:
engagement.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207917 entries, 0 to 207916
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   time_stamp  207917 non-null  object
 1   user_id     207917 non-null  int64 
 2   visited     207917 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 4.8+ MB


In [218]:
engagement['time_stamp'] = pd.to_datetime(engagement['time_stamp'])
engagement = engagement.sort_values(by='time_stamp', ascending=True).reset_index()
engagement = engagement.drop(columns=['index'])
engagement.set_index("time_stamp", inplace=True)
engagement.head()


Unnamed: 0_level_0,user_id,visited
time_stamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-31 08:20:06,10012,1
2012-05-31 15:47:36,3428,1
2012-05-31 17:19:37,9899,1
2012-05-31 21:58:33,1693,1
2012-06-01 00:17:30,6102,1


In [219]:
user_ids = list(engagement.user_id.unique())
user_ids.sort()

logins = pd.DataFrame()

for id in user_ids:
    temp = engagement[engagement.user_id == id]
    temp['user_rolling_7day_logins#'] = temp.visited.rolling("7D", min_periods=1).sum()
    logins = pd.concat([logins, temp])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['user_rolling_7day_logins#'] = temp.visited.rolling("7D", min_periods=1).sum()


In [220]:
logins.head(10)

Unnamed: 0_level_0,user_id,visited,user_rolling_7day_logins#
time_stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-04-22 03:53:30,1,1,1.0
2013-11-15 03:45:04,2,1,1.0
2013-11-29 03:45:04,2,1,1.0
2013-12-09 03:45:04,2,1,1.0
2013-12-25 03:45:04,2,1,1.0
2013-12-31 03:45:04,2,1,2.0
2014-01-08 03:45:04,2,1,1.0
2014-02-03 03:45:04,2,1,1.0
2014-02-08 03:45:04,2,1,2.0
2014-02-09 03:45:04,2,1,3.0


In [221]:
adopted_users = logins[logins["user_rolling_7day_logins#"] >= 3].user_id.unique()
users['is_adopted_user'] = users['object_id'].isin(adopted_users)
users.head()


Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1398139000.0,1,0,11,10803.0,False
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1396238000.0,0,0,1,316.0,True
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1363735000.0,0,0,94,1525.0,False
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1369210000.0,0,0,1,5151.0,False
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1358850000.0,0,0,193,5240.0,False


In [222]:
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   object_id                   12000 non-null  int64  
 1   creation_time               12000 non-null  object 
 2   name                        12000 non-null  object 
 3   email                       12000 non-null  object 
 4   creation_source             12000 non-null  object 
 5   last_session_creation_time  8823 non-null   float64
 6   opted_in_to_mailing_list    12000 non-null  int64  
 7   enabled_for_marketing_drip  12000 non-null  int64  
 8   org_id                      12000 non-null  int64  
 9   invited_by_user_id          6417 non-null   float64
 10  is_adopted_user             12000 non-null  bool   
dtypes: bool(1), float64(2), int64(4), object(4)
memory usage: 949.3+ KB


In [223]:
users['creation_time'] = pd.to_datetime(users['creation_time'])
users['last_session_creation_time'] = pd.to_datetime(users['last_session_creation_time'], unit='s')
users['invited_by_user_id'] = users['invited_by_user_id'].astype('Int64')
users['email_domain'] = users.email.map(lambda x: x.split('@')[1])
users['invited_by_user_id'].fillna(-1, inplace=True)
users.head()

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user,email_domain
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,2014-04-22 03:53:30,1,0,11,10803,False,yahoo.com
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,2014-03-31 03:45:04,0,0,1,316,True,gustr.com
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,2013-03-19 23:14:52,0,0,94,1525,False,gustr.com
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,2013-05-22 08:09:28,0,0,1,5151,False,yahoo.com
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,2013-01-22 10:14:20,0,0,193,5240,False,yahoo.com


In [224]:
print(f"There are {users['is_adopted_user'].sum()} adopted users")
print(f"The overall user adoption rate is {round(users['is_adopted_user'].mean()*100, 2)}%")

There are 1602 adopted users
The overall user adoption rate is 13.35%


In [225]:
users.head(10)

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user,email_domain
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,2014-04-22 03:53:30,1,0,11,10803,False,yahoo.com
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,2014-03-31 03:45:04,0,0,1,316,True,gustr.com
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,2013-03-19 23:14:52,0,0,94,1525,False,gustr.com
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,2013-05-22 08:09:28,0,0,1,5151,False,yahoo.com
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,2013-01-22 10:14:20,0,0,193,5240,False,yahoo.com
5,6,2013-12-17 03:37:06,Cunha Eduardo,EduardoPereiraCunha@yahoo.com,GUEST_INVITE,2013-12-19 03:37:06,0,0,197,11241,False,yahoo.com
6,7,2012-12-16 13:24:32,Sewell Tyler,TylerSewell@jourrapide.com,SIGNUP,2012-12-20 13:24:32,0,1,37,-1,False,jourrapide.com
7,8,2013-07-31 05:34:02,Hamilton Danielle,DanielleHamilton@yahoo.com,PERSONAL_PROJECTS,NaT,1,1,74,-1,False,yahoo.com
8,9,2013-11-05 04:04:24,Amsel Paul,PaulAmsel@hotmail.com,PERSONAL_PROJECTS,NaT,0,0,302,-1,False,hotmail.com
9,10,2013-01-16 22:08:03,Santos Carla,CarlaFerreiraSantos@gustr.com,ORG_INVITE,2014-06-03 22:08:03,1,1,318,4143,True,gustr.com


In [226]:
adopted_by_source = pd.DataFrame(pd.crosstab(users['creation_source'], users['is_adopted_user']))
adopted_by_source['Total'] = adopted_by_source.sum(axis=1)
adopted_by_source['percent_adopted'] = np.round(adopted_by_source[True] * 100.0/ adopted_by_source['Total'],2)
adopted_by_source

is_adopted_user,False,True,Total,percent_adopted
creation_source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GUEST_INVITE,1803,360,2163,16.64
ORG_INVITE,3701,553,4254,13.0
PERSONAL_PROJECTS,1947,164,2111,7.77
SIGNUP,1794,293,2087,14.04
SIGNUP_GOOGLE_AUTH,1153,232,1385,16.75


In [147]:
adopted_by_org_id = pd.DataFrame(pd.crosstab(users['org_id'], users['is_adopted_user']))
adopted_by_org_id['Total'] = adopted_by_org_id.sum(axis=1)
adopted_by_org_id['percent_adopted'] = np.round(adopted_by_org_id[True] * 100.0/ adopted_by_org_id['Total'],2)
adopted_by_org_id

is_adopted_user,False,True,Total,percent_adopted
org_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,308,11,319,3.45
1,219,14,233,6.01
2,186,15,201,7.46
3,154,14,168,8.33
4,143,16,159,10.06
...,...,...,...,...
412,17,0,17,0.00
413,13,3,16,18.75
414,18,2,20,10.00
415,10,6,16,37.50


In [156]:
adopted_by_org_id = adopted_by_org_id.sort_values(by='percent_adopted', ascending=False)
adopted_by_org_id


is_adopted_user,False,True,Total,percent_adopted
org_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
387,5,7,12,58.33
235,7,6,13,46.15
270,8,6,14,42.86
399,8,5,13,38.46
392,10,6,16,37.50
...,...,...,...,...
329,13,0,13,0.00
310,15,0,15,0.00
307,19,0,19,0.00
412,17,0,17,0.00


In [146]:
adopted_by_mlist_optin = pd.DataFrame(pd.crosstab(users['opted_in_to_mailing_list'], users['is_adopted_user']))
adopted_by_mlist_optin['Total'] = adopted_by_mlist_optin.sum(axis=1)
adopted_by_mlist_optin['percent_adopted'] = np.round(adopted_by_mlist_optin[True] * 100.0/ adopted_by_mlist_optin['Total'],2)
adopted_by_mlist_optin

is_adopted_user,False,True,Total,percent_adopted
opted_in_to_mailing_list,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,7818,1188,9006,13.19
1,2580,414,2994,13.83


In [151]:
adopted_by_mktg_drip = pd.DataFrame(pd.crosstab(users['enabled_for_marketing_drip'], users['is_adopted_user']))
adopted_by_mktg_drip['Total'] = adopted_by_mktg_drip.sum(axis=1)
adopted_by_mktg_drip['percent_adopted'] = np.round(adopted_by_mktg_drip[True] * 100.0/ adopted_by_mktg_drip['Total'],2)
adopted_by_mktg_drip

is_adopted_user,False,True,Total,percent_adopted
enabled_for_marketing_drip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,8852,1356,10208,13.28
1,1546,246,1792,13.73


In [152]:
adopted_by_who_invited = pd.DataFrame(pd.crosstab(users['invited_by_user_id'], users['is_adopted_user']))
adopted_by_who_invited['Total'] = adopted_by_who_invited.sum(axis=1)
adopted_by_who_invited['percent_adopted'] = np.round(adopted_by_who_invited[True] * 100.0/ adopted_by_who_invited['Total'],2)
adopted_by_who_invited

is_adopted_user,False,True,Total,percent_adopted
invited_by_user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1,4894,689,5583,12.34
3,1,0,1,0.00
7,5,0,5,0.00
10,1,0,1,0.00
21,1,0,1,0.00
...,...,...,...,...
11981,1,0,1,0.00
11986,1,0,1,0.00
11994,5,2,7,28.57
11997,1,0,1,0.00


In [209]:
adopted_by_who_invited = adopted_by_who_invited.sort_values(by=True, ascending=False)
adopted_by_who_invited

is_adopted_user,False,True,Total,percent_adopted
invited_by_user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1,4894,689,5583,12.34
10628,6,4,10,40.00
2354,6,4,10,40.00
2776,1,3,4,75.00
11267,3,3,6,50.00
...,...,...,...,...
4593,1,0,1,0.00
4584,8,0,8,0.00
4574,2,0,2,0.00
4643,1,0,1,0.00


In [227]:
user_ids = list(engagement.user_id.unique())
user_ids.sort()
empty = np.full(shape=len(user_ids), fill_value=np.nan).tolist()

initial_logins = pd.DataFrame({"user_id": user_ids, 
                    'logins_first_7days': empty, 
                    'logins_first_14days': empty,
                    'logins_first_30days': empty,
                    'logins_first_60days': empty
                   })
initial_logins = new.set_index("user_id")

for id in user_ids:
    temp = engagement[engagement.user_id == id]
    weekly = temp.visited.resample("7D").sum()
    initial_logins.loc[id,'logins_first_7days'] = weekly[0]
    if len(weekly) >=2 :
        initial_logins.loc[id,'logins_first_14days'] = weekly[0] + weekly[1]
    else:
        initial_logins.loc[id,'logins_first_14days'] = weekly[0]
    monthly = temp.visited.resample("30D").sum()
    initial_logins.loc[id,'logins_first_30days'] = monthly[0]
    if len(monthly) >=2 :
        initial_logins.loc[id,'logins_first_60days'] = monthly[0] + monthly[1]
    else:
        initial_logins.loc[id,'logins_first_60days'] = monthly[0]

initial_logins = initial_logins.reset_index()
initial_logins.head()

Unnamed: 0,user_id,logins_first_7days,logins_first_14days,logins_first_30days,logins_first_60days
0,1,1.0,1.0,1.0,1.0
1,2,1.0,1.0,3.0,6.0
2,3,1.0,1.0,1.0,1.0
3,4,1.0,1.0,1.0,1.0
4,5,1.0,1.0,1.0,1.0


In [228]:
users = users.merge(new, left_on='object_id', right_on='user_id',how='left')
users.head()

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user,email_domain,user_id,logins_first_7days,logins_first_14days,logins_first_30days,logins_first_60days
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,2014-04-22 03:53:30,1,0,11,10803,False,yahoo.com,1.0,1.0,1.0,1.0,1.0
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,2014-03-31 03:45:04,0,0,1,316,True,gustr.com,2.0,1.0,1.0,3.0,6.0
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,2013-03-19 23:14:52,0,0,94,1525,False,gustr.com,3.0,1.0,1.0,1.0,1.0
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,2013-05-22 08:09:28,0,0,1,5151,False,yahoo.com,4.0,1.0,1.0,1.0,1.0
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,2013-01-22 10:14:20,0,0,193,5240,False,yahoo.com,5.0,1.0,1.0,1.0,1.0


In [236]:
users['logins_first_7days'].fillna(0, inplace=True)
users['logins_first_14days'].fillna(0, inplace=True)
users['logins_first_30days'].fillna(0, inplace=True)
users['logins_first_60days'].fillna(0, inplace=True)
users.head(10)

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user,email_domain,user_id,logins_first_7days,logins_first_14days,logins_first_30days,logins_first_60days,login_first_30days
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,2014-04-22 03:53:30,1,0,11,10803,False,yahoo.com,1.0,1.0,1.0,1.0,1.0,1 or less
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,2014-03-31 03:45:04,0,0,1,316,True,gustr.com,2.0,1.0,1.0,3.0,6.0,2 or more
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,2013-03-19 23:14:52,0,0,94,1525,False,gustr.com,3.0,1.0,1.0,1.0,1.0,1 or less
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,2013-05-22 08:09:28,0,0,1,5151,False,yahoo.com,4.0,1.0,1.0,1.0,1.0,1 or less
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,2013-01-22 10:14:20,0,0,193,5240,False,yahoo.com,5.0,1.0,1.0,1.0,1.0,1 or less
5,6,2013-12-17 03:37:06,Cunha Eduardo,EduardoPereiraCunha@yahoo.com,GUEST_INVITE,2013-12-19 03:37:06,0,0,197,11241,False,yahoo.com,6.0,1.0,1.0,1.0,1.0,1 or less
6,7,2012-12-16 13:24:32,Sewell Tyler,TylerSewell@jourrapide.com,SIGNUP,2012-12-20 13:24:32,0,1,37,-1,False,jourrapide.com,7.0,1.0,1.0,1.0,1.0,1 or less
7,8,2013-07-31 05:34:02,Hamilton Danielle,DanielleHamilton@yahoo.com,PERSONAL_PROJECTS,NaT,1,1,74,-1,False,yahoo.com,,0.0,0.0,0.0,0.0,2 or more
8,9,2013-11-05 04:04:24,Amsel Paul,PaulAmsel@hotmail.com,PERSONAL_PROJECTS,NaT,0,0,302,-1,False,hotmail.com,,0.0,0.0,0.0,0.0,2 or more
9,10,2013-01-16 22:08:03,Santos Carla,CarlaFerreiraSantos@gustr.com,ORG_INVITE,2014-06-03 22:08:03,1,1,318,4143,True,gustr.com,10.0,2.0,2.0,6.0,14.0,2 or more


In [237]:
adopted_by_logins = pd.DataFrame(pd.crosstab(users['logins_first_7days'], users['is_adopted_user']))
adopted_by_logins['Total'] = adopted_by_logins.sum(axis=1)
adopted_by_logins['percent_adopted'] = np.round(adopted_by_logins[True] * 100.0/ adopted_by_logins['Total'],2)
adopted_by_logins

is_adopted_user,False,True,Total,percent_adopted
logins_first_7days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,3177,0,3177,0.0
1.0,6920,1011,7931,12.75
2.0,301,401,702,57.12
3.0,0,165,165,100.0
4.0,0,21,21,100.0
5.0,0,2,2,100.0
6.0,0,1,1,100.0
7.0,0,1,1,100.0


In [238]:
adopted_by_logins = pd.DataFrame(pd.crosstab(users['logins_first_14days'], users['is_adopted_user']))
adopted_by_logins['Total'] = adopted_by_logins.sum(axis=1)
adopted_by_logins['percent_adopted'] = np.round(adopted_by_logins[True] * 100.0/ adopted_by_logins['Total'],2)
adopted_by_logins

is_adopted_user,False,True,Total,percent_adopted
logins_first_14days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,3177,0,3177,0.0
1.0,6656,652,7308,8.92
2.0,429,468,897,52.17
3.0,120,273,393,69.47
4.0,16,131,147,89.12
5.0,0,48,48,100.0
6.0,0,21,21,100.0
7.0,0,5,5,100.0
8.0,0,2,2,100.0
9.0,0,1,1,100.0


In [239]:
users['login_first_30days'] = users['logins_first_30days'].map(lambda x: "1 or less" if x <=1 else "2 or more")
users.head(10)

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,is_adopted_user,email_domain,user_id,logins_first_7days,logins_first_14days,logins_first_30days,logins_first_60days,login_first_30days
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,2014-04-22 03:53:30,1,0,11,10803,False,yahoo.com,1.0,1.0,1.0,1.0,1.0,1 or less
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,2014-03-31 03:45:04,0,0,1,316,True,gustr.com,2.0,1.0,1.0,3.0,6.0,2 or more
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,2013-03-19 23:14:52,0,0,94,1525,False,gustr.com,3.0,1.0,1.0,1.0,1.0,1 or less
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,2013-05-22 08:09:28,0,0,1,5151,False,yahoo.com,4.0,1.0,1.0,1.0,1.0,1 or less
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,2013-01-22 10:14:20,0,0,193,5240,False,yahoo.com,5.0,1.0,1.0,1.0,1.0,1 or less
5,6,2013-12-17 03:37:06,Cunha Eduardo,EduardoPereiraCunha@yahoo.com,GUEST_INVITE,2013-12-19 03:37:06,0,0,197,11241,False,yahoo.com,6.0,1.0,1.0,1.0,1.0,1 or less
6,7,2012-12-16 13:24:32,Sewell Tyler,TylerSewell@jourrapide.com,SIGNUP,2012-12-20 13:24:32,0,1,37,-1,False,jourrapide.com,7.0,1.0,1.0,1.0,1.0,1 or less
7,8,2013-07-31 05:34:02,Hamilton Danielle,DanielleHamilton@yahoo.com,PERSONAL_PROJECTS,NaT,1,1,74,-1,False,yahoo.com,,0.0,0.0,0.0,0.0,1 or less
8,9,2013-11-05 04:04:24,Amsel Paul,PaulAmsel@hotmail.com,PERSONAL_PROJECTS,NaT,0,0,302,-1,False,hotmail.com,,0.0,0.0,0.0,0.0,1 or less
9,10,2013-01-16 22:08:03,Santos Carla,CarlaFerreiraSantos@gustr.com,ORG_INVITE,2014-06-03 22:08:03,1,1,318,4143,True,gustr.com,10.0,2.0,2.0,6.0,14.0,2 or more


In [240]:
adopted_by_logins = pd.DataFrame(pd.crosstab(users['login_first_30days'], users['is_adopted_user']))
adopted_by_logins['Total'] = adopted_by_logins.sum(axis=1)
adopted_by_logins['percent_adopted'] = np.round(adopted_by_logins[True] * 100.0/ adopted_by_logins['Total'],2)
adopted_by_logins

is_adopted_user,False,True,Total,percent_adopted
login_first_30days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1 or less,9507,237,9744,2.43
2 or more,891,1365,2256,60.51
