### Data Ingestion

In [2]:
import pandas as pd
from pandas import DataFrame, Series

In [3]:
# Load the two take-home CSVs directly from their S3 URLs (needs network access).
# `users` carries the account-level columns (object_id, creation_time, creation_source, ...);
# `user_engagement` carries the login log (time_stamp, user_id, visited).
users = pd.read_csv("https://s3.amazonaws.com/asana-data-interview/takehome_users-intern.csv")
user_engagement = pd.read_csv("https://s3.amazonaws.com/asana-data-interview/takehome_user_engagement-intern.csv")

In [4]:
def keep_repeat_users(df, visited=3):
    """
    Discard every user that has too few activity rows to possibly be an adopted user.

    INPUT
    df: full dataframe of user activity; must contain a 'user_id' column
    visited = 3: minimum number of rows a user needs in df in order to be kept

    OUTPUT
    a plain dataframe (not a groupby object) holding every row of df whose
    user_id occurs at least 'visited' times, in the original row order
    """

    def logged_often_enough(user_rows):
        # The length of a per-user group is that user's number of activity rows.
        return len(user_rows) >= visited

    activity_by_user = df.groupby('user_id')
    return activity_by_user.filter(logged_often_enough)

def active_users(period, days_logged, user):
    """
    Decide whether one user's login history qualifies them as an adopted user.

    INPUT
    period: length of the window in days (the notebook passes 7)
    days_logged: number of distinct calendar days with a login that must fall inside one window (the notebook passes 3)
    user: dataframe holding a single user's activity, with the login datetimes in a 'time_stamp2' column

    OUTPUT
    active_user: True if some run of 'days_logged' distinct login days spans at most 'period' days, otherwise False

    RAISES
    ValueError: if days_logged is smaller than 1
    """
    if days_logged < 1:
        raise ValueError("days_logged must be at least 1")

    # Reduce the history to one entry per calendar day, in chronological order.
    # Several logins on the same day count once, and logins on consecutive days
    # count as separate days even when they are exactly 24 hours apart.
    login_days = (
        pd.to_datetime(user['time_stamp2'])
        .dropna()
        .dt.normalize()
        .drop_duplicates()
        .sort_values()
        .reset_index(drop=True)
    )

    # For every starting day, measure the distance to the day that completes a
    # run of 'days_logged' login days. Runs that fall off the end of the history
    # come out as NaT, which never satisfies the comparison below.
    window_span = login_days.shift(-(days_logged - 1)) - login_days

    # The whole run must fit in the window (inclusive bound: a span of exactly
    # 'period' days still counts), not merely each individual gap.
    active_user = bool((window_span <= pd.Timedelta(days=period)).any())
    return active_user

def keep_active_users(df, period=7, days_logged=3):
    """
    Collect the ids of all adopted users.

    INPUT
    df: user activity grouped by user (e.g. the result of DataFrame.groupby('user_id')); each group is handed to active_users
    period = 7: window length in days, forwarded to active_users
    days_logged = 3: number of login days required inside one window, forwarded to active_users

    OUTPUT
    unique_active_peeps: a dataframe with a single 'user_id' column listing each adopted user once
    """

    # filter keeps every activity row that belongs to a group for which the predicate is True.
    active_peeps = df.filter(
        lambda user_rows: bool(active_users(period=period, days_logged=days_logged, user=user_rows))
    )

    # Collapse the surviving activity rows to one row per user, in order of first appearance.
    unique_active_peeps = active_peeps[['user_id']].drop_duplicates().reset_index(drop=True)

    return unique_active_peeps

### 1) Calculating Adoption Rate

In [5]:
# NOTE(review): the rendered table below includes user names and e-mail addresses;
# clear or redact this output before sharing the notebook.
users

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id,email_domain
0,1,4/22/14 3:53,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1.398139e+09,1,0,11,10803.0,yahoo.com
1,2,11/15/13 3:45,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1.396238e+09,0,0,1,316.0,gustr.com
2,3,3/19/13 23:14,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1.363735e+09,0,0,94,1525.0,gustr.com
3,4,5/21/13 8:09,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1.369210e+09,0,0,1,5151.0,yahoo.com
4,5,1/17/13 10:14,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1.358850e+09,0,0,193,5240.0,yahoo.com
...,...,...,...,...,...,...,...,...,...,...,...
11995,11996,9/6/13 6:14,Meier Sophia,SophiaMeier@gustr.com,ORG_INVITE,1.378448e+09,0,0,89,8263.0,gustr.com
11996,11997,1/10/13 18:28,Fisher Amelie,AmelieFisher@gmail.com,SIGNUP_GOOGLE_AUTH,1.358275e+09,0,0,200,,gmail.com
11997,11998,4/27/14 12:45,Haynes Jake,JakeHaynes@cuvox.de,GUEST_INVITE,1.398603e+09,1,1,83,8074.0,cuvox.de
11998,11999,5/31/12 11:55,Faber Annett,mhaerzxp@iuxiw.com,PERSONAL_PROJECTS,1.338638e+09,0,0,6,,iuxiw.com


In [6]:
# `activity` is a second name for the same DataFrame object as `user_engagement`; no copy is made here.
activity=user_engagement
# Bare expression so the notebook renders the engagement table.
user_engagement

Unnamed: 0,time_stamp,user_id,visited
0,2014-04-22 03:53:30,1,1
1,2013-11-15 03:45:04,2,1
2,2013-11-29 03:45:04,2,1
3,2013-12-09 03:45:04,2,1
4,2013-12-25 03:45:04,2,1
...,...,...,...
207912,2013-09-06 06:14:15,11996,1
207913,2013-01-15 18:28:37,11997,1
207914,2014-04-27 12:45:16,11998,1
207915,2012-06-02 11:55:59,11999,1


In [7]:
# Parse the string timestamps into datetimes in a new 'time_stamp2' column.
# `assign` returns a new frame, so the `user_engagement` table that was loaded and
# displayed earlier is not modified as a side effect of preparing `activity`.
activity = user_engagement.assign(time_stamp2=pd.to_datetime(user_engagement['time_stamp']))

In [8]:
# Narrow the activity log to users with enough rows to possibly qualify, then group it per user.
repeated_users_df = keep_repeat_users(activity)
grouped_users_df = repeated_users_df.groupby('user_id')

# One row per adopted user, flagged with adopted_user = 1.
unique_active_peeps_df = keep_active_users(grouped_users_df).assign(adopted_user=1)

# Outer-join the flags onto the users table; users without a flag come out as NaN
# after the join and are set to 0 (not adopted).
adopted_user_info = unique_active_peeps_df.merge(
    users,
    how='outer',
    left_on='user_id',
    right_on='object_id',
)
adopted_user_info = adopted_user_info.fillna({'adopted_user': 0})

# Row count of the merged table, shown as the cell output.
len(adopted_user_info)



12000

In [1]:
# Summary statistics for the merged table. `adopted_user_info` is created by the merge
# cell above, so that cell has to run first; the NameError recorded below comes from this
# cell having been executed as In [1], before the name was defined.
adopted_user_info.describe()

NameError: name 'adopted_user_info' is not defined

### 2) Methodology

##### 2a) Writeup associated with methodology

### 3) What Factors Predict User Adoption?

##### 3a) Writeup associated with what factors predict user adoption?

### 4) Additional Commentary (Optional)