### Data Ingestion

In [1]:
import pandas as pd
from pandas import DataFrame, Series

In [103]:
users = pd.read_csv("https://s3.amazonaws.com/asana-data-interview/takehome_users-intern.csv")
user_engagement = pd.read_csv("https://s3.amazonaws.com/asana-data-interview/takehome_user_engagement-intern.csv")

### 1) Calculating Adoption Rate

In [104]:
# sort user_engagemenet and drop rows with similar user_id and date
user_engagement = user_engagement.sort_values(by=['user_id', 'time_stamp'])
user_engagement['time_stamp'] = pd.to_datetime(user_engagement['time_stamp'])
user_engagement['date'] = user_engagement['time_stamp'].dt.date
user_engagement = user_engagement[["date","user_id"]].drop_duplicates()

In [105]:
def is_adopted_user(df, period=7, freq=3):
	"""
	INPUT
	df: dataframe of user activity
	period: time period we want to look at, default 7
	freq: is the number of days of the period we want the user to have logged in, default 3

	OUTPUT
	adopted_user: returns whether or not the user had 3 consecutive logins within a 7 day period
	"""
	adopted_user = False
	if len(df) < freq:
		return adopted_user
	else:
		for i in range(len(df)-freq+1):
			if (df['date'].iloc[i+freq-1] - df['date'].iloc[i]) < pd.Timedelta(days=period):
				adopted_user = True
				return adopted_user
	return adopted_user

In [106]:
# find adopted users
grouped_users = user_engagement.groupby('user_id')
adoted_users = pd.DataFrame(grouped_users.filter(lambda x : is_adopted_user(df=x, period=7, freq=3) ==True)["user_id"].drop_duplicates())
adoted_users['adopted_user'] = 1

# add column adopted_user to users
users = pd.merge(adoted_users, users, how='outer', left_on='user_id', right_on='object_id')
users['adopted_user'] = users['adopted_user'].fillna(0)

In [107]:
users['adopted_user'].agg('mean')

0.1335

The adoption rate is 13.35%

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
num_adopted_users = len(adopted_user_info[adopted_user_info['adopted_user'] == 1])
print(num_adopted_users)
total_users = len(adopted_user_info)
adoption_rate = num_adopted_users / total_users

# define features and target variable
X = adopted_user_info[['creation_source', 'opted_in_to_mailing_list', 'enabled_for_marketing_drip', 'org_id', 'invited_by_user_id']]
y = adopted_user_info['adopted_user']

# split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# train random forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# get feature importances
importances = rf.feature_importances_

# print feature importances
for feature, importance in zip(X.columns, importances):
    print(f"{feature}: {importance}")


1615
0.13458333333333333


### 2) Methodology

##### 2a) Writeup associated with methodology

### 3) What Factors Predict User Adoption?

##### 3a) Writeup associated with what factors predict user adoption?

### 4) Additional Commentary (Optional)