In [225]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from lib import load_data
%matplotlib inline

In [226]:
users_df, costs_df, relations_df, visits_df, orders_df = load_data()

### Задание №2:
### Рассчитать для показателей Revenue (выручка) и Retention (процент вернувшихся пользователей на сайт) целевые lifetimes в разрезе маркетинговых источников.

In [227]:
print('Users registrations slice: {} -- {}'.format(
    users_df['Reg_date'].max().strftime('%Y-%m-%d'), 
    users_df['Reg_date'].min().strftime('%Y-%m-%d')))

Users registrations slice: 2017-12-31 -- 2017-06-01


#### Limit lifetimes to 30 days

In [228]:
lifetime_days_limit = 30
mask = users_df['Reg_date'] < (users_df['Reg_date'] - np.timedelta64(lifetime_days_limit, 'D'))
users_df.drop(users_df.index[mask], inplace=True)

#### Join the tables

In [229]:
users_df = users_df.merge(relations_df, how='inner', on=['name', 'id_partner'])

In [230]:
visits_df = visits_df.merge(users_df[['Reg_date','chanel','id']], 
                            how='right', left_on='id_user', right_on='id')

#### Find how many days passed till visit since registration

In [231]:
visits_df['Visit_date'] = visits_df['Visit_date'].dt.normalize()

In [232]:
no_visits_mask = visits_df['Visit_date'].isnull()
visits_df.loc[no_visits_mask, 'Visit_date'] = visits_df.loc[no_visits_mask, 'Reg_date']

In [233]:
visits_df['day'] = visits_df['Visit_date'] - visits_df['Reg_date']

#### Drop some strange visits before registration date

In [234]:
mask = (visits_df['day'] < np.timedelta64(0, 'D'))
visits_df.drop(visits_df.index[mask], inplace=True)

In [235]:
grouped = visits_df.groupby(['chanel', 'day'])

lifetimes = grouped.agg({'id': pd.Series.nunique})
lifetimes.rename(columns={'id': 'num_users'}, inplace=True)


In [236]:
chanel_user_num = lifetimes['num_users'].groupby(level=0).first()

#### Drop chanels with less than 10 registered users

In [243]:
chanel_user_drop = chanel_user_num.index[chanel_user_num < 10]
chanel_user_num.drop(chanel_user_drop, inplace=True)
lifetimes.drop(chanel_user_drop, inplace=True)


In [253]:
lifetimes.drop('305',inplace=True)
lifetimes.size

Unnamed: 0_level_0,Unnamed: 1_level_0,num_users
chanel,day,Unnamed: 2_level_1
10,0 days,9981
10,1 days,1847
10,2 days,1097
10,3 days,919
10,4 days,813
10,5 days,723
10,6 days,659
10,7 days,610
10,8 days,541
10,9 days,493


In [216]:
user_retention = 100 * lifetimes['num_users'].unstack(0).divide(cohort_group_size, axis=1)
user_retention = user_retention[:lifetime_days_limit+1]

In [217]:
users_df[users_df['chanel']=='305']

Unnamed: 0,id,Reg_date,id_partner,name,chanel
2494234,7886557,2017-12-28,305,VB_GDN_Key_Meet,305


In [219]:
user_retention

chanel,10,13,14,15,29,305,309,32,34,37,...,Youmi,Youmi_Mobile,Zero,cpamatica_adult,cpamatica_mobile,cpamatica_mobile_adult,iDrive,spheredigital,vh_mgid,wefef
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0 days,100.0,100.0,,100.0,100.0,,,100.0,100.0,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,,
1 days,18.50516,30.0,,20.051414,17.863493,,,8.76494,5.405405,20.823082,...,7.8125,5.263158,12.195122,5.109489,22.525381,14.705882,24.858769,20.655412,,
2 days,10.990883,16.666667,,14.395887,10.285871,,,6.374502,3.603604,14.292105,...,3.125,1.547988,7.317073,1.751825,14.022843,8.28877,17.173151,13.406157,,
3 days,9.207494,15.0,,14.138817,8.28888,,,3.984064,2.702703,11.451577,...,1.5625,1.857585,9.756098,1.313869,11.548223,7.352941,13.989536,11.817279,,
4 days,8.145476,20.0,,11.053985,6.606483,,,4.780876,2.702703,9.617535,...,1.5625,0.619195,9.146341,0.875912,9.64467,6.149733,11.864112,9.731877,,
5 days,7.243763,13.333333,,11.311054,6.264533,,,1.992032,5.405405,8.834713,...,1.5625,0.619195,8.536585,0.948905,8.56599,5.347594,10.767657,8.540218,,
6 days,6.602545,8.333333,,8.997429,5.307072,,,2.390438,4.504505,8.365019,...,,1.23839,10.365854,0.80292,8.56599,3.609626,9.888267,8.142999,,
7 days,6.111612,11.666667,,8.226221,4.978799,,,3.585657,2.702703,7.940058,...,1.5625,0.619195,7.317073,0.656934,7.614213,3.475936,9.535537,8.540218,,
8 days,5.420299,10.0,,8.226221,4.48639,,,0.796813,2.702703,6.553344,...,,0.928793,6.097561,0.875912,6.598985,3.877005,8.535092,7.944389,,
9 days,4.939385,6.666667,,6.940874,4.322254,,,1.195219,2.702703,6.754641,...,,0.928793,4.878049,0.729927,5.837563,3.475936,7.945817,7.646475,,
