# Virality Dashboard

**NOTE: The output of some cells is not displayed to protect sensitive information**

## Conexión y librerías

In [1]:
import sys
sys.path.append('../')
import conexion_database

Laptop Tandamos
Spartan PC


In [2]:
import pandas as pd
pd.options.display.max_columns = None
import numpy as np

# Para guardar los archivos con la fecha de hoy
from datetime import datetime, date, timedelta

# Limpieza de query
from clean_query import clean_query

# La función de weeknum es equivalente a la de WEEKNUM en Excel, modo 1, en la que se basa Alejo
from calweek import weeknum

In [3]:
# Conexión con Google Sheets
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import json

scopes = [
'https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/drive'
]
credentials = ServiceAccountCredentials.from_json_keyfile_name("../gsheets_key.json", scopes) #access the json key you downloaded earlier 
client = gspread.authorize(credentials) # authenticate the JSON key with gspread

## Carga de data

**Virality Velocity Multiplier Breakdown**
* Fecha de afiliación: el campo más grande entre `app_user_onboarding_approved_date` y `terms_conditions_accepted_date` TAUO.
* Cohorte por fecha de afiliación: cálculo manual con base en fecha de afiliación
* Clic en pestaña de Friends: `screen` es "/INVITE" en TAUA
* Clic en botón de Send Invite: `screen` es "INVITE" o "INVITAR AMIGOS" en TAUA
* Número de invites por usuario afiliado: fecha de registro de los invitados con su `signup_cohort_date` TAU
* Affiliation rate per Invite (%): checar la afiliación de los usuarios que llegaron por invitación 


### Data affiliated users

In [4]:
# Redactar SQL query
query = """
SELECT tau.id, tau.name, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date
FROM tdm_app_user tau 
	JOIN tdm_app_user_onboarding tauo ON tau.id = tauo.app_user_id
WHERE tau.id NOT IN (7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 636, 929, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452,  2454, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697, 19651)
	AND app_user_onboarding_approved_date IS NOT NULL
	AND terms_conditions_accepted_date IS NOT NULL
;
"""
query = clean_query(query)
print(query)

SELECT tau.id, tau.name, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date FROM tdm_app_user tau JOIN tdm_app_user_onboarding tauo ON tau.id = tauo.app_user_id WHERE tau.id NOT IN (7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 636, 929, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452, 2454, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697, 19651) AND app_user_onboarding_approved_date IS NOT NULL AND terms_conditions_accepted_date IS NOT NULL ; 


In [None]:
# Cargar query a un DataFrame
df_affiliated_users = pd.read_sql_query(query, conexion_database.engine)
print(df_affiliated_users.shape)
df_affiliated_users.head(4)

In [6]:
# Calcular columna con fecha de afiliación
df_affiliated_users['fecha_afiliacion'] = df_affiliated_users[['onboarding_approved_date', 'terms_accepted_date']].max(axis=1, skipna=False)

# Sacar año y semana de afiliación (para las gráficas por cohortes)
df_affiliated_users['affiliation_year'] = df_affiliated_users.fecha_afiliacion.dt.year
df_affiliated_users['affiliation_week'] = df_affiliated_users.fecha_afiliacion.apply(weeknum).astype('int')

### Data clic en tab de amigos y en invitar

In [7]:
# Redactar SQL query
query = """
SELECT tau.id, tau.name, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date, screen, taua.insert_date AS date_open_screen, tlv.name
FROM tdm_app_user tau 
	JOIN tdm_app_user_onboarding tauo ON tau.id = tauo.app_user_id
	JOIN tdm_app_user_action AS taua ON tau.id = taua.app_user_id
	JOIN tdm_list_value AS tlv ON tlv.id = taua.app_user_action_id
WHERE tau.id NOT IN (7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 636, 929, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452,  2454, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697, 19651)
	AND taua.screen IN ("/INVITE", "INVITAR", "INVITAR AMIGOS")
    AND app_user_onboarding_approved_date IS NOT NULL
	AND terms_conditions_accepted_date IS NOT NULL
ORDER BY id, taua.insert_date ASC
;
"""
query = clean_query(query)
print(query)

SELECT tau.id, tau.name, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date, screen, taua.insert_date AS date_open_screen, tlv.name FROM tdm_app_user tau JOIN tdm_app_user_onboarding tauo ON tau.id = tauo.app_user_id JOIN tdm_app_user_action AS taua ON tau.id = taua.app_user_id JOIN tdm_list_value AS tlv ON tlv.id = taua.app_user_action_id WHERE tau.id NOT IN (7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 636, 929, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452, 2454, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697, 19651) AND taua.screen IN ("/INVITE", "INVITAR", "INVITAR AMIGOS")   AND app_user_onboarding_approved_date IS NOT NULL AND terms_conditions_accepted_date IS NOT NULL ORDER BY id, taua.insert_date ASC ; 


In [None]:
# Cargar query a un DataFrame
df_clics = pd.read_sql_query(query, conexion_database.engine)
print(df_clics.shape)
df_clics.head(4)

In [9]:
# Calcular columna con fecha de afiliación
df_clics['fecha_afiliacion'] = df_clics[['onboarding_approved_date', 'terms_accepted_date']].max(axis=1, skipna=False)

# Sacar año y semana de afiliación (para las gráficas por cohortes)
df_clics['affiliation_year'] = df_clics.fecha_afiliacion.dt.year
df_clics['affiliation_week'] = df_clics.fecha_afiliacion.apply(weeknum).astype('int')

### Data referidos

In [10]:
query = """
SELECT codes.id_referidor, codes.referidor, codes.onboarding_approved_date_referidor, codes.terms_accepted_date_referidor, tau.id AS id_referido, tau.name AS referido, tau.invitation_code_used, signup_cohort_date, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date 
FROM tdm_app_user AS tau
	JOIN
        (
        SELECT tau.id AS id_referidor, name AS referidor, invitation_code AS invitation_code_referidor, app_user_onboarding_approved_date AS onboarding_approved_date_referidor, terms_conditions_accepted_date AS terms_accepted_date_referidor
        FROM tdm_app_user AS tau
        	LEFT JOIN tdm_app_user_onboarding AS tauo ON tauo.app_user_id = tau.id
        WHERE tau.id NOT IN (8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697) 
        AND tau.name <> ""
        ) AS codes ON tau.invitation_code_used = codes.invitation_code_referidor
	LEFT JOIN tdm_app_user_onboarding AS tauo ON tauo.app_user_id = tau.id
ORDER BY id_referidor
;
"""
query = clean_query(query)
print(query)

SELECT codes.id_referidor, codes.referidor, codes.onboarding_approved_date_referidor, codes.terms_accepted_date_referidor, tau.id AS id_referido, tau.name AS referido, tau.invitation_code_used, signup_cohort_date, app_user_onboarding_approved_date AS onboarding_approved_date, terms_conditions_accepted_date AS terms_accepted_date FROM tdm_app_user AS tau JOIN     (    SELECT tau.id AS id_referidor, name AS referidor, invitation_code AS invitation_code_referidor, app_user_onboarding_approved_date AS onboarding_approved_date_referidor, terms_conditions_accepted_date AS terms_accepted_date_referidor     FROM tdm_app_user AS tau     LEFT JOIN tdm_app_user_onboarding AS tauo ON tauo.app_user_id = tau.id     WHERE tau.id NOT IN (8, 9, 10, 11, 12, 13, 15, 16, 17, 41, 90, 188, 369, 1574, 1788, 2394, 2440, 2432, 2442, 2443, 2444, 2445, 2447, 2446, 2450, 2451, 2452, 2455, 2477, 3227, 4773, 4776, 4816, 7572, 12912, 13697)     AND tau.name <> ""     ) AS codes ON tau.invitation_code_used = codes.in

In [None]:
# Cargar query a un DataFrame
df_referidos = pd.read_sql_query(query, conexion_database.engine)
print(df_referidos.shape)
df_referidos.head(4)

In [12]:
# Calcular columna con fecha de afiliación de referidor
df_referidos['fecha_afiliacion_referidor'] = df_referidos[['onboarding_approved_date_referidor', 'terms_accepted_date_referidor']].max(axis=1, skipna=False)

# Calcular columna con fecha de afiliación de referido
df_referidos['fecha_afiliacion_referido'] = df_referidos[['onboarding_approved_date', 'terms_accepted_date']].max(axis=1, skipna=False)

---

## Virality Velocity Multiplier Breakdown

**IMPORTANTE: Las cifras de afiliados, clics, etc. deben estar en relación con los cohortes de afiliación. Así, por ejemplo, los clics en la pestaña de friends deben ser para los usuarios que se afiliaron en las fechas seleccionadas.** 

In [13]:
metricas_por_dia = pd.DataFrame({'dia': pd.date_range(start='2021-10-12', end=date.today().strftime('%Y-%m-%d')),
                           })
metricas_por_dia.head()

Unnamed: 0,dia
0,2021-10-12
1,2021-10-13
2,2021-10-14
3,2021-10-15
4,2021-10-16


### Aff. Users

In [14]:
# Total de afiliated users
aff_users = len(df_affiliated_users)

Necesito sacar el número de usuarios afiliados por día para poder filtrar en el dashboard. 

In [None]:
# Calcular número de afiliados por día
aff_users_por_dia = df_affiliated_users.resample('D', on='fecha_afiliacion')['id'].count()

# Mezclar con el DataFrame general
metricas_por_dia = metricas_por_dia.merge(aff_users_por_dia, how='left', left_on='dia', right_index=True).rename(columns={'id': 'afiliados'}).fillna(0)
metricas_por_dia.tail()

### Aff. Users Clicked Friends Tab (%)

**De los que se afiliaron en ese día, cuántos dieron clic en la pestaña de amigos eventualmente.**

In [None]:
# Filtrar por usuarios afiliados que navegaron a pestaña de Amigos
data_navegaron_a_amigos = df_clics[df_clics.screen == '/INVITE'].drop_duplicates('id').copy()

# Descomponer por día | Aquí es importante poner por "fecha_afiliacion" para cumplir con las negritas de arriba
navegaron_a_amigos_por_dia = data_navegaron_a_amigos.resample('D', on='fecha_afiliacion')['date_open_screen'].count()

# Mezclar con el DataFrame general
metricas_por_dia = metricas_por_dia.merge(navegaron_a_amigos_por_dia, how='left', left_on='dia', right_index=True).rename(columns={'date_open_screen': 'cohorte_navegaron_a_amigos'}).fillna(0)
metricas_por_dia.tail()

### Aff. Users Clicked Send Invite Button (%)

In [None]:
# Filtrar por usuarios afiliados que hicieron clic en invitar amigos
data_clic_invitar_amigos = df_clics[df_clics.screen.isin(['INVITAR', 'INVITAR AMIGOS'])].drop_duplicates('id').copy()

# Descomponer por día
clic_invitar_amigos_por_dia = data_clic_invitar_amigos.resample('D', on='fecha_afiliacion')['id'].count()

# Mezclar con el DataFrame general
metricas_por_dia = metricas_por_dia.merge(clic_invitar_amigos_por_dia, how='left', left_on='dia', right_index=True).rename(columns={'id': 'cohorte_clic_invitar_amigos'}).fillna(0)
metricas_por_dia.tail()

### Avg. Invites per Aff User (#)

**Necesito sacar cuántos referidos fueron traídos por gente que se afilió en ese día**

In [None]:
# La data ya está filtrada por las personas que fueron invitadas por otros

# Descomponer cuántos fueron invitados por día
invitados_por_dia = df_referidos.resample('D', on='fecha_afiliacion_referidor')['id_referido'].count()

# Mezclar con el DataFrame general
metricas_por_dia = metricas_por_dia.merge(invitados_por_dia, how='left', left_on='dia', right_index=True).rename(columns={'id_referido': 'cohorte_referidos'}).fillna(0)
metricas_por_dia.tail()

### Affiliation Rate per Invite (%)

**Número de referidos que se terminaron afiliando, PERO con relación a la fecha de afiliación del referidor (es decir, lo que nos importa son los referidos se afiliaron que fueon traídos con cierto cohorte de afiliación)**

In [None]:
# Filtrar por aquellas personas que sí se afiliaron
invitados_afiliados = df_referidos[df_referidos.fecha_afiliacion_referido.notnull()]

# Descomponer cuántos se afiliaron por día
invitados_afiliados_por_dia = invitados_afiliados.resample('D', on='fecha_afiliacion_referidor')['id_referido'].count()

# Mezclar con el DataFrame general
metricas_por_dia = metricas_por_dia.merge(invitados_afiliados_por_dia, how='left', left_on='dia', right_index=True).rename(columns={'id_referido': 'cohorte_referidos_que_se_terminaron_afiliando'}).fillna(0)
metricas_por_dia.head(60)

In [None]:
metricas_por_dia.info()

### Pasar a G Sheets

In [21]:
sheet = client.open("Business_Health_Dashboard")  #open sheet

#replace sheet_name with the name that corresponds to yours, e.g, it can be sheet1
sheet = sheet.worksheet("data_virality_multiplier")

In [22]:
# Transformar columna datetime a string
metricas_por_dia['dia'] = metricas_por_dia['dia'].dt.strftime("%Y-%m-%d")

In [23]:
sheet.update([metricas_por_dia.columns.values.tolist()] + metricas_por_dia.values.tolist(), value_input_option='USER_ENTERED')

{'spreadsheetId': '1ZKpgLgKOSt-8JuokkfHOcTBAG72ER73-RcE-g7sttJ4',
 'updatedRange': 'data_virality_multiplier!A1:F207',
 'updatedRows': 207,
 'updatedColumns': 6,
 'updatedCells': 1242}

## Clicked Friends Tab by Cohort

In [24]:
nombre_variable = "dias_en_dar_clic_amigos"

### Cleaning

In [25]:
data_clics = df_clics[df_clics.screen == '/INVITE'].drop_duplicates('id').copy()

In [None]:
data_clics.head()

In [27]:
# Agregar columna de días en dar clic en Friends Tab
data_clics[nombre_variable] = (data_clics.date_open_screen.dt.date - data_clics.fecha_afiliacion.dt.date).dt.days

In [28]:
# Crear rango para mis bins
rango_bins_dias_en_clic_amigos = np.arange(-14, 31, 1)
rango_bins_dias_en_clic_amigos = np.insert(rango_bins_dias_en_clic_amigos, 0, -10000)
rango_bins_dias_en_clic_amigos = np.append(rango_bins_dias_en_clic_amigos, 10000)
rango_bins_dias_en_clic_amigos

array([-10000,    -14,    -13,    -12,    -11,    -10,     -9,     -8,
           -7,     -6,     -5,     -4,     -3,     -2,     -1,      0,
            1,      2,      3,      4,      5,      6,      7,      8,
            9,     10,     11,     12,     13,     14,     15,     16,
           17,     18,     19,     20,     21,     22,     23,     24,
           25,     26,     27,     28,     29,     30,  10000])

In [29]:
# Crear labels para mis bins
labels_rango_bins_dias_en_clic_amigos = [str(i) for i in np.arange(-15, 31, 1)]
print(labels_rango_bins_dias_en_clic_amigos)

['-15', '-14', '-13', '-12', '-11', '-10', '-9', '-8', '-7', '-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']


In [30]:
data_cohortes = data_clics.groupby(['affiliation_year', 'affiliation_week', 
                  pd.cut(data_clics[nombre_variable], 
                         bins=rango_bins_dias_en_clic_amigos, 
                         right=False, 
                         labels=labels_rango_bins_dias_en_clic_amigos)])['id'].count().reset_index()

In [31]:
# Agregar semanas que no tenían ningún registro; así, quedarán con un cero. 
labels = labels_rango_bins_dias_en_clic_amigos
id_o_user_id = 'id'
dias_en = nombre_variable

for year in [2021, 2022]:
    for week in list(range(2, 54)):
    
    # Checar si existe esa semana; si no existe, créala
        if ((data_cohortes['affiliation_year'] == year) & (data_cohortes['affiliation_week'] == week)).any() == False:
            # Crear año y semana con valores en cero
            d = {'affiliation_year': [year for _ in labels], 'affiliation_week': [week for _ in labels], dias_en: [label for label in labels], id_o_user_id: [0 for _ in labels]}
            df_data = pd.DataFrame(data=d)

            data_cohortes = pd.concat([data_cohortes, df_data])

In [32]:
## OJO: LOS VALORES NO ESTÁN ORDENADOS, POR LO QUE PARECE QUE NO ESTÁ EL AÑO 2022
# Eliminar semanas vacías que pandas agrega automáticamente
# Sacar año y semana actuales
año_actual = datetime.today().year
semana_actual = weeknum(datetime.today())

# Eliminar signup_weeks agregadas automáticamente por pandas
data_cohortes = data_cohortes[~((data_cohortes.affiliation_year == 2021) & (data_cohortes.affiliation_week < 44)) 
                                & ~((data_cohortes.affiliation_year == año_actual) & (data_cohortes.affiliation_week > semana_actual))].reset_index(drop=True)

In [33]:
# Agreguemos columna con los afiliados totales por cohorte, para después sacar el porcentaje de dar clics por cohorte de afiliación.
data_cohortes[nombre_variable + '_cumulative'] = data_cohortes.groupby(['affiliation_year', 'affiliation_week'])['id'].cumsum()

Los afiliados por cohorte me salen diferentes a los de Conversion_rate porque hay personas (como 38) que se convirtieron antes de afiliarse. Voy a ignorar esos casos aquí. 

In [None]:
# Tabla con afiliados totales por cohorte
afiliados_total_por_cohorte = df_affiliated_users.groupby(['affiliation_year', 'affiliation_week'], as_index=False)['id'].size()
afiliados_total_por_cohorte.rename({'size': 'total_afiliados_cohorte'}, axis=1, inplace=True)
afiliados_total_por_cohorte.head(10)

In [None]:
# Unir con la tabla general de data_cohortes
data_cohortes_per = data_cohortes.merge(afiliados_total_por_cohorte[['affiliation_year', 'affiliation_week', 'total_afiliados_cohorte']], how='left', left_on=['affiliation_year','affiliation_week'], right_on=['affiliation_year','affiliation_week'])
data_cohortes_per.head()

In [36]:
data_cohortes_per[nombre_variable + '_percentage'] = (data_cohortes_per[nombre_variable + '_cumulative'] / data_cohortes_per['total_afiliados_cohorte'] * 100).round(1)

Data final:

In [None]:
data_cohortes_per

### Transformar data para graficar

In [38]:
cohort_pivot = pd.pivot_table(data_cohortes_per, values=(nombre_variable + '_percentage'), index=nombre_variable, columns=['affiliation_year', 'affiliation_week'])
cohort_pivot.columns = cohort_pivot.columns.map(lambda x: '|'.join([str(i) for i in x]))
cohort_pivot.reset_index(inplace=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_dar_clic_amigos,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|1,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.3,0.0,0.0,15.2,0.0,4.0,6.4,10.7,16.3,2.4,11.4,0.0,2.8,0.0
1,-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
2,-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
3,-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
4,-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
5,-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,0.0,0.0
6,-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,8.2,0.0,0.0,0.0,0.0,0.0
7,-2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.9,0.0,0.0,15.2,0.0,2.0,4.3,8.9,16.3,2.4,11.4,0.0,2.8,0.0
8,-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.9,0.0,0.0,15.2,0.0,0.0,4.3,8.9,14.3,2.4,9.1,0.0,2.8,0.0
9,-4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.9,0.0,0.0,15.2,0.0,0.0,4.3,8.9,14.3,2.4,9.1,0.0,2.8,0.0


In [39]:
# Cambiar columna de días a numérica para poder ordenarla
cohort_pivot = cohort_pivot.astype({nombre_variable: 'float'})

# Ordenar valores de manera correcta
cohort_pivot = cohort_pivot.sort_values(by=nombre_variable).reset_index(drop=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_dar_clic_amigos,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|1,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,8.2,0.0,0.0,0.0,0.0,0.0
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,0.0,0.0
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.7,0.0,0.0,12.1,0.0,0.0,2.1,1.8,10.2,0.0,2.3,0.0,2.8,0.0
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.1,0.0,0.0,12.1,0.0,0.0,2.1,1.8,10.2,0.0,2.3,0.0,2.8,0.0
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.8,0.0,0.0,13.6,0.0,0.0,2.1,1.8,10.2,0.0,4.5,0.0,2.8,0.0
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.9,0.0,0.0,15.2,0.0,0.0,2.1,1.8,10.2,0.0,9.1,0.0,2.8,0.0


In [40]:
# Eliminar los días que todavía no suceden
for i in range(( (len(cohort_pivot)-16) // 7)+1): 
    i += 1
    cohort_pivot.iloc[15 + (7*(i-1)):, -i] = ""

In [41]:
cohort_pivot

Unnamed: 0,dias_en_dar_clic_amigos,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|1,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,8.2,0.0,0.0,0.0,0.0,0.0
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,0.0,0.0
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,7.6,0.0,0.0,0.0,1.8,10.2,0.0,0.0,0.0,2.8,0.0
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.3,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,9.1,0.0,0.0,2.1,1.8,10.2,0.0,0.0,0.0,2.8,0.0
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.7,0.0,0.0,12.1,0.0,0.0,2.1,1.8,10.2,0.0,2.3,0.0,2.8,0.0
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.1,0.0,0.0,12.1,0.0,0.0,2.1,1.8,10.2,0.0,2.3,0.0,2.8,0.0
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.8,0.0,0.0,13.6,0.0,0.0,2.1,1.8,10.2,0.0,4.5,0.0,2.8,0.0
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.9,0.0,0.0,15.2,0.0,0.0,2.1,1.8,10.2,0.0,9.1,0.0,2.8,0.0


### Pasar a G Sheets

In [42]:
sheet = client.open("Business_Health_Dashboard")  #open sheet

#replace sheet_name with the name that corresponds to yours, e.g, it can be sheet1
sheet = sheet.worksheet("data_virality_tab_cohort")

In [43]:
sheet.update([cohort_pivot.columns.values.tolist()] + cohort_pivot.values.tolist())

{'spreadsheetId': '1ZKpgLgKOSt-8JuokkfHOcTBAG72ER73-RcE-g7sttJ4',
 'updatedRange': 'data_virality_tab_cohort!A1:AD47',
 'updatedRows': 47,
 'updatedColumns': 30,
 'updatedCells': 1410}

## Button Send Invite by Cohort

In [44]:
nombre_variable = "dias_en_button_send_invite"

### Cleaning

In [45]:
data_g = df_clics[df_clics.screen.isin(['INVITAR', 'INVITAR AMIGOS'])].drop_duplicates('id').copy()

In [None]:
data_g.head()

In [47]:
# Agregar columna de días en dar clic en Send Invite Button
data_g[nombre_variable] = (data_g.date_open_screen.dt.date - data_g.fecha_afiliacion.dt.date).dt.days

In [48]:
# Crear rango para mis bins
rango_bins_dias = np.arange(-14, 31, 1)
rango_bins_dias = np.insert(rango_bins_dias, 0, -10000)
rango_bins_dias = np.append(rango_bins_dias, 10000)
rango_bins_dias

array([-10000,    -14,    -13,    -12,    -11,    -10,     -9,     -8,
           -7,     -6,     -5,     -4,     -3,     -2,     -1,      0,
            1,      2,      3,      4,      5,      6,      7,      8,
            9,     10,     11,     12,     13,     14,     15,     16,
           17,     18,     19,     20,     21,     22,     23,     24,
           25,     26,     27,     28,     29,     30,  10000])

In [49]:
# Crear labels para mis bins
labels_rango_bins_dias = [str(i) for i in np.arange(-15, 31, 1)]
print(labels_rango_bins_dias)

['-15', '-14', '-13', '-12', '-11', '-10', '-9', '-8', '-7', '-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']


In [50]:
data_cohortes = data_g.groupby(['affiliation_year', 'affiliation_week', 
                  pd.cut(data_g[nombre_variable], 
                         bins=rango_bins_dias, 
                         right=False, 
                         labels=labels_rango_bins_dias)])['id'].count().reset_index()

In [51]:
# Agregar semanas que no tenían ningún registro; así, quedarán con un cero. 
labels = labels_rango_bins_dias
id_o_user_id = 'id'
dias_en = nombre_variable

for year in [2021, 2022]:
    for week in list(range(2, 54)):
    
    # Checar si existe esa semana; si no existe, créala
        if ((data_cohortes['affiliation_year'] == year) & (data_cohortes['affiliation_week'] == week)).any() == False:
            # Crear año y semana con valores en cero
            d = {'affiliation_year': [year for _ in labels], 'affiliation_week': [week for _ in labels], dias_en: [label for label in labels], id_o_user_id: [0 for _ in labels]}
            df_data = pd.DataFrame(data=d)

            data_cohortes = pd.concat([data_cohortes, df_data])

In [52]:
## OJO: LOS VALORES NO ESTÁN ORDENADOS, POR LO QUE PARECE QUE NO ESTÁ EL AÑO 2022
# Eliminar semanas vacías que pandas agrega automáticamente
# Sacar año y semana actuales
año_actual = datetime.today().year
semana_actual = weeknum(datetime.today())

# Eliminar signup_weeks agregadas automáticamente por pandas
data_cohortes = data_cohortes[~((data_cohortes.affiliation_year == 2021) & (data_cohortes.affiliation_week < 44)) 
                                & ~((data_cohortes.affiliation_year == año_actual) & (data_cohortes.affiliation_week > semana_actual))].reset_index(drop=True)

In [53]:
# Agreguemos columna con los afiliados totales por cohorte, para después sacar el porcentaje de dar clics por cohorte de afiliación.
data_cohortes[nombre_variable + '_cumulative'] = data_cohortes.groupby(['affiliation_year', 'affiliation_week'])['id'].cumsum()

Los afiliados por cohorte me salen diferentes a los de Conversion_rate porque hay personas (como 38) que se convirtieron antes de afiliarse. Voy a ignorar esos casos aquí. 

In [None]:
# Tabla con afiliados totales por cohorte
afiliados_total_por_cohorte = df_affiliated_users.groupby(['affiliation_year', 'affiliation_week'], as_index=False)['id'].size()
afiliados_total_por_cohorte.rename({'size': 'total_afiliados_cohorte'}, axis=1, inplace=True)
afiliados_total_por_cohorte.head(10)

In [None]:
# Unir con la tabla general de data_cohortes
data_cohortes_per = data_cohortes.merge(afiliados_total_por_cohorte[['affiliation_year', 'affiliation_week', 'total_afiliados_cohorte']], how='left', left_on=['affiliation_year','affiliation_week'], right_on=['affiliation_year','affiliation_week'])
data_cohortes_per.head()

In [56]:
data_cohortes_per[nombre_variable + '_percentage'] = (data_cohortes_per[nombre_variable + '_cumulative'] / data_cohortes_per['total_afiliados_cohorte'] * 100).round(1)

Data final:

In [None]:
data_cohortes_per

### Transformar data para graficar

In [58]:
cohort_pivot = pd.pivot_table(data_cohortes_per, values=(nombre_variable + '_percentage'), index=nombre_variable, columns=['affiliation_year', 'affiliation_week'])
cohort_pivot.columns = cohort_pivot.columns.map(lambda x: '|'.join([str(i) for i in x]))
cohort_pivot.reset_index(inplace=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_button_send_invite,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,3.2,3.8,1.1,6.2,3.9,12.4,4.8,4.6,15.2,11.4,6.0,12.8,10.7,18.4,4.9,9.1,4.9,5.6,22.2
1,-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,3.8,0.0,3.7,3.9,7.6,3.2,1.5,10.6,0.0,0.0,8.5,8.9,12.2,2.4,6.8,4.9,2.8,22.2
8,-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.7,0.0,3.8,0.0,2.5,3.9,7.6,1.6,1.5,9.1,0.0,0.0,6.4,8.9,8.2,2.4,6.8,2.4,2.8,11.1
9,-4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,2.6,6.9,0.0,0.0,7.6,0.0,0.0,6.4,5.4,8.2,2.4,6.8,2.4,2.8,11.1


In [59]:
# Cambiar columna de días a numérica para poder ordenarla
cohort_pivot = cohort_pivot.astype({nombre_variable: 'float'})

# Ordenar valores de manera correcta
cohort_pivot = cohort_pivot.sort_values(by=nombre_variable).reset_index(drop=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_button_send_invite,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,0.0,2.4,2.8,11.1
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,4.8,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,2.3,2.4,2.8,11.1
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,1.3,5.5,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,6.8,2.4,2.8,11.1


In [60]:
# Eliminar los días que todavía no suceden
for i in range(( (len(cohort_pivot)-16) // 7)+1): 
    i += 1
    cohort_pivot.iloc[15 + (7*(i-1)):, -i] = ""

In [61]:
cohort_pivot

Unnamed: 0,dias_en_button_send_invite,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,0.0,2.4,2.8,11.1
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,4.8,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,2.3,2.4,2.8,11.1
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,1.3,5.5,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,6.8,2.4,2.8,11.1


### Pasar a G Sheets

In [62]:
sheet = client.open("Business_Health_Dashboard")  #open sheet

#replace sheet_name with the name that corresponds to yours, e.g, it can be sheet1
sheet = sheet.worksheet("data_virality_button_cohort")

In [63]:
sheet.update([cohort_pivot.columns.values.tolist()] + cohort_pivot.values.tolist())

{'spreadsheetId': '1ZKpgLgKOSt-8JuokkfHOcTBAG72ER73-RcE-g7sttJ4',
 'updatedRange': 'data_virality_button_cohort!A1:AC47',
 'updatedRows': 47,
 'updatedColumns': 29,
 'updatedCells': 1363}

## Avg. Invites per Aff. User by Cohort

Agregar columnas de year y week para agrpar por cohort después

In [64]:
nombre_variable = "dias_en_invite_se_une"

### Cleaning

In [65]:
df_referidos['id_referido'].count()

332

In [66]:
data_g = df_clics[df_clics.screen.isin(['INVITAR', 'INVITAR AMIGOS'])].drop_duplicates('id').copy()

In [None]:
data_g.head()

In [68]:
# Agregar columna de días en dar clic en Send Invite Button
data_g[nombre_variable] = (data_g.date_open_screen.dt.date - data_g.fecha_afiliacion.dt.date).dt.days

In [69]:
# Crear rango para mis bins
rango_bins_dias = np.arange(-14, 16, 1)
rango_bins_dias = np.insert(rango_bins_dias, 0, -10000)
rango_bins_dias = np.append(rango_bins_dias, 10000)
rango_bins_dias

array([-10000,    -14,    -13,    -12,    -11,    -10,     -9,     -8,
           -7,     -6,     -5,     -4,     -3,     -2,     -1,      0,
            1,      2,      3,      4,      5,      6,      7,      8,
            9,     10,     11,     12,     13,     14,     15,  10000])

In [70]:
# Crear labels para mis bins
labels_rango_bins_dias = [str(i) for i in np.arange(-15, 16, 1)]
print(labels_rango_bins_dias)

['-15', '-14', '-13', '-12', '-11', '-10', '-9', '-8', '-7', '-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15']


In [71]:
data_cohortes = data_g.groupby(['affiliation_year', 'affiliation_week', 
                  pd.cut(data_g[nombre_variable], 
                         bins=rango_bins_dias, 
                         right=False, 
                         labels=labels_rango_bins_dias)])['id'].count().reset_index()

In [72]:
# Agregar semanas que no tenían ningún registro; así, quedarán con un cero. 
labels = labels_rango_bins_dias
id_o_user_id = 'id'
dias_en = nombre_variable

for year in [2021, 2022]:
    for week in list(range(2, 54)):
    
    # Checar si existe esa semana; si no existe, créala
        if ((data_cohortes['affiliation_year'] == year) & (data_cohortes['affiliation_week'] == week)).any() == False:
            # Crear año y semana con valores en cero
            d = {'affiliation_year': [year for _ in labels], 'affiliation_week': [week for _ in labels], dias_en: [label for label in labels], id_o_user_id: [0 for _ in labels]}
            df_data = pd.DataFrame(data=d)

            data_cohortes = pd.concat([data_cohortes, df_data])

In [73]:
## OJO: LOS VALORES NO ESTÁN ORDENADOS, POR LO QUE PARECE QUE NO ESTÁ EL AÑO 2022
# Eliminar semanas vacías que pandas agrega automáticamente
# Sacar año y semana actuales
año_actual = datetime.today().year
semana_actual = weeknum(datetime.today())

# Eliminar signup_weeks agregadas automáticamente por pandas
data_cohortes = data_cohortes[~((data_cohortes.affiliation_year == 2021) & (data_cohortes.affiliation_week < 44)) 
                                & ~((data_cohortes.affiliation_year == año_actual) & (data_cohortes.affiliation_week > semana_actual))].reset_index(drop=True)

In [74]:
# Agreguemos columna con los afiliados totales por cohorte, para después sacar el porcentaje de dar clics por cohorte de afiliación.
data_cohortes[nombre_variable + '_cumulative'] = data_cohortes.groupby(['affiliation_year', 'affiliation_week'])['id'].cumsum()

Los afiliados por cohorte me salen diferentes a los de Conversion_rate porque hay personas (como 38) que se convirtieron antes de afiliarse. Voy a ignorar esos casos aquí. 

In [None]:
# Tabla con afiliados totales por cohorte
afiliados_total_por_cohorte = df_affiliated_users.groupby(['affiliation_year', 'affiliation_week'], as_index=False)['id'].size()
afiliados_total_por_cohorte.rename({'size': 'total_afiliados_cohorte'}, axis=1, inplace=True)
afiliados_total_por_cohorte.head(10)

In [None]:
# Unir con la tabla general de data_cohortes
data_cohortes_per = data_cohortes.merge(afiliados_total_por_cohorte[['affiliation_year', 'affiliation_week', 'total_afiliados_cohorte']], how='left', left_on=['affiliation_year','affiliation_week'], right_on=['affiliation_year','affiliation_week'])
data_cohortes_per.head()

In [77]:
data_cohortes_per[nombre_variable + '_percentage'] = (data_cohortes_per[nombre_variable + '_cumulative'] / data_cohortes_per['total_afiliados_cohorte'] * 100).round(1)

Data final:

In [None]:
data_cohortes_per

### Transformar data para graficar

In [79]:
cohort_pivot = pd.pivot_table(data_cohortes_per, values=(nombre_variable + '_percentage'), index=nombre_variable, columns=['affiliation_year', 'affiliation_week'])
cohort_pivot.columns = cohort_pivot.columns.map(lambda x: '|'.join([str(i) for i in x]))
cohort_pivot.reset_index(inplace=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_invite_se_une,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,3.2,3.8,1.1,6.2,3.9,12.4,4.8,4.6,15.2,11.4,6.0,12.8,10.7,18.4,4.9,9.1,4.9,5.6,22.2
1,-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,3.8,0.0,3.7,3.9,7.6,3.2,1.5,10.6,0.0,0.0,8.5,8.9,12.2,2.4,6.8,4.9,2.8,22.2
8,-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.7,0.0,3.8,0.0,2.5,3.9,7.6,1.6,1.5,9.1,0.0,0.0,6.4,8.9,8.2,2.4,6.8,2.4,2.8,11.1
9,-4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,2.6,6.9,0.0,0.0,7.6,0.0,0.0,6.4,5.4,8.2,2.4,6.8,2.4,2.8,11.1


In [80]:
# Cambiar columna de días a numérica para poder ordenarla
cohort_pivot = cohort_pivot.astype({nombre_variable: 'float'})

# Ordenar valores de manera correcta
cohort_pivot = cohort_pivot.sort_values(by=nombre_variable).reset_index(drop=True)
cohort_pivot.head(15)

Unnamed: 0,dias_en_invite_se_une,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,0.0,2.4,2.8,11.1
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,4.8,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,2.3,2.4,2.8,11.1
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,1.3,5.5,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,6.8,2.4,2.8,11.1


In [81]:
# Eliminar los días que todavía no suceden
for i in range(( (len(cohort_pivot)-16) // 7)+1): 
    i += 1
    cohort_pivot.iloc[15 + (7*(i-1)):, -i] = ""

In [82]:
cohort_pivot

Unnamed: 0,dias_en_invite_se_une,2021|44,2021|45,2021|46,2021|47,2021|48,2021|49,2021|50,2021|51,2021|52,2021|53,2022|2,2022|3,2022|4,2022|5,2022|6,2022|7,2022|8,2022|9,2022|10,2022|11,2022|12,2022|13,2022|14,2022|15,2022|16,2022|17,2022|18,2022|19
0,-15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
1,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
2,-13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,2.1,0.0,4.1,2.4,0.0,2.4,0.0,11.1
3,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
4,-11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
5,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,3.4,0.0,0.0,4.5,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
6,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,0.0,4.1,2.4,0.0,2.4,0.0,11.1
7,-8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,3.4,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,0.0,2.4,2.8,11.1
8,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,0.0,1.2,1.3,4.8,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,2.3,2.4,2.8,11.1
9,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,0.0,1.9,0.0,2.5,1.3,5.5,0.0,0.0,7.6,0.0,0.0,4.3,1.8,4.1,2.4,6.8,2.4,2.8,11.1


### Pasar a G Sheets

In [83]:
sheet = client.open("Business_Health_Dashboard")  #open sheet

#replace sheet_name with the name that corresponds to yours, e.g, it can be sheet1
sheet = sheet.worksheet("data_virality_invites")

In [84]:
sheet.update([cohort_pivot.columns.values.tolist()] + cohort_pivot.values.tolist())

{'spreadsheetId': '1ZKpgLgKOSt-8JuokkfHOcTBAG72ER73-RcE-g7sttJ4',
 'updatedRange': 'data_virality_button_cohort!A1:AC32',
 'updatedRows': 32,
 'updatedColumns': 29,
 'updatedCells': 928}