In [2]:
import pandas as pd 

In [3]:
df_web_data = pd.read_csv('../CSV/df_web_data_combinado.csv')
df_experiment_clients = pd.read_csv('../CSV/df_final_experiment_clients.csv')
df_demo = pd.read_csv('../CSV/df_final_demo.csv')

In [4]:
df_web_data.columns


Index(['client_id', 'visitor_id', 'visit_id', 'process_step', 'date_time'], dtype='object')

In [5]:
df_experiment_clients.columns

Index(['client_id', 'Variation'], dtype='object')

In [6]:
print(df_web_data['process_step'].unique())

['step_3' 'step_2' 'step_1' 'start' 'confirm']


In [18]:
import numpy as np

In [19]:
# Agrupamos por process_step para calcular los clientes únicos en cada paso
steps_data = df_web_data.groupby('process_step')['client_id'].nunique().reset_index()
steps_data.columns = ['process_step', 'unique_clients']
print(steps_data)

# Calculamos la tasa de abandono general
steps_data['abandon_rate'] = steps_data['unique_clients'].pct_change(periods=-1) * 100

# Rellenamos NaN en la última fila (porque no hay siguiente paso para comparar)
steps_data['abandon_rate'] = steps_data['abandon_rate'].fillna(0)
print(steps_data)

# Mezclamos los datos con la variación del experimento
df_merged = df_web_data.merge(df_experiment_clients, on='client_id', how='left')

# Agrupamos por step y variación
steps_experiment = df_merged.groupby(['process_step', 'Variation'])['client_id'].nunique().reset_index()

# Pivotamos para obtener columnas separadas para Control y Test
steps_experiment = steps_experiment.pivot(index='process_step', columns='Variation', values='client_id')

# Calculamos la tasa de abandono para Control
steps_experiment['abandon_rate_control'] = (
    steps_experiment['Control'].diff(periods=-1).clip(lower=0) / 
    steps_experiment['Control'].replace(0, np.nan) * 100
)

# Calculamos la tasa de abandono para Test
steps_experiment['abandon_rate_test'] = (
    steps_experiment['Test'].diff(periods=-1).clip(lower=0) / 
    steps_experiment['Test'].replace(0, np.nan) * 100
)

# Rellenamos los valores NaN generados por la última fila o divisiones por 0
steps_experiment.fillna(0, inplace=True)

print(steps_experiment)


  process_step  unique_clients
0      confirm           81145
1        start          118980
2       step_1          104978
3       step_2           96943
4       step_3           91076
  process_step  unique_clients  abandon_rate
0      confirm           81145    -31.799462
1        start          118980     13.338033
2       step_1          104978      8.288376
3       step_2           96943      6.441873
4       step_3           91076      0.000000
Variation     Control   Test  abandon_rate_control  abandon_rate_test
process_step                                                         
confirm         15434  18687              0.000000           0.000000
start           23397  26679             13.869299           9.040819
step_1          20152  24267              7.453355           8.278732
step_2          18650  22258              6.584450           6.186540
step_3          17422  20881              0.000000           0.000000
