In [5]:
import pandas as pd

# === Vanguard KPI Summary for Tableau ===
data = [
    ["Completion Rate", "Control", 49.97, "%", "Percentage of Control group users who completed the process"],
    ["Completion Rate", "Test", 52.57, "%", "Percentage of Test group users who completed the process"],
    ["Strict Completion Rate", "Control", 49.78, "%", "Percentage of Control group users who followed strict funnel order and completed it"],
    ["Strict Completion Rate", "Test", 52.43, "%", "Percentage of Test group users who followed strict funnel order and completed it"],
    ["Completion Rate Lift", "Test vs Control", 2.65, "pp", "Improvement in completion rate for Test compared to Control"],
    ["Step_Back_Errors", "Control", 5521, "Count", "Total backward step errors in Control group"],
    ["Step_Back_Errors", "Test", 8138, "Count", "Total backward step errors in Test group"],
    ["Step_Repeat_Errors", "Control", 7298, "Count", "Total repeated step errors in Control group"],
    ["Step_Repeat_Errors", "Test", 11139, "Count", "Total repeated step errors in Test group"],
    ["Mean_Step_Time_start", "Control", 57.46, "Seconds", "Average duration at start step for Control group"],
    ["Mean_Step_Time_start", "Test", 51.39, "Seconds", "Average duration at start step for Test group"],
    ["Mean_Step_Time_step_1", "Control", 46.56, "Seconds", "Average duration at Step 1 for Control group"],
    ["Mean_Step_Time_step_1", "Test", 56.53, "Seconds", "Average duration at Step 1 for Test group"],
    ["Mean_Step_Time_step_2", "Control", 94.35, "Seconds", "Average duration at Step 2 for Control group"],
    ["Mean_Step_Time_step_2", "Test", 92.32, "Seconds", "Average duration at Step 2 for Test group"],
    ["Mean_Step_Time_step_3", "Control", 129.58, "Seconds", "Average duration at Step 3 for Control group"],
    ["Mean_Step_Time_step_3", "Test", 111.67, "Seconds", "Average duration at Step 3 for Test group"]
]

df_kpi = pd.DataFrame(data, columns=["Metric", "Variation", "Value", "Unit", "Description"])

# Save to CSV
output_path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma.csv"
df_kpi.to_csv(output_path, index=False)
print(f"✅ File saved successfully at: {output_path}")
df_kpi.head()



✅ File saved successfully at: ../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma.csv


Unnamed: 0,Metric,Variation,Value,Unit,Description
0,Completion Rate,Control,49.97,%,Percentage of Control group users who complete...
1,Completion Rate,Test,52.57,%,Percentage of Test group users who completed t...
2,Strict Completion Rate,Control,49.78,%,Percentage of Control group users who followed...
3,Strict Completion Rate,Test,52.43,%,Percentage of Test group users who followed st...
4,Completion Rate Lift,Test vs Control,2.65,pp,Improvement in completion rate for Test compar...


In [1]:
import pandas as pd

# === 1. Load your existing CSV ===
path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma.csv"
df = pd.read_csv(path)

# === 2. Filter step-time metrics ===
step_metrics = [
    'Mean_Step_Time_start',
    'Mean_Step_Time_step_1',
    'Mean_Step_Time_step_2',
    'Mean_Step_Time_step_3'
]
df_steps = df[df['Metric'].isin(step_metrics)]

# === 3. Calculate the total mean completion time per Variation ===
mean_completion = (
    df_steps.groupby('Variation', as_index=False)['Value'].sum()
)
mean_completion['Metric'] = 'Mean_Completion_Time'
mean_completion['Unit'] = 'Seconds'
mean_completion['Description'] = 'Total average time (sum of steps) to complete the process'

# === Redondear a 2 decimales ===
mean_completion['Value'] = mean_completion['Value'].round(2)

# === 4. Reorder columns to match your original CSV ===
df_final = pd.concat([df, mean_completion[df.columns]], ignore_index=True)

# === 5. Save new version ===
output_path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma_v2.csv"
df_final.to_csv(output_path, index=False)

print("✅ File saved successfully!")
print(df_final.tail(6))


✅ File saved successfully!
                   Metric Variation   Value     Unit  \
13  Mean_Step_Time_step_2   Control   94.35  Seconds   
14  Mean_Step_Time_step_2      Test   92.32  Seconds   
15  Mean_Step_Time_step_3   Control  129.58  Seconds   
16  Mean_Step_Time_step_3      Test  111.67  Seconds   
17   Mean_Completion_Time   Control  327.95  Seconds   
18   Mean_Completion_Time      Test  311.91  Seconds   

                                          Description  
13       Average duration at Step 2 for Control group  
14          Average duration at Step 2 for Test group  
15       Average duration at Step 3 for Control group  
16          Average duration at Step 3 for Test group  
17  Total average time (sum of steps) to complete ...  
18  Total average time (sum of steps) to complete ...  


In [4]:
import pandas as pd

# === Load existing KPI summary ===
path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma_1.csv"
df = pd.read_csv(path)

# === Add new metrics ===
new_metrics = [
    ["Calls_6M_Mean", "Control", 2.1, "count", "Average number of calls in 6 months per user (Control group)"],
    ["Calls_6M_Mean", "Test", 2.4, "count", "Average number of calls in 6 months per user (Test group)"],
    ["Logons_6M_Mean", "Control", 5.3, "count", "Average number of logons in 6 months per user (Control group)"],
    ["Logons_6M_Mean", "Test", 6.0, "count", "Average number of logons in 6 months per user (Test group)"],
    ["Strict_Lift_CI", "Test vs Control", "1.78 – 3.44", "%", "95% confidence interval for the strict completion rate lift"],
    ["p_value", "Test vs Control", 0.0015, "", "Two-tailed p-value from proportion test confirming statistical significance"]
]
# === Display Vanguard KPI Summary nicely aligned ===
path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma_1.csv"
df = pd.read_csv(path)

# Set visual display options (does not modify the file)
pd.set_option("display.colheader_justify", "left")   # left-align headers
pd.set_option("display.width", None)                 # prevents line wrapping
pd.set_option("display.max_colwidth", None)          # shows full text in Description column

# Show nicely aligned preview
print("📋 Vanguard KPI Summary Preview (clean and aligned):\n")
display(
    df.style
    .set_properties(**{'text-align': 'left'})  # left-align all text
    .hide(axis='index')                        # hide numeric index
)
df_new = pd.DataFrame(new_metrics, columns=["Metric", "Variation", "Value", "Unit", "Description"])

# === Append new rows and save ===
df_final = pd.concat([df, df_new], ignore_index=True)
df_final.to_csv(path, index=False)

print(f"✅ File updated successfully at: {path}")
print(f"Total metrics now: {len(df_final)} rows")
df_final.tail(8)


📋 Vanguard KPI Summary Preview (clean and aligned):



Metric,Variation,Value,Unit,Description
Completion Rate,Control,49.97,%,Percentage of Control group users who completed the process
Completion Rate,Test,52.57,%,Percentage of Test group users who completed the process
Strict Completion Rate,Control,49.78,%,Percentage of Control group users who followed strict funnel order and completed it
Strict Completion Rate,Test,52.43,%,Percentage of Test group users who followed strict funnel order and completed it
Completion Rate Lift,Test vs Control,2.65,pp,Improvement in completion rate for Test compared to Control
Step_Back_Errors,Control,5521.0,Count,Total backward step errors in Control group
Step_Back_Errors,Test,8138.0,Count,Total backward step errors in Test group
Step_Repeat_Errors,Control,7298.0,Count,Total repeated step errors in Control group
Step_Repeat_Errors,Test,11139.0,Count,Total repeated step errors in Test group
Mean_Step_Time_start,Control,57.46,Seconds,Average duration at start step for Control group


✅ File updated successfully at: ../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma_1.csv
Total metrics now: 36 rows


Unnamed: 0,Metric,Variation,Value,Unit,Description
28,Strict_Lift_CI,Test vs Control,1.78 – 3.44,%,95% confidence interval for the strict completion rate lift
29,p_value,Test vs Control,0.0015,,Two-tailed p-value from proportion test confirming statistical significance
30,Calls_6M_Mean,Control,2.1,count,Average number of calls in 6 months per user (Control group)
31,Calls_6M_Mean,Test,2.4,count,Average number of calls in 6 months per user (Test group)
32,Logons_6M_Mean,Control,5.3,count,Average number of logons in 6 months per user (Control group)
33,Logons_6M_Mean,Test,6.0,count,Average number of logons in 6 months per user (Test group)
34,Strict_Lift_CI,Test vs Control,1.78 – 3.44,%,95% confidence interval for the strict completion rate lift
35,p_value,Test vs Control,0.0015,,Two-tailed p-value from proportion test confirming statistical significance


In [3]:
import pandas as pd

# === Display Vanguard KPI Summary nicely aligned ===
path = "../../data/clean_data_txt/vanguard_kpi_summary_for_tableau_irma_1.csv"
df = pd.read_csv(path)

# Set visual display options (does not modify the file)
pd.set_option("display.colheader_justify", "left")   # left-align headers
pd.set_option("display.width", None)                 # prevents line wrapping
pd.set_option("display.max_colwidth", None)          # shows full text in Description column

# Show nicely aligned preview
print("📋 Vanguard KPI Summary Preview (clean and aligned):\n")
display(
    df.style
    .set_properties(**{'text-align': 'left'})  # left-align all text
    .hide(axis='index')                        # hide numeric index
)


📋 Vanguard KPI Summary Preview (clean and aligned):



Metric,Variation,Value,Unit,Description
Completion Rate,Control,49.97,%,Percentage of Control group users who completed the process
Completion Rate,Test,52.57,%,Percentage of Test group users who completed the process
Strict Completion Rate,Control,49.78,%,Percentage of Control group users who followed strict funnel order and completed it
Strict Completion Rate,Test,52.43,%,Percentage of Test group users who followed strict funnel order and completed it
Completion Rate Lift,Test vs Control,2.65,pp,Improvement in completion rate for Test compared to Control
Step_Back_Errors,Control,5521.0,Count,Total backward step errors in Control group
Step_Back_Errors,Test,8138.0,Count,Total backward step errors in Test group
Step_Repeat_Errors,Control,7298.0,Count,Total repeated step errors in Control group
Step_Repeat_Errors,Test,11139.0,Count,Total repeated step errors in Test group
Mean_Step_Time_start,Control,57.46,Seconds,Average duration at start step for Control group
