[Reference](https://miptgirl.medium.com/making-sense-of-kpi-changes-627e1f416803)

In [3]:
import pandas as pd
import numpy as np
import random
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

pio.templates.default = "simple_white"

# visualisations

def create_share_vs_impact_chart(df, dimension, share_field, impact_field):
    """
    Creates an interactive scatter plot chart using Plotly to compare the size of the segments vs its impact on the metric change

    Parameters:
    -----------
    df : pandas DataFrame
        Data containing share
    dimension : str
        Column name of the parameter/segment variable
    share_field : str
        Column name for the share of segment values
    impact_field : str
        Column name for the impact on the metric change values
    """
    plot_df = df.copy() # cloning df for manipulations
    plot_df['impact_norm'] = plot_df[impact_field]/plot_df[share_field]

    colorscale = px.colors.qualitative.D3
    fig = go.Figure()

    # Add scatter plot with conditional colors
    for i, row in plot_df.iterrows():
        marker_color = colorscale[2] if row['impact_norm'] > 1.5 else (colorscale[3] if row['impact_norm'] < -0.5 else colorscale[0])

        fig.add_trace(go.Scatter(
            x=[row[share_field]],
            y=[row[impact_field]],
            mode='markers',
            marker=dict(
                size=10,  # Larger marker size
                color=marker_color,
                opacity=0.75
            ),
            showlegend=False
        ))


    # Add the x=y reference line
    max_val = max(max(plot_df[share_field].values), max(plot_df[impact_field].values))
    min_val = min(min(plot_df[share_field].values), min(plot_df[impact_field].values))
    line_range = [min_val, max_val]

    fig.add_trace(
        go.Scatter(
            x=line_range,
            y=line_range,
            mode="lines",
            line=dict(dash="dash", color="gray", width=1.5),
        )
    )

    # Add annotations to the chart
    for i, row in plot_df.iterrows():
        fig.add_annotation(
            x=row[share_field],
            y=row[impact_field],
            text=f"{row[dimension]}",
            showarrow=False,
            xanchor='center',
            yanchor='bottom',
            font=dict(size=10),
            yshift=5,
        )

    # Show the plot
    fig.update_layout(
        title="<b>Metric change explained:</b> correlation between segment size and impact on the change",
        xaxis_title="Share of segment before, %",
        yaxis_title="Share in difference, %",
        template="simple_white",
        height=600,
        width=800,
        showlegend = False)
    fig.show()

def create_parallel_coordinates_chart(df, dimension, before_field='before',
                                      after_field='after', impact_norm_field = 'impact_norm', metric_name = '', show_mean = False):
    """
    Creates an interactive parallel coordinates chart using Plotly

    Parameters:
    -----------
    df : pandas DataFrame
        Data containing before/after values per segment
    dimension : str
        Column name of the parameter/segment variable
    before_field : str
        Column name for the 'before' values
    after_field : str
        Column name for the 'after' values
    impact_norm_field : str
        Column name for the normalised impact coefficient values (the ratio of impact to the segment size)
    """
    # Create a copy of the dataframe for manipulation
    plot_df = df.copy()
    plot_df = plot_df.sort_values(impact_norm_field, ascending = False)

    # Define color mapping for params
    dimensions = plot_df[dimension].unique()
    if df.shape[0] <= 10:
        colorscale = px.colors.qualitative.D3
    else:
        colorscale = px.colors.qualitative.Dark24
    colors = [colorscale[i % len(colorscale)] for i in range(len(dimensions))]
    color_map = dict(zip(dimensions, colors))
    plot_df['color'] = plot_df[dimension].map(color_map)

    # Create accents on meaningful changes using line width and opacity
    plot_df['line_width'] = plot_df[impact_norm_field].map(
        lambda x: 4 if (x > 1.5) or (x < -0.5) else 2
    )
    plot_df['opacity'] = plot_df[impact_norm_field].map(
        lambda x: 1 if (x > 1.5) or (x < -0.5) else 0.6
    )

    # Create the figure
    fig = go.Figure()

    if show_mean:
        # Calculate mean values for reference line
        mean_before = plot_df[before_field].mean()
        mean_after = plot_df[after_field].mean()

        # Add mean reference line
        fig.add_trace(
            go.Scatter(
                x=['BEFORE', 'AFTER'],
                y=[mean_before, mean_after],
                mode='lines',
                line=dict(color='gray', width=1.5, dash='dash'),
                opacity=0.7,
                name='Average',
                showlegend=False # remove from legend
            )
        )

    # Add lines for each parameter value
    for idx, row in plot_df.iterrows():
        fig.add_trace(
            go.Scatter(
                x=['BEFORE', 'AFTER'],
                y=[row[before_field], row[after_field]],
                mode='lines+markers',
                line=dict(
                    color=row['color'],
                    width=row['line_width']
                ),
                opacity=row['opacity'],
                name=f"{row[dimension]}",
                marker=dict(size=8),
            )
        )

    # Update layout
    if metric_name != '':
        metric_title = ', ' + metric_name
    else:
        metric_title = ''
    fig.update_layout(
        title= '<b>Metric change explained:</b> before vs after' + metric_title,
        xaxis=dict(
            showgrid=False,
            tickfont=dict(size=12, weight='bold')
        ),
        yaxis=dict(
            title='Value',
            showgrid=True,
            gridcolor='rgba(211, 211, 211, 0.7)',
            gridwidth=1,
            tickformat='.0s'  # Automatically format large numbers (K, M)
        ),
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=-0.25,
            xanchor='center',
            x=0.5,
            font=dict(size=10)
        ),
        plot_bgcolor='white',
        width=800,
        height=600,
        margin=dict(l=60, r=30, t=80, b=120)
    )

    fig.show()

def hex_to_rgba(hex_color, alpha=None):
    hex_color = hex_color.lstrip('#')
    rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
    if alpha is not None:
        return f"rgba({rgb[0]}, {rgb[1]}, {rgb[2]}, {alpha})"
    else:
        return f"rgb({rgb[0]}, {rgb[1]}, {rgb[2]})"

def plot_conversion_waterfall(rate_before, rate_after, df, add_other = True):
    """
    Creates a waterfall chart showing contribution of dimension effects to conversion change.

    Parameters:
    -----------
    rate_before : float
        Initial conversion rate
    rate_after : float
        Final conversion rate
    df : pandas DataFrame
        DataFrame indexed by dimensions, with a column "effect" showing contribution
    """

    plot_df = df.copy()
    plot_df = plot_df[plot_df.effect != 0] # filtering out segments without effect
    plot_df = plot_df.sort_values('effect', ascending = False)
    colorscale = px.colors.qualitative.D3

    # Build the waterfall steps
    dimensions = plot_df.index.tolist()
    effects = plot_df["effect"].tolist()


    base = rate_before
    total_effect = sum(effects)
    remaining_effect = (rate_after - rate_before) - total_effect

    if add_other:
        if abs(remaining_effect) >= 0.1:
            dimensions.append('remaining effects')
            effects.append(remaining_effect)
        total_effect = sum(effects)

    # Add starting point
    x = ["Before"] + dimensions + ["After"]
    y = [rate_before] + effects + [rate_after - (rate_before + total_effect)]

    measures = ["absolute"] + ["relative"] * len(dimensions) + ["total"]

    fig = go.Figure(go.Waterfall(
        name="Conversion",
        orientation="v",
        measure=measures,
        textposition="outside",
        x=x,
        y=y,
        text= ['%.1f%%' % rate_before] + list(map(lambda x: '+%.1f%%' % x if x > 0 else '%.1f%%' % x, effects)) + ['%.1f%%' % rate_after],
        connector={"line": {"color": "gray"}},
        increasing={"marker": {"color": hex_to_rgba(colorscale[2], 0.7)}},
        decreasing={"marker": {"color": hex_to_rgba(colorscale[3], 0.7)}},
        totals={"marker": {"color": hex_to_rgba(colorscale[0], 0.7)}}
    ))

    fig.update_layout(
        title="<b>Conversion rate change explained</b>",
        yaxis_title="conversion, %",
        waterfallgap=0.4,
        margin=dict(t=100)
    )

    plot_df = plot_df.sort_values('effect', ascending = False)
    plot_df['cum_effect'] = plot_df.effect.cumsum()
    max_val = max([rate_before, rate_after, rate_before + plot_df['cum_effect'].max()]) + 10

    # max_val = rate_before + plot_df['cum_effect'].max() + 10  # adjust buffer
    fig.update_yaxes(range=[0, max_val])

    fig.show()

# analysis

def calculate_simple_growth_metrics(stats_df):
    """
    Analyses the change of simple metrics before and after

    Parameters:
    -----------
    df : pandas DataFrame
        Data containing before/after values per segment:
            - dimension is in index;
            - data frame has columns "before" and "after"
    """

    # Calculating overall stats
    before = stats_df.before.sum()
    after = stats_df.after.sum()
    print('Metric change: %.2f -> %.2f (%.2f%%)' % (before, after, 100*(after - before)/before))

    # Estimating impact of each segment
    stats_df['difference'] = stats_df.after - stats_df.before
    stats_df['difference_rate'] = (100*stats_df.difference/stats_df.before).map(lambda x: round(x, 2))
    stats_df['impact'] = (100*stats_df.difference / stats_df.difference.sum()).map(lambda x: round(x, 2))
    stats_df['segment_share_before'] = (100* stats_df.before / stats_df.before.sum()).map(lambda x: round(x, 2))
    stats_df['impact_norm'] = (stats_df.impact/stats_df.segment_share_before).map(lambda x: round(x, 2))
    # stats_df['abs_impact_norm'] = stats_df.impact_norm.map(abs)

    # Sorting based on the impact normed on the size
    # stats_df = stats_df.sort_values('abs_impact_norm', ascending = False)
    # stats_df = stats_df.drop('abs_impact_norm', axis = 1)

    # Creating visualisations
    create_parallel_coordinates_chart(stats_df.reset_index(), stats_df.index.name)
    create_share_vs_impact_chart(stats_df.reset_index(), stats_df.index.name, 'segment_share_before', 'impact')

    return stats_df.sort_values('impact_norm', ascending = False)

def calculate_conversion_effects(df, dimension, numerator_field1, denominator_field1,
                       numerator_field2, denominator_field2):
    """
    Analyses the change of conversion metrics before and after

    Parameters:
    -----------
    df : pandas DataFrame
        Data containing before/after values per segment
    dimension : str
        Column name of the parameter/segment variable
    numerator_field1: str
        Column name of the numerator field before
    denominator_field1: str
        Column name of the denominator field before
    numerator_field2: str
        Column name of the numerator field after
    denominator_field2: str
        Column name of the denominator field after
    """

    cmp_df = df.groupby(dimension)[[numerator_field1, denominator_field1, numerator_field2, denominator_field2]].sum()
    cmp_df = cmp_df.rename(columns = {
        numerator_field1: 'c1',
        numerator_field2: 'c2',
        denominator_field1: 't1',
        denominator_field2: 't2'
    })

    cmp_df['conversion_before'] = cmp_df['c1']/cmp_df['t1']
    cmp_df['conversion_after'] = cmp_df['c2']/cmp_df['t2']

    C1 = cmp_df['c1'].sum()
    T1 = cmp_df['t1'].sum()
    C2 = cmp_df['c2'].sum()
    T2 = cmp_df['t2'].sum()

    print('conversion before = %.2f' % (100*C1/T1))
    print('conversion after = %.2f' % (100*C2/T2))
    print('total conversion change = %.2f' % (100*(C2/T2 - C1/T1)))

    cmp_df['dt'] = (T1*cmp_df.t2 - T2*cmp_df.t1)/(T2 - cmp_df.t2)
    cmp_df['total_effect'] = (C1 - cmp_df.c1 + (cmp_df.t1 + cmp_df.dt)*cmp_df.conversion_after)/(T1 + cmp_df.dt) - C1/T1
    cmp_df['mix_change_effect'] = (C1 + cmp_df.dt*cmp_df.conversion_before)/(T1 + cmp_df.dt) - C1/T1
    cmp_df['conversion_change_effect'] = (cmp_df.t1*cmp_df.c2 - cmp_df.t2*cmp_df.c1)/(T1 * cmp_df.t2)

    for col in ['total_effect', 'mix_change_effect', 'conversion_change_effect', 'conversion_before', 'conversion_after']:
        cmp_df[col] = 100*cmp_df[col]

    cmp_df['conversion_diff'] = cmp_df.conversion_after - cmp_df.conversion_before
    cmp_df['before_segment_share'] = 100*cmp_df.t1/T1
    cmp_df['after_segment_share'] = 100*cmp_df.t2/T2
    for p in ['before_segment_share', 'after_segment_share', 'conversion_before', 'conversion_after', 'conversion_diff',
                     'total_effect', 'mix_change_effect', 'conversion_change_effect']:
        cmp_df[p] = cmp_df[p].map(lambda x: round(x, 2))
    cmp_df['total_effect_share'] = (100*cmp_df.total_effect/(100*(C2/T2 - C1/T1))).map(lambda x: round(x, 2))
    cmp_df['impact_norm'] = cmp_df.total_effect_share/cmp_df.before_segment_share

    # creating visualisations
    create_share_vs_impact_chart(cmp_df.reset_index(), dimension, 'before_segment_share', 'total_effect_share')
    cmp_df = cmp_df[['before_segment_share', 'after_segment_share', 'conversion_before', 'conversion_after', 'conversion_diff',
                     'total_effect', 'mix_change_effect', 'conversion_change_effect', 'total_effect_share', 'impact_norm']]
    cmp_df = cmp_df.sort_values('total_effect', ascending = False)

    # return cmp_df[['total_effect']].rename(columns = {'total_effect': 'effect'})
    plot_conversion_waterfall(
        100*C1/T1, 100*C2/T2, cmp_df[['total_effect']].rename(columns = {'total_effect': 'effect'})
    )

    # putting together effects split by change of mix and conversion change
    tmp = []
    for rec in cmp_df.reset_index().to_dict('records'):
        tmp.append(
            {
                'segment': rec[dimension] + ' - change of mix',
                'effect': rec['mix_change_effect']
            }
        )
        tmp.append(
            {
                'segment': rec[dimension] + ' - conversion change',
                'effect': rec['conversion_change_effect']
            }
        )
    effects_det_df = pd.DataFrame(tmp)
    effects_det_df['effect_abs'] = effects_det_df.effect.map(lambda x: abs(x))
    effects_det_df = effects_det_df.sort_values('effect_abs', ascending = False)
    top_effects_det_df = effects_det_df.head(5).drop('effect_abs', axis = 1)
    plot_conversion_waterfall(
        100*C1/T1, 100*C2/T2, top_effects_det_df.set_index('segment'),
        add_other = True
    )

    create_parallel_coordinates_chart(cmp_df.reset_index(), dimension, before_field='before_segment_share',
                                      after_field='after_segment_share', impact_norm_field = 'impact_norm', metric_name = 'share of segment',
                                     show_mean = False)
    create_parallel_coordinates_chart(cmp_df.reset_index(), dimension, before_field='conversion_before',
                                      after_field='conversion_after', impact_norm_field = 'impact_norm', metric_name = 'conversion',
                                      show_mean = False)
    return cmp_df.rename(columns = {'before_segment_share': 'before_seg_share', 'after_segment_share': 'after_seg_share',
                                   'conversion_before': 'conv_before', 'conversion_after': 'conv_after',
                                   'conversion_diff': 'conv_diff', 'conversion_change_effect': 'conv_change_effect'}).drop('impact_norm', axis = 1)


In [1]:
import pandas as pd

df = pd.read_csv('https://raw.githubusercontent.com/miptgirl/miptgirl_medium/refs/heads/main/growth_narrative_llm_agent/absolute_metrics_example.csv', sep = '\t')
df.head()

Unnamed: 0,country,maturity,revenue_before,revenue_after_scenario_1,revenue_after_scenario_2,revenue_after_scenario_3
0,UK,new,128324.22,34838.87,127304.87,127304.87
1,UK,existing,353085.05,298405.57,349728.15,349728.15
2,France,new,57901.91,17443.06,57673.79,57673.79
3,France,existing,182802.72,143202.12,50183.27,50183.27
4,Germany,new,48105.83,13678.94,47795.22,47795.22


# Revenue

In [4]:
calculate_simple_growth_metrics(
  df.groupby('country')[['revenue_before', 'revenue_after_scenario_1']].sum()\
    .sort_values('revenue_before', ascending = False).rename(
        columns = {'revenue_after_scenario_1': 'after',
          'revenue_before': 'before'}
    )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,before,after,difference,difference_rate,impact,segment_share_before,impact_norm
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Spain,96281.86,55061.02,-41220.84,-42.81,6.61,5.56,1.19
Italy,120352.31,71306.71,-49045.6,-40.75,7.86,6.95,1.13
other,632767.39,381711.22,-251056.17,-39.68,40.23,36.53,1.1
Germany,160469.75,105955.86,-54513.89,-33.97,8.74,9.27,0.94
France,240704.63,160645.18,-80059.45,-33.26,12.83,13.9,0.92
UK,481409.27,333244.44,-148164.83,-30.78,23.74,27.8,0.85


In [5]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_2']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_2': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1599065.55 (-7.67%)


Unnamed: 0_level_0,before,after,difference,difference_rate,impact,segment_share_before,impact_norm
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.19,99.95,13.9,7.19
UK,481409.27,477033.02,-4376.25,-0.91,3.29,27.8,0.12
Germany,160469.75,159778.76,-690.99,-0.43,0.52,9.27,0.06
Spain,96281.86,96064.77,-217.09,-0.23,0.16,5.56,0.03
other,632767.39,637000.48,4233.09,0.67,-3.18,36.53,-0.09
Italy,120352.31,121331.46,979.15,0.81,-0.74,6.95,-0.11


In [6]:
calculate_simple_growth_metrics(
    df.groupby('country')[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,before,after,difference,difference_rate,impact,segment_share_before,impact_norm
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France,240704.63,107857.06,-132847.57,-55.19,199.44,13.9,14.35
UK,481409.27,477033.02,-4376.25,-0.91,6.57,27.8,0.24
Germany,160469.75,159778.76,-690.99,-0.43,1.04,9.27,0.11
other,632767.39,637000.48,4233.09,0.67,-6.36,36.53,-0.17
Italy,120352.31,121331.46,979.15,0.81,-1.47,6.95,-0.21
Spain,96281.86,162374.495,66092.635,68.64,-99.22,5.56,-17.85


In [7]:
df['segment'] = df.country + ' - ' + df.maturity

In [8]:
calculate_simple_growth_metrics(
    df.groupby(['segment'])[['revenue_before', 'revenue_after_scenario_1']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_1': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1107924.43 (-36.03%)


Unnamed: 0_level_0,before,after,difference,difference_rate,impact,segment_share_before,impact_norm
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Spain - new,32394.1,7758.9,-24635.2,-76.05,3.95,1.87,2.11
UK - new,128324.22,34838.87,-93485.35,-72.85,14.98,7.41,2.02
Germany - new,48105.83,13678.94,-34426.89,-71.56,5.52,2.78,1.99
France - new,57901.91,17443.06,-40458.85,-69.87,6.48,3.34,1.94
Italy - new,36941.57,11615.29,-25326.28,-68.56,4.06,2.13,1.91
other - new,233958.42,72666.89,-161291.53,-68.94,25.85,13.51,1.91
Italy - existing,83410.74,59691.42,-23719.32,-28.44,3.8,4.82,0.79
Spain - existing,63887.76,47302.12,-16585.64,-25.96,2.66,3.69,0.72
other - existing,398808.97,309044.33,-89764.64,-22.51,14.38,23.03,0.62
France - existing,182802.72,143202.12,-39600.6,-21.66,6.35,10.55,0.6


In [9]:
calculate_simple_growth_metrics(
    df.groupby(['segment'])[['revenue_before', 'revenue_after_scenario_3']].sum()\
        .sort_values('revenue_before', ascending = False).rename(
            columns = {'revenue_after_scenario_3': 'after', 'revenue_before': 'before'}
        )
)

Metric change: 1731985.21 -> 1665375.27 (-3.85%)


Unnamed: 0_level_0,before,after,difference,difference_rate,impact,segment_share_before,impact_norm
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
France - existing,182802.72,50183.27,-132619.45,-72.55,199.1,10.55,18.87
UK - existing,353085.05,349728.15,-3356.9,-0.95,5.04,20.39,0.25
UK - new,128324.22,127304.87,-1019.35,-0.79,1.53,7.41,0.21
Germany - new,48105.83,47795.22,-310.61,-0.65,0.47,2.78,0.17
France - new,57901.91,57673.79,-228.12,-0.39,0.34,3.34,0.1
Germany - existing,112363.92,111983.54,-380.38,-0.34,0.57,6.49,0.09
Italy - new,36941.57,36893.3,-48.27,-0.13,0.07,2.13,0.03
other - new,233958.42,235306.99,1348.57,0.58,-2.02,13.51,-0.15
other - existing,398808.97,401693.49,2884.52,0.72,-4.33,23.03,-0.19
Spain - new,32394.1,32674.67,280.57,0.87,-0.42,1.87,-0.22


# Conversion

In [10]:
conv_df = pd.read_csv('https://raw.githubusercontent.com/miptgirl/miptgirl_medium/refs/heads/main/growth_narrative_llm_agent/conversion_metrics_example.csv', sep = '\t')
conv_df.head()

Unnamed: 0,country,maturity,users_before,converted_users_before,users_after_scenario_1,converted_users_after_scenario_1,users_after_scenario_2,converted_users_after_scenario_2
0,UK,new,8327,863,8733,2592,87318,16756
1,UK,existing,18417,14154,18636,14246,18636,16125
2,France,new,3621,177,3675,943,3675,477
3,France,existing,9750,7693,9633,7652,9633,8675
4,Germany,new,2452,162,2507,640,2507,409


In [11]:
calculate_conversion_effects(
    conv_df, 'country', 'converted_users_before', 'users_before',
    'converted_users_after_scenario_1', 'users_after_scenario_1',
)

conversion before = 55.62
conversion after = 61.41
total conversion change = 5.80


Unnamed: 0_level_0,before_seg_share,after_seg_share,conv_before,conv_after,conv_diff,total_effect,mix_change_effect,conv_change_effect,total_effect_share
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
other,36.54,36.33,59.31,65.18,5.87,2.12,-0.01,2.14,36.58
UK,27.8,28.35,56.15,61.52,5.37,1.53,0.0,1.49,26.4
France,13.9,13.78,58.86,64.59,5.73,0.78,-0.0,0.8,13.46
Italy,6.95,6.64,42.24,50.16,7.92,0.57,0.04,0.55,9.83
Germany,9.26,9.25,46.22,51.6,5.38,0.5,0.0,0.5,8.63
Spain,5.56,5.65,52.92,58.18,5.26,0.29,-0.0,0.29,5.0


In [12]:
conv_df.groupby('country').sum().sum()

Unnamed: 0,0
maturity,newexistingnewexistingnewexistingnewexistingne...
users_before,96215
converted_users_before,53510
users_after_scenario_1,96554
converted_users_after_scenario_1,59295
users_after_scenario_2,175139
converted_users_after_scenario_2,78319


In [13]:
calculate_conversion_effects(
    conv_df, 'country', 'converted_users_before', 'users_before',
    'converted_users_after_scenario_2', 'users_after_scenario_2'
)

conversion before = 55.62
conversion after = 44.72
total conversion change = -10.90


Unnamed: 0_level_0,before_seg_share,after_seg_share,conv_before,conv_after,conv_diff,total_effect,mix_change_effect,conv_change_effect,total_effect_share
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
other,36.54,20.03,59.31,70.03,10.72,1.19,-0.96,3.92,-10.92
Germany,9.26,5.1,46.22,56.81,10.59,0.97,0.43,0.98,-8.9
Italy,6.95,3.66,42.24,50.63,8.39,0.78,0.47,0.58,-7.16
France,13.9,7.6,58.86,68.77,9.91,0.52,-0.24,1.38,-4.77
Spain,5.56,3.11,52.92,62.36,9.45,0.36,0.07,0.53,-3.3
UK,27.8,60.5,56.15,31.03,-25.12,-14.95,0.24,-6.98,137.2


In [15]:
conv_df['segment'] = conv_df.country + ' - ' + conv_df.maturity

In [16]:
calculate_conversion_effects(
    conv_df, 'segment', 'converted_users_before', 'users_before',
    'converted_users_after_scenario_2', 'users_after_scenario_2',

).reset_index()

conversion before = 55.62
conversion after = 44.72
total conversion change = -10.90


Unnamed: 0,segment,before_seg_share,after_seg_share,conv_before,conv_after,conv_diff,total_effect,mix_change_effect,conv_change_effect,total_effect_share
0,other - new,11.52,6.2,5.82,15.86,10.04,3.61,2.99,1.16,-33.13
1,France - new,3.76,2.1,4.89,12.98,8.09,1.05,0.88,0.3,-9.64
2,Italy - new,3.07,1.67,10.21,20.25,10.05,0.82,0.65,0.31,-7.53
3,Germany - new,2.55,1.43,6.61,16.31,9.71,0.7,0.56,0.25,-6.42
4,Spain - new,1.37,0.75,13.01,24.49,11.48,0.36,0.27,0.16,-3.3
5,Germany - existing,6.72,3.67,61.25,72.6,11.35,0.23,-0.18,0.76,-2.11
6,Spain - existing,4.18,2.36,66.02,74.4,8.38,0.0,-0.2,0.35,-0.0
7,Italy - existing,3.88,1.99,67.53,76.06,8.53,-0.06,-0.23,0.33,0.55
8,France - existing,10.13,5.5,78.9,90.06,11.15,-0.59,-1.2,1.13,5.41
9,UK - existing,19.14,10.64,76.85,86.53,9.67,-1.2,-2.23,1.85,11.01
