In [1]:
import pandas as pd
import numpy as np
import random

# Set seed for reproducibility. This seeds NumPy's legacy global RNG, so the
# order of the np.random.* calls below determines the generated values.
np.random.seed(42)

# Define possible values for categorical columns
countries = ["UK", "France", "Germany", "Spain", "Italy", "Netherlands", "Belgium", "Switzerland"]
maturity_types = ["new", "existing"]

# (low, high) bounds of the random multiplier applied to revenue_before for
# each "after" scenario. Dict order is the order in which random numbers are drawn.
scenario_multiplier_ranges = {
    "revenue_after_scenario_1": (0.8, 1.2),
    "revenue_after_scenario_2": (0.9, 1.3),
    "revenue_after_scenario_3": (0.85, 1.25),
}

# Generate 1000 rows of data
n_rows = 1000
data = {
    "country": np.random.choice(countries, size=n_rows),
    "maturity": np.random.choice(maturity_types, size=n_rows),
    "revenue_before": np.random.uniform(10000, 200000, n_rows).round(2),
}

# Generate revenue after scenarios with some correlation to revenue_before:
# each row's revenue_before is scaled by its own uniformly drawn multiplier.
# One vectorized draw of n_rows multipliers per scenario replaces the per-row
# Python loop; values are rounded to 2 decimal places as they are produced
# (revenue_before is already rounded above).
for scenario_col, (low, high) in scenario_multiplier_ranges.items():
    multipliers = np.random.uniform(low, high, size=n_rows)
    data[scenario_col] = (data["revenue_before"] * multipliers).round(2)

# Create the DataFrame
df = pd.DataFrame(data)

# Save to CSV with tab separator
df.to_csv('absolute_metrics_example.csv', sep='\t', index=False)

# Display first few rows
df.head()

Unnamed: 0,country,maturity,revenue_before,revenue_after_scenario_1,revenue_after_scenario_2,revenue_after_scenario_3
0,Belgium,existing,45175.26,40869.26,52813.55,48735.0
1,Spain,new,112961.18,101528.55,137662.69,132410.04
2,Italy,new,175859.71,204437.24,175892.62,202953.43
3,Belgium,new,149122.73,134183.39,171483.63,135934.31
4,Germany,new,163246.62,148355.25,184256.2,148505.42


In [None]:
def calculate_simple_growth_metrics(stats_df):
  """Print the overall metric change and attribute it to individual segments.

  Args:
    stats_df: DataFrame with one row per segment and numeric columns
      ``before`` and ``after``. Its index name is passed to the chart
      helpers as the segment dimension. The input frame is not modified.

  Returns:
    A new DataFrame sorted by ``impact_norm`` (descending) holding the input
    columns plus:
      difference            -- after - before
      difference_rate       -- difference as % of the segment's ``before``
      impact                -- segment difference as % of the total difference
      segment_share_before  -- segment ``before`` as % of the total ``before``
      impact_norm           -- impact divided by segment_share_before
    All percentage columns and ``impact_norm`` are rounded to 2 decimals.

  Side effects:
    Prints a one-line summary and calls create_parallel_coordinates_chart and
    create_share_vs_impact_chart (defined elsewhere in the notebook) with the
    enriched, unsorted frame.
  """
  # Work on a copy so the caller's frame does not silently gain columns.
  result = stats_df.copy()

  # Calculating overall stats
  before = result['before'].sum()
  after = result['after'].sum()
  print('Metric change: %.2f -> %.2f (%.2f%%)' % (before, after, 100*(after - before)/before))

  # Estimating impact of each segment
  result['difference'] = result['after'] - result['before']

  # Keep unrounded percentages so impact_norm is derived from exact values:
  # dividing the rounded columns compounds rounding error and yields inf
  # whenever a small segment's share rounds down to 0.00.
  impact_pct = 100 * result['difference'] / result['difference'].sum()
  share_before_pct = 100 * result['before'] / before

  result['difference_rate'] = (100 * result['difference'] / result['before']).round(2)
  result['impact'] = impact_pct.round(2)
  result['segment_share_before'] = share_before_pct.round(2)
  result['impact_norm'] = (impact_pct / share_before_pct).round(2)

  # Creating visualisations
  create_parallel_coordinates_chart(result.reset_index(), result.index.name)
  create_share_vs_impact_chart(result.reset_index(), result.index.name, 'segment_share_before', 'impact')

  return result.sort_values('impact_norm', ascending=False)