# Concentration measures

In [None]:
import copy

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from scipy import stats

# Default template for every plotly figure: "plotly_dark" combined with
# "presentation" (plotly merges templates joined by "+").
pio.templates.default = "plotly_dark+presentation"

# Make pandas' `.plot` accessor build plotly figures; the cells below call
# plotly-specific methods on what `.plot.hist` / `.plot.scatter` return.
pd.options.plotting.backend = "plotly"

# Plotly's qualitative "Plotly" colour sequence, indexed below to colour the
# hand-drawn line segments.
default_colors = px.colors.qualitative.Plotly

In [None]:
# Two three-point distributions; every raw value is divided by 6 so that
# each column sums to 1.
raw_values = {
    "A": [1, 1, 4],
    "B": [2 - 3**0.5, 2, 2 + 3**0.5],
}
df = pd.DataFrame(raw_values) / 6

# Take both statistics over the three data rows before appending either of
# them, so that no summary row can leak into the other statistic.
column_means = df.mean()
column_vars = df.var()  # pandas default: sample variance (ddof=1)
df.loc["mean"] = column_means
df.loc["var"] = column_vars

summary_table = df.round(2).to_markdown()
print(summary_table)

In [None]:
# Draw a reproducible sample from a skew-normal distribution (shape a=5) and
# shift it so that its smallest value is exactly zero.
rng = np.random.default_rng(seed=9459974)
n = 100_000
raw_sample = stats.skewnorm.rvs(a=5, size=n, random_state=rng)
skew = pd.Series(data=raw_sample)
skew = skew - skew.min()

# Histogram of the sample with vertical markers at the 25th and the 90th
# percentile, each in its own colour.
fig = skew.plot.hist(nbins=1000)
for level, marker_color in ((0.25, "#FFB3A1"), (0.9, "#D0D6FF")):
    fig.add_vline(x=skew.quantile(level), line_width=5, line_color=marker_color)
fig.update_layout(showlegend=False, width=1000, height=600)
fig.write_image("concentration_quantile/screencast/public/initial.svg")
fig

In [None]:
# Totals of the values in each marked tail of the sample.
bottom_quarter_sum = skew[skew <= skew.quantile(0.25)].sum()
top_decile_sum = skew[skew >= skew.quantile(0.9)].sum()

# Annotation texts; the totals are rounded to the nearest thousand.
bottom_label = (
    "Sum of values <br>at the 25th <br>percentile <br>and below: <br>"
    f"{int(bottom_quarter_sum.round(-3))}"
)
top_label = (
    "Sum of values at the<br>90th percentile and above: "
    f"{int(top_decile_sum.round(-3))}"
)

# Work on a deep copy so that the base histogram stays unannotated.
fig_sum = copy.deepcopy(fig)
shared_style = {"align": "left", "showarrow": False}
fig_sum.add_annotation(
    x=0.45,
    y=650,
    text=bottom_label,
    font={"color": "#FFB3A1", "size": 20},
    **shared_style,
)
fig_sum.add_annotation(
    x=3.5,
    y=503,
    text=top_label,
    font={"color": "#D0D6FF", "size": 20},
    **shared_style,
)
fig_sum.write_image("concentration_quantile/screencast/public/sum.svg")
fig_sum.show()

In [None]:
# Share of the grand total held by each marked tail of the sample.
grand_total = skew.sum()
bottom_quarter_share = skew[skew <= skew.quantile(0.25)].sum() / grand_total
top_decile_share = skew[skew >= skew.quantile(0.9)].sum() / grand_total

# Annotation texts; the shares are shown as whole percentages.
bottom_label = (
    f"{bottom_quarter_share:.0%} "
    "of total<br>concentrated<br>at the 25th<br>percentile<br>and below"
)
top_label = (
    f"{top_decile_share:.0%} "
    "of total concentrated at the<br>90th percentile and above"
)

# Work on a deep copy so that the base histogram stays unannotated.
fig_sum = copy.deepcopy(fig)
shared_style = {"align": "left", "showarrow": False}
fig_sum.add_annotation(
    x=0.45,
    y=650,
    text=bottom_label,
    font={"color": "#FFB3A1", "size": 20},
    **shared_style,
)
fig_sum.add_annotation(
    x=3.5,
    y=503,
    text=top_label,
    font={"color": "#D0D6FF", "size": 20},
    **shared_style,
)
fig_sum.write_image("concentration_quantile/screencast/public/frac.svg")
fig_sum.show()

In [None]:
# Toy data set: three series of three values each, rows labelled 1 to 3.
# Every column is sorted in ascending order and sums to 12.
tiny = pd.DataFrame(
    data={"A": [1, 5, 6], "B": [2, 2, 8], "C": [4, 4, 4]},
    index=[1, 2, 3],
)
tiny_table = tiny.to_markdown(index=False)
print(tiny_table)

In [None]:
# Express every value as a share of its column total, so each column of
# `tiny_rel` sums to 1.
column_totals = tiny.sum()
tiny_rel = tiny.div(column_totals)
tiny_rel_table = tiny_rel.round(2).to_markdown(index=False)
print(tiny_rel_table)

In [None]:
# Running total down the rows of every column.
tiny_cum_sum = tiny.cumsum(axis=0)
tiny_cum_sum_table = tiny_cum_sum.to_markdown(index=False)
print(tiny_cum_sum_table)

In [None]:
# Turn the running totals into cumulative shares. Moving the row labels into
# a regular column and dividing everything by the last row normalises the
# labels (cumulative share of rows) and the values (cumulative share of each
# column total) in a single step.
cum_with_labels = tiny_cum_sum.reset_index()
last_row = cum_with_labels.iloc[-1]
tiny_dist = cum_with_labels.div(last_row).set_index("index")
print(tiny_dist.round(2).to_markdown(index=False))

# Long format for plotting: one row per (headcount share, series) pair.
column_labels = {
    "index": "Headcount share",
    "level_1": "Series",
    0: "Value share",
}
stacked = tiny_dist.stack(future_stack=True).reset_index()
tiny_dist_stacked = stacked.rename(columns=column_labels)

In [None]:
def gini(rel):
    """Return the Gini coefficient of a sequence of relative shares.

    Evaluates ``(2 * sum(i * rel_i) - (n + 1)) / n``, where the rank ``i``
    runs from 1 to ``n`` and is taken from the position of each entry.

    Args:
        rel: One-dimensional sequence of shares (pandas Series, NumPy array,
            list, ...). Entries are used by position, so a Series index is
            ignored. The formula only yields the Gini coefficient when the
            shares are sorted in ascending order and sum to 1; neither
            condition is checked or enforced here.

    Returns:
        The coefficient as a NumPy float.

    Raises:
        ZeroDivisionError: If ``rel`` is empty.
    """
    shares = np.asarray(rel, dtype=float)
    n = shares.size
    if n == 0:
        # Fail with a descriptive message rather than on the bare division
        # at the end; the exception type matches that division.
        raise ZeroDivisionError("gini() is undefined for an empty sequence")
    ranks = np.arange(1, n + 1)
    return (2 * np.dot(ranks, shares) - (n + 1)) / n

In [None]:
# One-row table holding the Gini coefficient of every column of shares.
gini_by_series = {}
for series_name in tiny_rel.columns:
    gini_by_series[series_name] = [gini(tiny_rel[series_name])]
tiny_gini = pd.DataFrame(gini_by_series)
print(tiny_gini.round(2).to_markdown(index=False))

In [None]:
# Scatter plot of the cumulative-share points, one trace per series.
fig_only_a_only_data = tiny_dist_stacked.plot.scatter(
    x="Headcount share", y="Value share", color="Series"
)

# Tick labels mapped to their axis positions. Spelling the positions out
# keeps label and value visibly paired and avoids calling eval() on strings.
tick_positions = {"0": 0, "1/3": 1 / 3, "2/3": 2 / 3, "1": 1}
ticktext = list(tick_positions)
axis_props = {
    "range": [-0.1, 1.1],
    "tickvals": list(tick_positions.values()),
    "ticktext": ticktext,
}
# Both axes show shares, so they use the same range and ticks.
fig_only_a_only_data.update_layout(
    xaxis=axis_props,
    yaxis=axis_props,
)
# Hide the second and third trace from the plot while keeping their legend
# entries (presumably series B and C, given the figure's name).
fig_only_a_only_data.data[1].visible = "legendonly"
fig_only_a_only_data.data[2].visible = "legendonly"

# Export one copy of the figure per subchapter that uses it.
for subchapter in ("lorenz_curves",):
    fig_only_a_only_data.write_image(
        f"{subchapter}/screencast/public/data_points_only_a-line_none.svg"
    )
fig_only_a_only_data

In [None]:
# First line segment, from the origin to (1/3, 1/12), drawn in the first
# palette colour and kept out of the legend.
first_segment = go.Scatter(
    x=[0, 1 / 3],
    y=[0, 1 / 12],
    mode="lines",
    showlegend=False,
    line={"color": default_colors[0]},
)

# Add it to a deep copy so that the points-only figure stays unchanged.
fig_only_a_with_first_segment = copy.deepcopy(fig_only_a_only_data)
fig_only_a_with_first_segment.add_trace(first_segment)

# Export one copy of the figure per subchapter that uses it.
for subchapter in ("lorenz_curves",):
    target = (
        f"{subchapter}/screencast/public/data_points_only_a-line_first_segment.svg"
    )
    fig_only_a_with_first_segment.write_image(target)
fig_only_a_with_first_segment.show()

In [None]:
# Second line segment, from (1/3, 1/12) to (2/3, 1/2), drawn in the first
# palette colour and kept out of the legend.
second_segment = go.Scatter(
    x=[1 / 3, 2 / 3],
    y=[1 / 12, 0.5],
    mode="lines",
    showlegend=False,
    line={"color": default_colors[0]},
)

# Add it to a deep copy of the figure that already has the first segment.
fig_only_a_with_first_two_segments = copy.deepcopy(fig_only_a_with_first_segment)
fig_only_a_with_first_two_segments.add_trace(second_segment)

# Export one copy of the figure per subchapter that uses it.
for subchapter in ("lorenz_curves",):
    target = (
        f"{subchapter}/screencast/public/data_points_only_a-line_first_two_segments.svg"
    )
    fig_only_a_with_first_two_segments.write_image(target)
fig_only_a_with_first_two_segments.show()

In [None]:
# Third and last line segment, from (2/3, 1/2) to (1, 1), drawn in the first
# palette colour and kept out of the legend.
third_segment = go.Scatter(
    x=[2 / 3, 1],
    y=[0.5, 1],
    mode="lines",
    showlegend=False,
    line={"color": default_colors[0]},
)

# Add it to a deep copy of the figure that already has two segments.
fig_only_a_with_all_segments = copy.deepcopy(fig_only_a_with_first_two_segments)
fig_only_a_with_all_segments.add_trace(third_segment)

# This figure is exported for two subchapters.
for subchapter in ("lorenz_curves", "gini"):
    target = (
        f"{subchapter}/screencast/public/data_points_only_a-line_all_segments.svg"
    )
    fig_only_a_with_all_segments.write_image(target)
fig_only_a_with_all_segments.show()

In [None]:
# Corner points of the second curve, starting at the origin; consecutive
# points are joined by one line trace each, in the second palette colour.
second_curve_points = [(0, 0), (1 / 3, 1 / 6), (2 / 3, 1 / 3), (1, 1)]
second_curve_segments = [
    go.Scatter(
        x=[x_start, x_end],
        y=[y_start, y_end],
        mode="lines",
        showlegend=False,
        line={"color": default_colors[1]},
    )
    for (x_start, y_start), (x_end, y_end) in zip(
        second_curve_points, second_curve_points[1:]
    )
]

# Work on a deep copy, make the second scatter trace fully visible and add
# the three segments.
fig_only_a_b = copy.deepcopy(fig_only_a_with_all_segments)
fig_only_a_b.data[1].visible = True
fig_only_a_b.add_traces(second_curve_segments)

# Export one copy of the figure per subchapter that uses it.
for subchapter in ("lorenz_curves",):
    target = f"{subchapter}/screencast/public/data_points_a_b-lines_a_b.svg"
    fig_only_a_b.write_image(target)
fig_only_a_b.show()

In [None]:
# Straight line from (0, 0) to (1, 1) in the third palette colour, kept out
# of the legend.
diagonal = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode="lines",
    showlegend=False,
    line={"color": default_colors[2]},
)

# Work on a deep copy, make the third scatter trace fully visible and add
# the diagonal.
fig_all = copy.deepcopy(fig_only_a_b)
fig_all.data[2].visible = True
fig_all.add_trace(diagonal)

# This figure is exported for two subchapters.
for subchapter in ("lorenz_curves", "gini"):
    target = f"{subchapter}/screencast/public/data_points_all-lines_all.svg"
    fig_all.write_image(target)
fig_all.show()

In [None]:
# Outlines (x values, y values) of the three shaded polygons. In each third
# of the x axis the polygon spans the area between the drawn curve
# (0, 0) - (1/3, 1/12) - (2/3, 1/2) - (1, 1) and the diagonal y = x.
shade_outlines = [
    ([0, 1 / 3, 1 / 3, 0], [0, 1 / 12, 1 / 3, 0]),
    ([1 / 3, 2 / 3, 2 / 3, 1 / 3], [1 / 12, 0.5, 2 / 3, 1 / 3]),
    ([2 / 3, 1, 1, 2 / 3], [0.5, 1, 1, 2 / 3]),
]
# Fully transparent outline, half-transparent fill of the same RGB colour.
shade_traces = [
    go.Scatter(
        x=outline_x,
        y=outline_y,
        mode="lines",
        showlegend=False,
        line={"color": "rgba(99, 110, 250, 0)"},
        fill="toself",
        fillcolor="rgba(99, 110, 250, 0.5)",
    )
    for outline_x, outline_y in shade_outlines
]

# Add the shading to a deep copy of the figure with the complete curve.
fig_shade_a = copy.deepcopy(fig_only_a_with_all_segments)
fig_shade_a.add_traces(shade_traces)

# Export one copy of the figure per subchapter that uses it.
for subchapter in ("gini",):
    target = f"{subchapter}/screencast/public/data_points_all-shade_a.svg"
    fig_shade_a.write_image(target)
fig_shade_a.show()

In [None]:
# Show the sixth entry of the default colour sequence as the cell's output.
default_colors[5]