# Example from Finanz- und Sozialpolitik



In [None]:
import copy

import numpy as np
import pandas as pd
import plotly.io as pio
from scipy import stats

# Make the combined "plotly_dark+presentation" template the default for every
# plotly figure created below.
pio.templates.default = "plotly_dark+presentation"

# Route pandas' ``.plot`` accessor through plotly, so ``df.plot.hist(...)``
# below accepts plotly-style arguments such as ``nbins`` and ``color``.
pd.options.plotting.backend = "plotly"

# Abstract distributions

In [None]:
# Draw two normal samples that differ only in their scale and reshape them
# into long format (one row per observation, labelled by "Dispersion").
rng = np.random.default_rng(seed=9459974)
n = 100_000

# Insertion order matters: "Small" is drawn from the shared generator before
# "Large", which keeps the seeded output reproducible.
scales = {"Small": 1.0, "Large": 1.5}
samples = {
    label: pd.Series(stats.norm.rvs(size=n, scale=scale, random_state=rng))
    for label, scale in scales.items()
}
df = pd.melt(pd.DataFrame(data=samples), var_name="Dispersion", value_name="Value")

# Grouped view of the values and its per-group summary statistics; the later
# figure cells read min/max and quartiles from the description table.
df_grouped = df.groupby("Dispersion")["Value"]
df_grouped_description = df_grouped.describe()

In [None]:
# Grouped histogram of both samples with axis titles and tick marks removed.
fig_bare = df.plot.hist(nbins=75, color="Dispersion", barmode="group")
fig_bare.update_layout(title="Distributions", xaxis_title="", yaxis_title="")

# Pad the x-range slightly beyond the observed extremes; the y-range is fixed.
x_low = df["Value"].min() - 0.05
x_high = df["Value"].max() + 0.05
fig_bare.update_xaxes(tickvals=[], range=[x_low, x_high])
fig_bare.update_yaxes(tickvals=[], range=[0, 9000])

# The same figure is exported once per subchapter directory.
for subchapter in ("absolute", "squared"):
    fig_bare.write_image(f"{subchapter}/screencast/public/bare.svg")
fig_bare.show()

In [None]:
# Copy of the bare histogram in which the second trace is switched to
# "legendonly"; the axis ranges are re-applied from fig_bare.
fig_only_small = copy.deepcopy(fig_bare)
fig_only_small.data[1].update(visible="legendonly")
fig_only_small.update_xaxes(range=fig_bare.layout.xaxis.range)
fig_only_small.update_yaxes(range=fig_bare.layout.yaxis.range)

# Exported once per subchapter directory.
for subchapter in ("absolute", "squared"):
    fig_only_small.write_image(f"{subchapter}/screencast/public/only_small.svg")
fig_only_small.show()

In [None]:
# "Small"-only histogram with the minimum and maximum of that group marked.
fig_min_max_small_only = copy.deepcopy(fig_only_small)
fig_min_max_small_only.update_layout(title="Distributions with Minimum and Maximum")

# One vertical line per extreme, read from the describe() table of the groups.
for stat in ("min", "max"):
    fig_min_max_small_only.add_vline(
        x=df_grouped_description.loc["Small", stat],
        line_width=5,
        line_color="#D0D6FF",
    )

fig_min_max_small_only.write_image("absolute/screencast/public/min_max_small_only.svg")
# Render explicitly, like the other figure cells: a bare trailing expression
# is only displayed when it happens to be the last statement of a notebook
# cell and does nothing when the code runs as a script.
fig_min_max_small_only.show()

In [None]:
# Both histograms with the minimum and maximum of each group marked by
# vertical lines in a per-group colour.
fig_min_max = copy.deepcopy(fig_bare)
fig_min_max.update_layout(title="Distributions with Minimum and Maximum")

# Lines are added in the order Small-min, Small-max, Large-min, Large-max.
group_colors = {"Small": "#D0D6FF", "Large": "#FFB3A1"}
for group, color in group_colors.items():
    for stat in ("min", "max"):
        fig_min_max.add_vline(
            x=df_grouped_description.loc[group, stat],
            line_width=5,
            line_color=color,
        )

fig_min_max.write_image("absolute/screencast/public/min_max.svg")
fig_min_max.show()

In [None]:
# Both histograms with the 25% and 75% entries of each group's describe()
# row marked by vertical lines.
fig_quartiles = copy.deepcopy(fig_bare)
fig_quartiles.update_layout(title="Distributions with 1st and 3rd quartiles")

# (group, describe() column, line colour) in the order the lines are drawn.
quartile_lines = [
    ("Small", "25%", "#D0D6FF"),
    ("Small", "75%", "#D0D6FF"),
    ("Large", "25%", "#FFB3A1"),
    ("Large", "75%", "#FFB3A1"),
]
for group, column, color in quartile_lines:
    position = df_grouped_description.loc[group, column]
    fig_quartiles.add_vline(x=position, line_width=5, line_color=color)

fig_quartiles.write_image("absolute/screencast/public/quartiles.svg")
fig_quartiles.show()

In [None]:
# Both histograms with the 10th and 90th percentile of each group marked.
fig_deciles = copy.deepcopy(fig_bare)
fig_deciles.update_layout(title="Distributions with 10th and 90th Percentiles")

# Compute each grouped quantile once and reuse it for both groups instead of
# re-running the groupby quantile for every single line.
lower_decile = df_grouped.quantile(0.1)
upper_decile = df_grouped.quantile(0.9)

# Lines are added in the order Small-10%, Small-90%, Large-10%, Large-90%.
for group, color in (("Small", "#D0D6FF"), ("Large", "#FFB3A1")):
    for boundary in (lower_decile, upper_decile):
        fig_deciles.add_vline(x=boundary.loc[group], line_width=5, line_color=color)

fig_deciles.write_image("absolute/screencast/public/deciles.svg")
fig_deciles.show()

In [None]:
# Draw three skew-normal samples (shape parameter a = 0, 5, -5), standardise
# each one to mean 0 and standard deviation 1, and reshape to long format.
# Insertion order fixes the order in which the shared generator is consumed.
shape_params = {"Symmetric": 0, "Right-skewed": 5, "Left-skewed": -5}
raw_skew = pd.DataFrame(
    {
        label: stats.skewnorm.rvs(a=shape, size=n, random_state=rng)
        for label, shape in shape_params.items()
    }
)

# Standardise column by column so the three shapes share one scale.
df_skew = pd.DataFrame(
    {
        label: (column - column.mean()) / column.std()
        for label, column in raw_skew.items()
    }
)
df_skew = pd.melt(df_skew, var_name="Skewness", value_name="Value")

# Grouped view of the values and its per-group summary statistics.
df_skew_grouped = df_skew.groupby("Skewness")["Value"]
df_skew_grouped_description = df_skew_grouped.describe()

In [None]:
# Display the per-group describe() table of the standardised skew samples.
df_skew_grouped_description

In [None]:
# Display the long-format frame (columns "Skewness" and "Value").
df_skew

In [None]:
# Grouped histogram of the three standardised samples with axis titles and
# tick marks removed. Note that this rebinds the name fig_bare.
fig_bare = df_skew.plot.hist(nbins=75, color="Skewness", barmode="group")
fig_bare.update_layout(title="Distributions", xaxis_title="", yaxis_title="")

# Clip the x-range to the 0.1% quantile of the left-skewed sample and the
# 99.9% quantile of the right-skewed sample.
x_low = df_skew_grouped.quantile(0.001)["Left-skewed"]
x_high = df_skew_grouped.quantile(0.999)["Right-skewed"]
fig_bare.update_xaxes(tickvals=[], range=[x_low, x_high])
fig_bare.update_yaxes(tickvals=[], range=[0, 9000])

fig_bare.write_image("skewness/screencast/public/bare.svg")
fig_bare.show()

In [None]:
# Copy of the skewness histogram in which the second and third trace are
# switched to "legendonly"; the axis ranges are re-applied from fig_bare.
fig_only_sym = copy.deepcopy(fig_bare)
for trace_index in (1, 2):
    fig_only_sym.data[trace_index].update(visible="legendonly")
fig_only_sym.update_xaxes(range=fig_bare.layout.xaxis.range)
fig_only_sym.update_yaxes(range=fig_bare.layout.yaxis.range)

fig_only_sym.write_image("skewness/screencast/public/only_sym.svg")
fig_only_sym.show()

In [None]:
# Copy of the skewness histogram in which the third trace is switched to
# "legendonly"; the axis ranges are re-applied from fig_bare.
fig_sym_right = copy.deepcopy(fig_bare)
fig_sym_right.data[2].update(visible="legendonly")
fig_sym_right.update_xaxes(range=fig_bare.layout.xaxis.range)
fig_sym_right.update_yaxes(range=fig_bare.layout.yaxis.range)

fig_sym_right.write_image("skewness/screencast/public/sym_right.svg")
fig_sym_right.show()

In [None]:
# Copy of the skewness histogram in which the second trace is switched to
# "legendonly"; the axis ranges are re-applied from fig_bare.
fig_sym_left = copy.deepcopy(fig_bare)
fig_sym_left.data[1].update(visible="legendonly")
fig_sym_left.update_xaxes(range=fig_bare.layout.xaxis.range)
fig_sym_left.update_yaxes(range=fig_bare.layout.yaxis.range)

fig_sym_left.write_image("skewness/screencast/public/sym_left.svg")
fig_sym_left.show()

In [None]:
# Three-row example: absolute deviations from each column's median, followed
# by a row (index 3) holding the mean of those deviations.
tiny = pd.DataFrame({"A": [2, 4, 6], "B": [1, 3, 8]})

tiny_mad = (
    tiny.sub(tiny.median())
    .abs()
    .rename(columns={"A": "|A - 4|", "B": "|B - 3|"})
)
mad_row = tiny_mad.mean().to_frame(name=3).T
tiny_mad = pd.concat([tiny_mad, mad_row]).round(2)

# The outer join keeps row 3, where the raw columns have no value; those
# gaps are filled with the row label "MAD".
tiny_out = (
    tiny.astype(str)
    .join(tiny_mad, how="outer")
    .loc[:, ["A", "|A - 4|", "B", "|B - 3|"]]
    .fillna("MAD")
)
print(tiny_out.to_markdown(index=False))

In [None]:
# Raw values with an extra row labelled "Mean" holding the column means,
# printed as integers in a Markdown table.
mean_row = tiny.mean().to_frame(name="Mean").T
tiny_for_df_correction = pd.concat([tiny.copy(), mean_row])
print(tiny_for_df_correction.astype(int).to_markdown())

In [None]:
# Three-row example: squared deviations from the column means, followed by
# their sum (row 3), the sum divided by n - 1 (row 4) and its square root
# (row 5).
tiny_sq = (
    tiny.sub(tiny.mean())
    .pow(2)
    .rename(columns={"A": "(A - 4)²", "B": "(B - 4)²"})
)

# All three summary rows are derived from the three data rows only, so they
# are computed before anything is appended.
sum_sq_dev = tiny_sq.sum()
variance = sum_sq_dev / (len(tiny_sq) - 1)
std_dev = variance**0.5
summary_rows = [
    sum_sq_dev.to_frame(name=3).T,
    variance.to_frame(name=4).T,
    std_dev.to_frame(name=5).T,
]
tiny_sq = pd.concat([tiny_sq, *summary_rows]).round(2)

# Rows 3-5 have no raw values; fill them with "SSD" first and then overwrite
# rows 4 and 5 with their own labels.
tiny_out = tiny.astype(str).join(tiny_sq, how="outer")
tiny_out = tiny_out.loc[:, ["A", "(A - 4)²", "B", "(B - 4)²"]].fillna("SSD")
for raw_column in ("A", "B"):
    tiny_out.loc[4:5, raw_column] = ["Variance", "Std. Dev."]
print(tiny_out.round(1).to_markdown(index=False))

In [None]:
# Display pandas' own per-column standard deviation of the tiny example.
tiny.std()

In [None]:
# Three-row example: cubed deviations from the column means, followed by
# their sum (row 3, "SCD") and the resulting sample skewness (row 4).
tiny_cub = (tiny - tiny.mean()) ** 3

# Adjusted Fisher-Pearson skewness: n / ((n - 1)(n - 2)) * SCD / s**3, where
# s is the sample standard deviation. For n = 3 the leading factor is 3 / 2.
# The sum of cubed deviations has to be scaled by the cubed standard
# deviation; scaling by the cubed mean (as this cell did before) is not a
# skewness measure and gave 0.84 instead of 1.15 for column B.
n_obs = len(tiny)
skew_factor = n_obs / ((n_obs - 1) * (n_obs - 2))
sum_cub_dev = tiny_cub.sum()
skewness = skew_factor * sum_cub_dev / tiny.std() ** 3

tiny_cub = pd.concat(
    [
        tiny_cub,
        sum_cub_dev.to_frame(name=3).T,
        skewness.to_frame(name=4).T,
    ]
).round(2)
tiny_cub = tiny_cub.rename(columns={"A": "(A - 4)³", "B": "(B - 4)³"})

# Rows 3 and 4 have no raw values; fill them with "SCD" and then relabel
# row 4. A list (not a tuple) is used to select the two columns so the key
# cannot be mistaken for a single multi-level label.
tiny_out = tiny.astype(str).join(tiny_cub, how="outer")
tiny_out = tiny_out[["A", "(A - 4)³", "B", "(B - 4)³"]].fillna("SCD")
tiny_out.loc[4, ["A", "B"]] = "Skewness"
print(tiny_out.round(2).to_markdown(index=False))