In [None]:
import numpy as np
import pandas as pd
import plotly.io as pio

# Route every pandas `.plot` call through plotly instead of the default backend.
pd.set_option("plotting.backend", "plotly")
# Default template applied to all plotly figures created in this notebook.
pio.templates.default = "plotly_dark+presentation"

# Contingency

In [None]:
# Six paired categorical observations, written row by row as (x, y).
df = pd.DataFrame(
    [
        ("c", "R"),
        ("a", "S"),
        ("b", "S"),
        ("a", "T"),
        ("b", "R"),
        ("b", "S"),
    ],
    columns=["x", "y"],
)
# Emit the raw observations (including the row index) as a Markdown table.
print(df.to_markdown(index=True))

In [None]:
# Frequency of each value of x, ordered by the value's label, as Markdown.
print(
    df.loc[:, "x"]
    .value_counts()
    .sort_index()
    .to_markdown()
)

In [None]:
# Frequency of each value of y, ordered by the value's label, as Markdown.
print(
    df.loc[:, "y"]
    .value_counts()
    .sort_index()
    .to_markdown()
)

In [None]:
# Contingency table: rows are the values of x, columns are the values of y.
print(pd.crosstab(index=df["x"], columns=df["y"]).to_markdown())

In [None]:
# Scatter plot of the (x, y) pairs: export it as SVG, then show it inline
# by leaving the figure as the cell's last expression.
fig_discrete = df.plot(kind="scatter", x=df["x"], y=df["y"])
fig_discrete.write_image("contingency/screencast/public/fig_discrete.svg")
fig_discrete

# Continuous data

In [None]:
# Fixed seed so the samples (and the figures exported from them) are identical
# on every Restart & Run All; the generator is re-created each time this cell runs.
SEED = 42
rng = np.random.default_rng(SEED)

# Sample sizes to compare: a small and a large draw from the same distribution.
n_draws = [100, 10_000]

# One DataFrame per sample size, keyed by n. Each holds n draws from a bivariate
# normal with zero means, unit variances and covariance 0.3 between x and y.
dfs = {
    n: pd.DataFrame(
        rng.multivariate_normal([0, 0], [[1, 0.3], [0.3, 1]], n),
        columns=["x", "y"],
    )
    for n in n_draws
}

Again, the `value_counts` method behaves the same way as above. Note, however, that it
is not very helpful for summarizing the data. In this case, only one value occurs twice;
all others occur only once. Hence, the Series with the value counts has only one element
fewer than the original Series, which hardly counts as a summary.

In [None]:
# One x-vs-y scatter figure per sample size, keyed like `dfs`.
figs = {
    n: frame.plot.scatter(x="x", y="y")
    for n, frame in dfs.items()
}

In [None]:
# Give every figure the same axis limits so the two sample sizes are directly
# comparable, then export each one as SVG. A plain loop is used because the
# calls are made only for their side effects; a list comprehension would build
# (and display) a throwaway list of return values.
for n, fig in figs.items():
    fig.update_layout(
        xaxis_range=[-5, 5],
        yaxis_range=[-5, 5],
    )
    fig.write_image(f"contingency/screencast/public/fig_continuous_{n}.svg")

# Discrete, ordered data

In [None]:
# Small integer-valued sample; only 1, 2 and 5 occur (3 and 4 never do).
discrete = pd.Series(
    data=[1, 1, 2, 5, 1, 5],
    name="dis",
)
discrete

The `value_counts` method has the same behaviour as above.

In [None]:
# Count the occurrences of each distinct value, most frequent first
# (the defaults, spelled out).
discrete.value_counts(sort=True, ascending=False)

The plots now look like those for continuous data — there is no gap between the bars and the x-axis is continuous. Whether that makes sense for your application is up to you to decide.

In [None]:
# Histogram of the raw values with the backend's default binning.
discrete.plot(kind="hist")

If you only want to see all unique outcomes, a simple way to get that into the visualisation is to increase the number of bins:

In [None]:
# Same histogram, but asking the plotting backend for five bins via `nbins`.
discrete.plot(kind="hist", nbins=5)

We can achieve a result similar to that for categorical data by making a bar chart out of the value
counts. Note that this keeps empty space for 3 and 4.

In [None]:
# Bar chart of the value counts: one bar per distinct value.
discrete.value_counts().plot(kind="bar")

Again, we can swap the axes as this is the most readable way for such data.

In [None]:
# Same bar chart with the axes swapped; `orientation` is forwarded to the plotting backend.
discrete.value_counts().plot(kind="bar", orientation="h")

Alternatively, use `barh`, which produces a horizontal bar chart directly:

In [None]:
# Horizontal bar chart of the value counts, requested via the "barh" plot kind.
discrete.value_counts().plot(kind="barh")