In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Seeded generator shared by every random draw made through `rng`.
SEED = 19454
rng = np.random.default_rng(SEED)

# Route DataFrame.plot through plotly and make the combined
# plotly_dark + presentation template the default for new figures.
pd.set_option("plotting.backend", "plotly")
pio.templates.default = "plotly_dark+presentation"

# Contingency

In [None]:
# Six paired observations of two categorical variables, one character per row.
df = pd.DataFrame({"x": list("cababb"), "y": list("RSSTRS")})
print(df.to_markdown())

In [None]:
# Marginal frequencies of x, ordered by category label.
x_counts = df["x"].value_counts().sort_index()
print(x_counts.to_markdown())

In [None]:
# Marginal frequencies of y, ordered by category label.
y_counts = df["y"].value_counts().sort_index()
print(y_counts.to_markdown())

In [None]:
# Joint frequencies: one row per x category, one column per y category.
xy_counts = pd.crosstab(df["x"], df["y"])
print(xy_counts.to_markdown())

In [None]:
# Scatter of the two categorical columns. Columns are referenced by name, as in
# the other scatter cells of this notebook, rather than by passing the Series.
fig_discrete = df.plot.scatter(x="x", y="y")
fig_discrete.write_image("strategies/screencast/public/fig_discrete.svg")
fig_discrete

In [None]:
# Render the x-by-y contingency table as a heatmap, export it, then display it.
contingency = pd.crosstab(df["x"], df["y"])
fig_heatmap_small = px.imshow(contingency)
fig_heatmap_small.write_image("strategies/screencast/public/fig_heatmap_small.svg")
fig_heatmap_small.show()

In [None]:
# Synthetic contingency table: rows 0-10 by five rating columns.
columns = ["poor", "fair", "good", "very good", "excellent"]
index = range(11)

# Base counts peak at 5000 in row 8 / column "very good" and drop by 500 per
# step of Manhattan distance from that cell, floored at zero.
peak_row, peak_col = 8, columns.index("very good")
row_offsets = np.abs(np.arange(len(index)) - peak_row)[:, None]
col_offsets = np.abs(np.arange(len(columns)) - peak_col)
distance_from_peak = row_offsets + col_offsets
data = np.maximum(0, 5000 - 500 * distance_from_peak)

# Add noise to every cell: the base count plus a draw from [0, 1000), but never
# less than a separate draw from [1, 100). Keep the draws scalar and in this
# order (floor first, then bump, row by row): vectorising them would consume the
# generator differently and change every number drawn from rng afterwards.
for i in range(len(index)):
    for j in range(len(columns)):
        floor = rng.integers(1, 100)
        bumped = data[i, j] + rng.integers(0, 1000)
        data[i, j] = max(floor, bumped)

# Create DataFrame
df = pd.DataFrame(data, columns=columns, index=index)


print(df.to_markdown())

In [None]:
# Heatmap of the full table; y-axis autorange is switched on after px.imshow
# has built the figure, then the figure is exported and displayed.
fig_heatmap_large = px.imshow(df).update_yaxes(autorange=True)
fig_heatmap_large.write_image("strategies/screencast/public/fig_heatmap_large.svg")
fig_heatmap_large.show()

In [None]:
# Same heatmap as above in the notebook, with each cell's value printed on it.
labelled_path = "strategies/screencast/public/fig_heatmap_large_w_labels.svg"
fig_heatmap_large_w_labels = px.imshow(df, text_auto=True).update_yaxes(autorange=True)
fig_heatmap_large_w_labels.write_image(labelled_path)
fig_heatmap_large_w_labels.show()

# Continuous data

In [None]:
# One bivariate-normal sample per requested size: zero means, unit variances
# and covariance 0.3 between x and y.
n_draws = [100, 10_000]
mean_xy = [0, 0]
cov_xy = [[1, 0.3], [0.3, 1]]
dfs = {
    size: pd.DataFrame(rng.multivariate_normal(mean_xy, cov_xy, size), columns=["x", "y"])
    for size in n_draws
}

In [None]:
# Preview the 100-row sample as a single markdown table: the first three rows,
# an ellipsis row, then the last three rows with their two header lines dropped.
small = dfs[100]
print(small.head(3).to_markdown())
print(" ... ".join(["|"] * 4))
tail_lines = small.tail(3).to_markdown().split("\n")[2:]
print(*tail_lines, sep="\n")

In [None]:
# One scatter figure per sample, keyed by sample size.
figs = {size: sample.plot.scatter(x="x", y="y") for size, sample in dfs.items()}

In [None]:
# Pin both axes to [-5, 5] on every figure and export each one. This is a plain
# loop because the work is purely side effects: nothing is collected, and the
# cell no longer shows a list of None values as its output.
for n_points, fig_continuous in figs.items():
    fig_continuous.update_layout(xaxis_range=[-5, 5], yaxis_range=[-5, 5])
    fig_continuous.write_image(
        f"strategies/screencast/public/fig_continuous_{n_points}.svg"
    )

# Discrete + continuous

In [None]:
# Draw three correlated normal variables, then reshape to long form so the
# variable name (a/b/c) lands in column "x" and the drawn value in column "y".
n = [1_000]
group_means = [0, 0.3, -0.2]
group_cov = [[1, 0.3, -0.2], [0.3, 1, 0.7], [-0.2, 0.7, 1]]
wide = pd.DataFrame(
    rng.multivariate_normal(group_means, group_cov, n),
    columns=["a", "b", "c"],
)
df = wide.melt(var_name="x", value_name="y")

In [None]:
# Scatter of the continuous values (y) against their discrete group (x).
dc_path = "strategies/screencast/public/fig_dc.svg"
fig_dc = df.plot.scatter(x="x", y="y")
fig_dc.write_image(dc_path)
fig_dc

In [None]:
# Box plot of y for each group in x.
box_path = "strategies/screencast/public/fig_box.svg"
fig_box = df.plot.box(x="x", y="y")
fig_box.write_image(box_path)
fig_box

In [None]:
# Violin plot of y for each group in x, built with plotly express directly.
violin_path = "strategies/screencast/public/fig_violin.svg"
fig_violin = px.violin(df, x="x", y="y")
fig_violin.write_image(violin_path)
fig_violin

# Covariance

In [None]:
# Three (A, B) observations, written one row at a time.
tiny = pd.DataFrame([(2, 1), (4, 3), (6, 8)], columns=["A", "B"])

In [None]:
# Print the table as markdown without the row index.
tiny_markdown = tiny.to_markdown(index=False)
print(tiny_markdown)

In [None]:
# Scatter of B against A for the original pairing.
orig_path = "covariance/screencast/public/fig_orig.svg"
fig_orig = tiny.plot.scatter(x="A", y="B")
fig_orig.write_image(orig_path)
fig_orig

In [None]:
# Subtract 4 from each column, then multiply the two deviations row by row.
a_dev = tiny["A"].sub(4)
b_dev = tiny["B"].sub(4)
tiny["A - 4"] = a_dev
tiny["B - 4"] = b_dev
tiny["(A - 4)(B - 4)"] = a_dev.mul(b_dev)
print(tiny.to_markdown(index=False))

In [None]:
# Keep the A values but replace B with [8, 3, 1]; `tiny` itself is not modified.
tiny_exchanged = tiny[["A", "B"]].assign(B=[8, 3, 1])
print(tiny_exchanged.to_markdown(index=False))

In [None]:
# Scatter of B against A after the B values were exchanged.
exchanged_path = "covariance/screencast/public/fig_exchanged.svg"
fig_exchanged = tiny_exchanged.plot.scatter(x="A", y="B")
fig_exchanged.write_image(exchanged_path)
fig_exchanged

In [None]:
# Subtract 4 from each column of the exchanged table, then multiply the two
# deviations row by row.
a_dev_exchanged = tiny_exchanged["A"].sub(4)
b_dev_exchanged = tiny_exchanged["B"].sub(4)
tiny_exchanged["A - 4"] = a_dev_exchanged
tiny_exchanged["B - 4"] = b_dev_exchanged
tiny_exchanged["(A - 4)(B - 4)"] = a_dev_exchanged.mul(b_dev_exchanged)
print(tiny_exchanged.to_markdown(index=False))

# Correlations

In [None]:
# Covariance matrix with variances 0.1**2 (x) and 10**2 (y). The standard
# deviations multiply to 1, so the off-diagonal covariance c / 100 is also the
# correlation between x and y.
c = 30
cov = np.array([[0.1**2, c / 100], [c / 100, 10**2]])
# Cholesky factor of that matrix, shown as the cell output.
np.linalg.cholesky(cov)

In [None]:
# Target correlations, in percent.
corrs = [90, 30, 0, -70, -99]


def _cov_for(pct):
    """Covariance matrix with variances 0.1**2 and 10**2 and covariance pct / 100."""
    off_diag = pct / 100
    return np.array([[0.1**2, off_diag], [off_diag, 10**2]])


chols = {pct: np.linalg.cholesky(_cov_for(pct)) for pct in corrs}
means = np.array([1.05, -10])


def _sample_xy(chol):
    """Draw 100 independent standard-normal pairs, transform them with chol and shift by means."""
    white = rng.multivariate_normal([0, 0], [[1, 0], [0, 1]], 100)
    return pd.DataFrame(means + (chol @ white.T).T, columns=["x", "y"])


# Samples are drawn in the order of `corrs`, one call to rng per correlation.
dfs = {pct: _sample_xy(chols[pct]) for pct in corrs}

In [None]:
# Export and show one scatter per correlation on identical axes. The figure for
# c == 30 is exported and shown a second time with a large red marker added at
# (1.05, -10).
shared_layout = dict(
    xaxis_range=[0.71, 1.39],
    yaxis_range=[-44, 24],
    showlegend=False,
)
for c, df in dfs.items():
    fig = df.plot.scatter(x="x", y="y")
    fig.update_layout(title=f"Correlation: {c/100:.2g}", **shared_layout)
    fig.write_image(f"correlation/screencast/public/fig_{c}.svg")
    fig.show()
    if c != 30:
        continue
    red_dot = go.Scatter(
        x=[1.05],
        y=[-10],
        mode="markers",
        marker={"size": 20, "color": "red"},
    )
    fig.add_trace(red_dot)
    fig.write_image(f"correlation/screencast/public/fig_dot_{c}.svg")
    fig.show()

In [None]:
# Summary statistics for the sample of the last correlation in `corrs`. The key
# is spelled out instead of read from the plotting loop's leftover variable `c`,
# so the output does not depend on which cell assigned `c` most recently.
dfs[corrs[-1]].describe()

In [None]:
# Subtract `means` from the c == 30 sample; dfs[c] itself is left untouched.
c = 30
df = dfs[c] - means
df

In [None]:
# Scatter of the demeaned sample with a large red marker at the origin.
origin_marker = go.Scatter(
    x=[0],
    y=[0],
    mode="markers",
    marker=dict(size=20, color="red"),
)
fig_demeaned = df.plot.scatter(x="x", y="y").add_trace(origin_marker)
demeaned_layout = dict(
    xaxis_range=[-0.34, 0.34],
    yaxis_range=[-34, 34],
    showlegend=False,
    title=f"Correlation: {c/100:.1f}, demeaned x and y",
)
fig_demeaned.update_layout(**demeaned_layout)
fig_demeaned.write_image("correlation/screencast/public/fig_demeaned.svg")
fig_demeaned.show()

In [None]:
# Demean the sample for correlation c and divide x by 0.1 and y by 10. The frame
# is rebuilt from dfs[c] and means rather than divided in place, so re-running
# this cell does not rescale the data a second time.
df = (dfs[c] - means) / pd.Series({"x": 0.1, "y": 10})
fig_standardised = df.plot.scatter(x="x", y="y")
# Large red marker at the origin.
fig_standardised.add_trace(
    go.Scatter(
        x=[0],
        y=[0],
        mode="markers",
        marker={"size": 20, "color": "red"},
    )
)
# Same range on both axes.
fig_standardised.update_layout(
    xaxis_range=[-3.4, 3.4],
    yaxis_range=[-3.4, 3.4],
    showlegend=False,
    title=f"Correlation: {c/100:.1f}, demeaned/unit variance x and y",
)
fig_standardised.write_image("correlation/screencast/public/fig_standardised.svg")
fig_standardised.show()

In [None]:
# Diagonal entries of the Cholesky factor stored under key 30.
chols[30].diagonal()