<p style="float:right"> <img src="assets/orange.png" alt="Orange logo" width="40" /> <img src="assets/ulb.jpg" alt="ULB logo" width="40" /> <img src="assets/mlg.png" alt="MLG logo" width="160" /> <img src="assets/innoviris.jpg" alt="Innoviris logo" width="200" /></p>

**_Notebook for the AppliedPhD project Machu-Picchu written by Théo Verhelst_**<br/>
_Supervisors at Orange: Denis Mercier, Jeevan Shrestha_<br/>
_Academic supervision: Gianluca Bontempi (ULB MLG)_
# Simulating uplift modeling with a Dirichlet distribution

First, some imports

In [None]:
%load_ext autoreload
%autoreload 2
from time import time
from datetime import datetime
from math import ceil
import pickle
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats as st
from scipy.special import digamma
import pandas as pd
from pandas.plotting import scatter_matrix
import seaborn as sns
from joblib import Parallel, delayed
from tqdm.autonotebook import trange
from sklearn.model_selection import ParameterGrid

from functions.simulation_functions import simulate_uplift_dirichlet
from functions.eval_measures import cf_profit_curve

# Figure-wide font configuration: serif text and the matching DejaVu Serif math font
plt.rcParams["font.family"] = "serif"
plt.rcParams["mathtext.fontset"] = "dejavuserif"

This function is used to format plot titles

In [None]:
def format_number(x, precision):
    """Render ``x`` in positional (non-scientific) notation for plot titles.

    At most ``precision`` digits are kept after the decimal point, trailing
    zeros are trimmed, and a dangling decimal point is dropped so that whole
    numbers print without a dot.
    """
    text = np.format_float_positional(
        x, precision=precision, trim="-", fractional=True
    )
    return text[:-1] if text.endswith(".") else text

## Testing the code on a single simulation

As a preliminary step, we run the simulation once with fixed parameters.

In [None]:
# Mean composition of the four counterfactual categories. As in the rest of the
# notebook, S_0 = mu[1] + mu[3] and S_1 = mu[2] + mu[3].
mu = np.array([0.9, 0.05, 0.03, 0.02])
S_0 = mu[1] + mu[3]
S_1 = mu[2] + mu[3]
# A scales mu into the parameter vector a = A * mu handed to the simulator
A = 80
a = A * mu
size = 100000
# NOTE(review): var_p and var_u are not passed to the simulation call below
var_p = 0.003
var_u = 0.01
n_p_0 = 60
n_u_0 = 20
n_u_1 = 20
data = simulate_uplift_dirichlet(a, size, n_p_0, n_u_0, n_u_1)

# Both entropies must be in the same unit for their difference to be meaningful.
# The digamma-based expression is in nats, so the prior entropy uses the natural
# logarithm as well (same convention as compare_uplift_proba_dirichlet).
prior_entropy = -np.sum(mu * np.log(mu))
posterior_entropy = digamma(A + 1) - np.sum(digamma(a + 1) * a) / A
mutual_info = prior_entropy - posterior_entropy

Let's compute the mutual information between the outcomes and the emulated features.

In [None]:
# Entropy of each binary outcome when only its marginal probability is known
I_0 = st.entropy([S_0, 1 - S_0])
I_1 = st.entropy([S_1, 1 - S_1])

# Entropy of each outcome given the simulated per-row probabilities, averaged over rows
I_0_X = st.entropy(np.array([data.S_0, 1 - data.S_0]), axis=0).mean()
I_1_X = st.entropy(np.array([data.S_1, 1 - data.S_1]), axis=0).mean()

# Share of the marginal entropy removed by conditioning
explained_0 = 1 - I_0_X/I_0
explained_1 = 1 - I_1_X/I_1
print("Percentage of mutual information I(X; Y_0): {:.4%}".format(explained_0))
print("Percentage of mutual information I(X; Y_1): {:.4%}".format(explained_1))

In [None]:
# Profit curves obtained when ranking by the estimated uplift and by the estimated S_0
curve_u = cf_profit_curve(data["uplift_hat"], data["S_0"], data["S_1"])
curve_p = cf_profit_curve(data["S_0_hat"], data["S_0"], data["S_1"])

# Draw both curves on the same axes, uplift first so the colour order is unchanged
for curve, label in ((curve_u, "uplift"), (curve_p, "proba")):
    plt.plot(curve["k"], curve["profit"], label=label)

plt.xlabel("k")
plt.ylabel("Profit")
plt.legend()
plt.show()

## Define the function that evaluates a simulation run

This function computes all the statistics that we will report from the full experiment.

In [None]:
def compare_uplift_proba_dirichlet(mu, A, size, n_p_0, n_u_0, n_u_1,
                               CB=np.array([[1, 1], [0, 0]]),
                               use_churn_convention=True):
    """Run one simulation and summarise how the uplift and predictive rankings compare.

    Parameters
    ----------
    mu : array of four values, reported as alpha, beta, gamma and delta.
    A : scalar multiplying ``mu`` to give the parameter vector ``a = A * mu``
        passed to ``simulate_uplift_dirichlet``.
    size, n_p_0, n_u_0, n_u_1 : forwarded unchanged to the simulator. The
        ``n_*`` values are also the denominators of the reported variances.
    CB : 2x2 array forwarded to ``cf_profit_curve`` and reported entry by entry.
    use_churn_convention : forwarded to the simulator.

    Returns
    -------
    dict
        One flat record with the inputs, the entropies (in nats), the two
        variances, the mean profit of each ranking (``auuc_u``, ``auuc_p``),
        their difference, and ``best_approach``: ``"e"`` when the two means
        differ by at most 1e-6, ``"u"`` when the uplift ranking is higher,
        ``"p"`` otherwise.
    """
    concentration = A * mu
    sim = simulate_uplift_dirichlet(concentration, size, n_p_0, n_u_0, n_u_1,
                                    use_churn_convention=use_churn_convention)

    # Entropy of mu itself, and the digamma-based expected entropy under a = A * mu
    h_prior = -np.sum(mu * np.log(mu))
    h_posterior = digamma(A + 1) - np.sum(digamma(concentration + 1) * concentration) / A

    # Mean of each profit curve is the score used to compare the two rankings
    profit_by_uplift = cf_profit_curve(sim.uplift_hat, sim.S_0, sim.S_1, CB)
    profit_by_proba = cf_profit_curve(sim.S_0_hat, sim.S_0, sim.S_1, CB)
    score_u = np.mean(profit_by_uplift.profit)
    score_p = np.mean(profit_by_proba.profit)
    score_gap = score_u - score_p

    # Average Bernoulli variances of the estimates for the given sample sizes
    spread_0 = sim.S_0 * (1 - sim.S_0)
    spread_1 = sim.S_1 * (1 - sim.S_1)
    variance_proba = np.mean(spread_0 / n_p_0)
    variance_uplift = np.mean(spread_0 / n_u_0 + spread_1 / n_u_1)

    # Scores closer than this tolerance are reported as a tie
    tie_tolerance = 0.000001
    winner = (
        "e" if np.abs(score_gap) <= tie_tolerance
        else ("u" if score_u > score_p else "p")
    )

    return {
        "alpha": mu[0],
        "beta": mu[1],
        "gamma": mu[2],
        "delta": mu[3],
        "A": A,
        "CB_00": CB[0, 0],
        "CB_01": CB[0, 1],
        "CB_10": CB[1, 0],
        "CB_11": CB[1, 1],
        "a_0": concentration[0],
        "a_1": concentration[1],
        "a_2": concentration[2],
        "a_3": concentration[3],
        "S_0": mu[1] + mu[3],
        "S_1": mu[2] + mu[3],
        "size": size,
        "prior_entropy": h_prior,
        "posterior_entropy": h_posterior,
        "mutual_info": h_prior - h_posterior,
        "n_p_0": n_p_0,
        "n_u_0": n_u_0,
        "n_u_1": n_u_1,
        "var_p": variance_proba,
        "var_u": variance_uplift,
        "auuc_u": score_u,
        "auuc_p": score_p,
        "auuc_d": score_gap,
        "best_approach": winner,
    }

This function converts a long format dataframe to a color matrix displayable with plt.imshow.

In [None]:
def dataframe_to_color_matrix(df, rows, columns, values, cmap):
    """Turn a long-format dataframe into an image array for ``plt.imshow``.

    Parameters
    ----------
    df : pandas.DataFrame in long format.
    rows, columns : names of the columns that become the image rows and columns.
    values : name of the column holding the label of each cell.
    cmap : dict mapping a label to a length-3 colour.

    Returns
    -------
    numpy.ndarray of integers with shape (n_rows, n_columns, 3).

    Cells whose label is not a key of ``cmap`` keep that label repeated over
    the three channels, so the final integer conversion fails for
    non-numeric labels.
    """
    labels = df.pivot(index=rows, columns=columns, values=values).to_numpy()
    # Add a third dimension for specifying the color
    image = np.repeat(labels[:, :, np.newaxis], 3, axis=2)
    # Masks are computed on the untouched label matrix: comparing against the
    # image being painted would let an already assigned channel match a later key.
    for value, color in cmap.items():
        image[labels == value] = np.asarray(color)
    return image.astype(int)

In [None]:
# RGB colours (0-255) painted in the heatmaps for each "best_approach" label
cmap = dict(
    u=(31, 119, 180),   # uplift ranking has the higher mean profit
    p=(255, 127, 14),   # predictive ranking has the higher mean profit
    e=(148, 103, 189),  # both rankings are within the tie tolerance
)

## Experiments
### Impact of mutual information

In [None]:
# Placeholder value: n_runs is overwritten below with the size of the parameter grid
n_runs = 6000
#mu = st.dirichlet.rvs(np.array([0.4, 0.2, 0.2, 0.2]) * 30, size=n_runs)
mu = np.array([0.6, 0.2, 0.1, 0.1])

# Values of A selected to reach the information rates given next to each entry
# (0%, 0.1%, 1%, 3%, 10% and 90%); the 99% and 100% settings are disabled
steps_A = np.array([
    1e10,      # I = 0%
    1365.0078, # I = 0.1%
    136.3132,  # I = 1%
    44.22273980505,  # I = 3%
    12.24227,  # I = 10%
    0.128356,  # I = 90%
    #0.01154,  # I = 99%
    #1e-4      # I = 100%
])
#A = 10**st.uniform.rvs(-4, 7, size=n_runs)
size = 500000
# Full factorial grid: every A crossed with n_p_0 and n_u_0 from 1 to 50
params = ParameterGrid({
    "A": steps_A,
    "n_p_0": range(1, 51),
    "n_u_0": range(1, 51)
})
n_runs = len(params)

# One simulation per grid point, spread over 4 worker processes.
# n_u_0 is passed twice, so n_u_1 always equals n_u_0.
results = Parallel(n_jobs=4)(
    delayed(compare_uplift_proba_dirichlet)(
        mu,
        params[i]["A"],
        size,
        params[i]["n_p_0"],
        params[i]["n_u_0"],
        params[i]["n_u_0"]
    ) for i in trange(n_runs)
)
# One row per simulation run
stats = pd.DataFrame.from_records(results)

In [None]:
var = "mutual_info"
values = np.unique(stats[var])
# Prior entropy used to express each panel's mutual information as a percentage
max_value = np.unique(stats.prior_entropy)[0]
mu = np.array([0.6, 0.2, 0.1, 0.1])
S_0 = mu[1] + mu[3]
S_1 = mu[2] + mu[3]

show_variance = True

# imshow places pixel k at coordinate k, whereas the pivoted heatmap is ordered by
# the sorted sample sizes. Tick positions must therefore be mapped back to the
# sample size they stand for (position 0 is the smallest size, not size 0).
n_u_grid = np.sort(stats["n_u_0"].unique())
n_p_grid = np.sort(stats["n_p_0"].unique())


def tick_to_sample_size(tick, grid):
    """Return the sample size drawn at pixel position ``tick``, or None if off-grid."""
    index = int(round(float(tick)))
    if 0 <= index < len(grid):
        return grid[index]
    return None


def x_tick_formatter(n_u, pos):
    """Label an x tick; ``n_u`` is the tick position supplied by matplotlib."""
    n = tick_to_sample_size(n_u, n_u_grid)
    if n is None:
        return ""
    if show_variance:
        var_0 = S_0 * (1 - S_0) / n
        var_1 = S_1 * (1 - S_1) / n
        return "{:.0e}".format(var_0 + var_1)
    return str(int(n))


def y_tick_formatter(n_p, pos):
    """Label a y tick; ``n_p`` is the tick position supplied by matplotlib."""
    n = tick_to_sample_size(n_p, n_p_grid)
    if n is None:
        return ""
    if show_variance:
        return "{:.0e}".format(S_0 * (1 - S_0) / n)
    return str(int(n))


cols = 3
# Round up so that a number of panels that is not a multiple of cols still fits
rows = ceil(len(values) / cols)
fig, axs = plt.subplots(
    ncols=cols, nrows=rows,
    figsize=(6.5, 4),
    layout="constrained",
    sharex=True,
    sharey=True,
    squeeze=False
)

for i, value in enumerate(values):
    row = i // cols
    col = i % cols

    s = stats[stats[var] == value]
    s = dataframe_to_color_matrix(s, "n_p_0", "n_u_0", "best_approach", cmap)
    axs[row, col].imshow(s, interpolation="none")
    axs[row, col].xaxis.set_major_formatter(x_tick_formatter)
    axs[row, col].yaxis.set_major_formatter(y_tick_formatter)
    axs[row, col].set_title("${}$% of information".format(
        format_number(100 * value / max_value, 1)
    ), fontsize=11)

    if row == rows - 1:
        axs[row, col].set_xlabel("Variance uplift" if show_variance else "$n_u$")
    if col == 0:
        axs[row, col].set_ylabel("Variance predictive" if show_variance else "$n_p$")

# Hide the panels left empty when the grid has more cells than values
for unused_ax in axs.ravel()[len(values):]:
    unused_ax.set_visible(False)

# Since axes are shared, only one should be inverted to affect all of them
axs[0, 0].invert_xaxis()
# Hack to have the right legend
plt.scatter([], [], c="C0", label="Uplift")
plt.scatter([], [], c="C1", label="Predictive")
plt.scatter([], [], c="C4", label="None")
fig.legend(title="Best approach", frameon=False, loc="center left", bbox_to_anchor=(0.96, 0.84))
plt.savefig("pdf/n_u_n_p_I.pdf", bbox_inches="tight")
plt.show()

### Impact of the distribution of counterfactuals

In [None]:
# Small probability mass used for the near-degenerate distributions below
eps = 1e-2
# Six settings of mu, each row being one (alpha, beta, gamma, delta) vector
steps_mu = np.array([
    [eps, 0.5 - eps, eps, 0.5 - eps],
    [eps, eps, 0.5 - eps, 0.5 - eps],
    [0.25, 0.25, 0.25, 0.25],
    [0.8, 0.1, 0.01, 0.09],
    [0.8, 0.01, 0.1, 0.09],
    [0.8, 0.01, 0.01, 0.18]
])

A = 44.43905105200017 # 3%
# The first assignment is immediately overwritten; only size = 500000 is used
size = 50
size = 500000
# Full factorial grid: every mu crossed with n_p_0 and n_u_0 from 1 to 50
params = ParameterGrid({
    "mu": steps_mu,
    "n_p_0": range(1, 51),
    "n_u_0": range(1, 51)
})
n_runs = len(params)

# Copy of the mu vector of every grid point; not used again in this cell
values = np.zeros((n_runs, 4))
for i, v in enumerate(params):
    values[i, :] = v["mu"]

# One simulation per grid point, spread over 4 worker processes.
# n_u_0 is passed twice, so n_u_1 always equals n_u_0.
results = Parallel(n_jobs=4)(
    delayed(compare_uplift_proba_dirichlet)(
        params[i]["mu"],
        A,
        size,
        params[i]["n_p_0"],
        params[i]["n_u_0"],
        params[i]["n_u_0"]
    ) for i in trange(n_runs)
)
# One row per simulation run
stats = pd.DataFrame.from_records(results)

In [None]:
# Same six mu vectors as in the experiment cell, reordered for display.
# They are built from the same `eps` expressions so the exact float comparison
# against the stored alpha/beta/gamma/delta columns below succeeds.
mu_values = np.array([
    [eps, eps, 0.5 - eps, 0.5 - eps],
    [0.8, 0.1, 0.01, 0.09],
    [0.25, 0.25, 0.25, 0.25],
    [0.8, 0.01, 0.01, 0.18],
    [eps, 0.5 - eps, eps, 0.5 - eps],
    [0.8, 0.01, 0.1, 0.09]
])
cols = 3
rows = mu_values.shape[0] // cols
fig, axs = plt.subplots(
    ncols=cols, nrows=rows,
    figsize=(6.5, 4),
    layout="constrained",
    #sharex=True,
    #sharey=True,
    squeeze=False
)

show_variance = True

# imshow places pixel k at coordinate k, whereas the pivoted heatmap is ordered by
# the sorted sample sizes. Tick positions must therefore be mapped back to the
# sample size they stand for (position 0 is the smallest size, not size 0).
n_u_grid = np.sort(stats["n_u_0"].unique())
n_p_grid = np.sort(stats["n_p_0"].unique())


def tick_to_sample_size(tick, grid):
    """Return the sample size drawn at pixel position ``tick``, or None if off-grid."""
    index = int(round(float(tick)))
    if 0 <= index < len(grid):
        return grid[index]
    return None


def x_tick_formatter(n_u, pos, S_0, S_1):
    """Label an x tick; ``n_u`` is the tick position supplied by matplotlib."""
    n = tick_to_sample_size(n_u, n_u_grid)
    if n is None:
        return ""
    if show_variance:
        var_0 = S_0 * (1 - S_0) / n
        var_1 = S_1 * (1 - S_1) / n
        return "{:.0e}".format(var_0 + var_1)
    return str(int(n))


def y_tick_formatter(n_p, pos, S_0):
    """Label a y tick; ``n_p`` is the tick position supplied by matplotlib."""
    n = tick_to_sample_size(n_p, n_p_grid)
    if n is None:
        return ""
    if show_variance:
        return "{:.0e}".format(S_0 * (1 - S_0) / n)
    return str(int(n))


plt.rcParams['axes.titlepad'] = 9

for i, mu in enumerate(mu_values):
    row = i // cols
    col = i % cols
    s = stats[(stats.alpha == mu[0])
            & (stats.beta  == mu[1])
            & (stats.gamma == mu[2])
            & (stats.delta == mu[3])
    ]
    s = dataframe_to_color_matrix(s, "n_p_0", "n_u_0", "best_approach", cmap)
    axs[row, col].imshow(s, interpolation="none")
    # Axes are not shared in this figure, so each panel is inverted individually
    axs[row, col].invert_xaxis()
    S_0 = mu[1] + mu[3]
    S_1 = mu[2] + mu[3]
    # Default arguments freeze this panel's S_0 / S_1 inside the formatter
    axs[row, col].xaxis.set_major_formatter(lambda n, pos, s_0=S_0, s_1=S_1: x_tick_formatter(n, pos, s_0, s_1))
    axs[row, col].yaxis.set_major_formatter(lambda n, pos, s_0=S_0: y_tick_formatter(n, pos, s_0))
    #axs[row, col].set_title("$\\beta = {}$%".format(format_number(mu[1] * 100, 3)))
    axs[row, col].set_title("$\\mu = [{}, {}, {}, {}]$".format(*[format_number(m, 3) for m in mu]), fontsize=10)

    if row == rows - 1:
        axs[row, col].set_xlabel("Variance uplift")
    if col == 0:
        axs[row, col].set_ylabel("Variance predictive")

# Hack to have the right legend
plt.scatter([], [], c="C0", label="Uplift")
plt.scatter([], [], c="C1", label="Predictive")
fig.legend(title="Best approach", frameon=False, loc="center left", bbox_to_anchor=(0.96, 0.84))
plt.savefig("pdf/n_u_n_p_mu.pdf", bbox_inches="tight")
plt.show()

### Impact of the cost-benefit matrix

In [None]:
mu = np.array([0.6, 0.2, 0.1, 0.1])
A = 44.43905105200017 # 3%
size = 500000
# Six 2x2 cost-benefit matrices; each is passed as the CB argument of
# compare_uplift_proba_dirichlet and reported there as CB_00, CB_01, CB_10, CB_11
steps_CB = np.array([
    [[1, 1],     [0, 0]],
    [[120, 99],  [0, -1]],
    [[120, 60],  [0, -1]],
    [[120, 99],  [-30, -40]],
    [[120, 60],  [-30, -40]],
    [[120, 120],  [-40, -40]]
])

# Full factorial grid: every CB matrix crossed with n_p_0 and n_u_0 from 1 to 50
params = ParameterGrid({
    "CB": steps_CB,
    "n_p_0": range(1, 51),
    "n_u_0": range(1, 51)
})
n_runs = len(params)


# One simulation per grid point, spread over 4 worker processes.
# n_u_0 is passed twice, so n_u_1 always equals n_u_0.
results = Parallel(n_jobs=4)(
    delayed(compare_uplift_proba_dirichlet)(
        mu,
        A,
        size,
        params[i]["n_p_0"],
        params[i]["n_u_0"],
        params[i]["n_u_0"],
        CB=params[i]["CB"]
    ) for i in trange(n_runs)
)
# One row per simulation run
stats = pd.DataFrame.from_records(results)

In [None]:
# Same mu as in the cost-benefit experiment; the plotting cell below derives S_0 and S_1 from it.
# NOTE(review): presumably repeated so the plot can be redrawn without re-running the experiment.
mu = np.array([0.6, 0.2, 0.1, 0.1])

In [None]:
# Pick the order to make it look good
CB_values = np.array([
    [120,  60,   0,  -1],
    [120,  99,   0,  -1],
    [  1,   1,   0,   0],
    [120,  60, -30, -40],
    [120,  99, -30, -40],
    [120, 120, -40, -40]
])

cols = 3
rows = CB_values.shape[0] // cols
fig, axs = plt.subplots(
    ncols=cols, nrows=rows,
    figsize=(6.5, 4),
    layout="constrained",
    sharex=True,
    sharey=True,
    squeeze=False
)
S_0 = mu[1] + mu[3]
S_1 = mu[2] + mu[3]
show_variance = True

def x_tick_formatter(n_u, pos):
    if show_variance:
        var_0 = S_0 * (1 - S_0) / max(1, n_u)
        var_1 = S_1 * (1 - S_1) / max(1, n_u)
        return "{:.0e}".format(var_0 + var_1)
    else:
        return str(int(n_u))

def y_tick_formatter(n_p, pos):
    if show_variance:
        return "{:.0e}".format(S_0 * (1 - S_0) / max(1, n_p))
    else:
        return str(int(n_p))
    
plt.rcParams["text.usetex"] = False
    
for i, CB in enumerate(CB_values):
    row = i // cols
    col = i % cols
    s = stats[
        (stats.CB_00 == CB[0])
        & (stats.CB_01  == CB[1])
        & (stats.CB_10 == CB[2])
        & (stats.CB_11 == CB[3])
    ]
    s = dataframe_to_color_matrix(s, "n_p_0", "n_u_0", "best_approach", cmap)
    axs[row, col].imshow(s, interpolation="none")
    axs[row, col].xaxis.set_major_formatter(lambda n, pos: x_tick_formatter(n, pos))
    axs[row, col].yaxis.set_major_formatter(lambda n, pos: y_tick_formatter(n, pos))
    axs[row, col].set_title("$\\mathrm{{CB}} = [{}; {}; {}; {}]$".format(*CB), fontsize=10)
    
    if row == rows - 1:
        axs[row, col].set_xlabel("Variance uplift")
    if col == 0:
        axs[row, col].set_ylabel("Variance predictive")
        
axs[0, 0].invert_xaxis()
# Hack to have the right legend
plt.scatter([], [], c="C0", label="Uplift")
plt.scatter([], [], c="C1", label="Predictive")
fig.legend(title="Best approach", frameon=False, loc="center left", bbox_to_anchor=(0.96, 0.86))
plt.savefig("pdf/n_u_n_p_cb.pdf", bbox_inches="tight")
plt.show()

### Uniform grid in the counterfactual simplex

In [None]:
eps = 1e-5

# Regular 12-point grid on each of alpha, beta and gamma
mu_grid = ParameterGrid({
    "alpha": np.linspace(eps, 1-eps, 12),
    "beta": np.linspace(0.01, 1-eps, 12),
    "gamma": np.linspace(0.01, 1-eps, 12)
})

# Keep only the points inside the simplex and let delta take the remaining mass.
# Built as one chain so that no column is assigned on a filtered view of the frame.
mu_candidates = pd.DataFrame.from_records(list(mu_grid))
inside_simplex = mu_candidates.alpha + mu_candidates.beta + mu_candidates.gamma <= 1
mu = (
    mu_candidates[inside_simplex]
    .assign(delta=lambda d: 1 - d.alpha - d.beta - d.gamma)
    .reset_index(drop=True)
)

# Each retained mu is crossed with 7 values of n_p_0 and 7 values of n_u_0;
# the resulting number of runs is printed below
A = 44.43905105200017 # 3%
size = 500000
params = ParameterGrid({
    "mu": mu.to_numpy(),
    "n_p_0": range(2, 49, 7), # 7 of increment and 7 values
    "n_u_0": range(2, 49, 7)  # 7 of increment and 7 values
})
n_runs = len(params)
print(n_runs)

# One simulation per grid point, spread over 4 worker processes.
# n_u_0 is passed twice, so n_u_1 always equals n_u_0.
results = Parallel(n_jobs=4)(
    delayed(compare_uplift_proba_dirichlet)(
        params[i]["mu"],
        A,
        size,
        params[i]["n_p_0"],
        params[i]["n_u_0"],
        params[i]["n_u_0"]
    ) for i in trange(n_runs)
)
stats = pd.DataFrame.from_records(results)

In [None]:
# One panel per distinct alpha; inside a panel each point is a (beta, gamma) pair
alpha_values = np.unique(mu.alpha)

cols = 4
rows = ceil(len(alpha_values) / cols)
fig, axs = plt.subplots(
    ncols=cols, nrows=rows,
    figsize=(7.5, 6),
    layout="constrained",
    sharex=True,
    sharey=True,
    squeeze=False
)

# Fraction of runs, for each (alpha, beta, gamma, delta), in which uplift was best
simplex_cols = ["alpha", "beta", "gamma", "delta"]
data_to_plot = (
    stats.assign(uplift_best=stats.best_approach == "u")
    .loc[:, simplex_cols + ["uplift_best"]]
    .groupby(simplex_cols, as_index=False)
    .mean()
)

for i, value in enumerate(alpha_values):
    row, col = divmod(i, cols)
    ax = axs[row, col]

    s = data_to_plot[data_to_plot.alpha == value]
    # Colour scale is clipped to [0.5, 1]
    sc = ax.scatter(
        s.beta,
        s.gamma,
        c=s.uplift_best,
        s=30,
        marker="s",
        vmin=0.5,
        vmax=1
    )
    ax.set_title("$\\alpha = {}$".format(format_number(value, 3)), fontsize=12)

    # Axis labels only on the outer panels
    if row == rows - 1:
        ax.set_xlabel("$\\beta$")
    if col == 0:
        ax.set_ylabel("$\\gamma$")

# Since axes are shared, only one should be inverted to affect all of them
#axs[0, 0].invert_xaxis()
# The colour bar is built from the last scatter drawn
plt.colorbar(sc)
#plt.savefig("pdf/n_u_n_p_I.pdf", bbox_inches="tight")
plt.show()

Now, let's plot $S_0$ against $S_1$ and evaluate to what extent the best approach varies across different values of the counterfactuals for given values of $S_0$ and $S_1$. This shows whether observing the marginals alone is sufficient to understand which approach is better.

In [None]:
plt.rcParams["figure.figsize"] = (5.5, 4)

# Fraction of runs in which uplift was best for each counterfactual distribution,
# together with the two marginals S_0 = beta + delta and S_1 = gamma + delta
simplex_cols = ["alpha", "beta", "gamma", "delta"]
data_to_plot = (
    stats.assign(uplift_best=stats.best_approach == "u")
    .loc[:, simplex_cols + ["uplift_best"]]
    .groupby(simplex_cols, as_index=False)
    .mean()
)
data_to_plot["S_0"] = data_to_plot.beta  + data_to_plot.delta
data_to_plot["S_1"] = data_to_plot.gamma + data_to_plot.delta
#data_to_plot = data_to_plot.groupby(["S_0", "S_1"], as_index=False)

# Uniform jitter so that distributions sharing the same marginals do not overlap
jitter = 0.016
n_points = data_to_plot.shape[0]
dx = np.random.uniform(-jitter, jitter, size=n_points)
dy = np.random.uniform(-jitter, jitter, size=n_points)

# Colour is the percentage of runs where uplift wins, clipped to [50, 100]
plt.scatter(
    data_to_plot.S_0 + dx,
    data_to_plot.S_1 + dy,
    c=data_to_plot.uplift_best * 100,
    s=50,
    vmin=50,
    vmax=100,
)
plt.xlabel("$S_0$", fontsize=12)
plt.ylabel("$S_1$", fontsize=12)

# Colour bar with its ticks moved to the left side
cc = plt.colorbar(label="Ratio of cases where uplift is better (%)", pad=0.1)
cc.ax.yaxis.tick_left()
cc.ax.yaxis.labelpad = 10

plt.savefig("pdf/S_0_S_1_uplift_better.pdf", bbox_inches="tight")
plt.show()

### Show uplift curves for different situations

In [None]:
# Draws, for two values of A, the uplift curves of both rankings at several noise levels.
# NOTE(review): simulate_uplift_ind and cf_uplift_curve are called below but are not
# among the names imported in the visible import cell — confirm they are in scope.
mu = np.array([0.9, 0.05, 0.04, 0.01])
# NOTE(review): log2 gives bits, whereas the digamma-based posterior entropy computed
# in the loop is in nats; the ratio shown in the panel title mixes both — confirm.
prior_entropy = -np.sum(mu * np.log2(mu))

# One panel per value of A
steps_A = np.array([
    50,
    1
])
size = 500000
# The two arrays are zipped below, so the pairs are (0, 0), (0.1, 0.1) and (0.3, 0.3)
steps_var_p = np.array([0, 0.1, 0.3])
steps_var_u = np.array([0, 0.1, 0.3])

nrows = 1
ncols = 2

fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(8, 3),
    squeeze=False,
    sharex=True
)


for i, A in enumerate(steps_A):
    a = A * mu
    posterior_entropy = digamma(A + 1) - np.sum(digamma(a + 1) * a) / A

    for j, (var_p, var_u) in enumerate(zip(steps_var_p, steps_var_u)):
        data = simulate_uplift_ind(a, size, var_p, var_u)

        curve_u = cf_uplift_curve(data["uplift_hat"], data["uplift"]).reset_index(drop=True)
        curve_p = cf_uplift_curve(data["S_0_hat"], data["uplift"]).reset_index(drop=True)

        # Plot about 5000 evenly spaced points of each curve rather than every row
        subsample = np.arange(0, curve_u.shape[0], step=curve_u.shape[0]//5000)
        ax = axs[0, i]
        line = ax.plot(
            curve_u["k"][subsample] / size * 100,
            curve_u["uplift"][subsample] * 100,
            c="C0"#, alpha=var_u * 2 + 0.4
        )
        # Hand-picked horizontal position of the label, one per (panel, noise level)
        x = [0.56, 0.56, 0.57, 0.36, 0.5, 0.62][i * 3 + j]
        # NOTE(review): x * size is a float used as an index label — confirm the lookup
        # behaves as intended (an integer position may have been meant).
        y = curve_u["uplift"][x * size] * 100
        ax.annotate(str(var_u), xy=(x, y), 
                    xycoords = ax.get_yaxis_transform(),
                    size=12, va="center", color="C0",
                    bbox=dict(boxstyle="round,pad=0.05,rounding_size=0.5", fc="white", lw=0))
        line = ax.plot(
            curve_p["k"][subsample] / size * 100,
            curve_p["uplift"][subsample] * 100,
            c="C1"#, alpha=var_p * 2 + 0.4
        )
        # Hand-picked horizontal position of the label, one per (panel, noise level)
        x = [0.56, 0.4, 0.73, 0.43, 0.18, 0.32][i * 3 + j]
        y = curve_p["uplift"][x * size] * 100
        ax.annotate(str(var_p), xy=(x, y), 
                    xycoords = ax.get_yaxis_transform(),
                    size=12, va="center", color="C1",
                    bbox=dict(boxstyle="round,pad=0.05,rounding_size=0.5", fc="white", lw=0))

    # `ax` is still the panel of the current A at this point
    ax.set_xlabel("Treatment rate (%)")
    ax.set_ylabel("Uplift (%)")
    # NOTE(review): "{:.1%}" already appends a percent sign and a literal "%" follows —
    # confirm the title renders as intended.
    ax.set_title("$I(X; Y_0, Y_1) = {:.1%}$%".format((prior_entropy - posterior_entropy) / prior_entropy))

# Empty lines whose only purpose is to provide the legend entries
plt.plot([], [], c="C0", label="Uplift")
plt.plot([], [], c="C1", label="Probabilistic")
fig.legend(title="Approach", frameon=False, loc="lower left", bbox_to_anchor=(0.58, 0.15))
plt.savefig("pdf/uplift_curves.pdf", bbox_inches="tight")
plt.show()