# Scatterplots for a multivariate normal distribution

In [None]:
import sandy

In [None]:
import numpy as np
import pandas as pd

In [None]:
from scipy.stats import multivariate_normal
from mpl_toolkits.mplot3d import Axes3D

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Select seaborn's "whitegrid" style for the figures in this notebook.
sns.set_style("whitegrid")

## $z=f(x,y)$

In [None]:
def f(x, y):
    """Return the linear combination ``x + 2 * y``.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    return x + 2 * y


# Evaluate f at 1000 evenly spaced points with x = y running over [0, 20].
x = np.linspace(0, 20, num=1000)
y = x.copy()
z = f(x, y)

In [None]:
# Propagate a 1 % relative uncertainty on x through f, with y held fixed at 10.
mean = 10
std = mean * 1 / 100  # standard deviation equal to 1 % of the mean

# Draw from a seeded generator, as the other sampling cells of this notebook
# do, so that the figure is reproducible from run to run.
rng = np.random.RandomState(0)
x = rng.normal(loc=mean, scale=std, size=1000)
z = f(x, 10)

# Joint view of the sampled input x and the response z (kind="reg" adds a
# regression fit to the scatter).
df = pd.DataFrame(dict(x=x, z=z))
g = sns.jointplot(data=df, x="x", y="z", kind="reg",
                  color="tomato", height=4)
g.figure.set_dpi(150)

## No correlation

In [None]:
# Bivariate normal centred on (10, 10): unit variances, zero covariance.
mean = [10, 10]
cov = [
    (1, 0.0),
    (0.0, 1),
]

# Fixed seed so the 10000 samples are the same on every run.
rng = np.random.RandomState(seed=0)
x, y = np.transpose(rng.multivariate_normal(mean, cov, size=10000))
z = f(x, y)

In [None]:
# Overlay three views of the (x, y) sample on one square axes:
# the raw points, a 2-D histogram and density contours.
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
sns.scatterplot(x=x, y=y, s=5, color=".15", ax=ax)
sns.histplot(x=x, y=y, bins=50, pthresh=0.1, cmap="mako", ax=ax)
sns.kdeplot(x=x, y=y, levels=5, color="w", linewidths=1, ax=ax)
ax.set(xlabel="x", ylabel="y")
fig.tight_layout()

In [None]:
# Joint plot of the sample: scatter with a regression fit (kind="reg").
df = pd.DataFrame({"x": x, "y": y})
g = sns.jointplot(
    data=df,
    x="x",
    y="y",
    kind="reg",
    height=4,
    color="tomato",
)
g.figure.set_dpi(150)

In [None]:
# Evaluate the analytical pdf on a 500 x 500 grid covering [7, 13] on both axes.
xls = np.linspace(7, 13, 500)
yls = np.linspace(7, 13, 500)
X, Y = np.meshgrid(xls, yls)
# Stack the grids so the last axis holds the (x, y) pair: shape (500, 500, 2).
pos = np.dstack((X, Y))
rv = multivariate_normal(mean, cov)

# Draw the density as a 3-D surface.
fig = plt.figure(figsize=(4, 4), dpi=150)
ax = fig.add_subplot(projection="3d")
ax.plot_surface(X, Y, rv.pdf(pos), cmap="turbo", linewidth=0)
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
fig.tight_layout()

## With correlation, $\rho=0.8$

In [None]:
# Bivariate normal centred on (10, 10): unit variances and covariance 0.8,
# i.e. a correlation coefficient of 0.8.
mean = [10, 10]
cov = [
    (1, 0.8),
    (0.8, 1),
]

# Fixed seed so the 10000 samples are the same on every run.
rng = np.random.RandomState(seed=0)
x, y = np.transpose(rng.multivariate_normal(mean, cov, size=10000))
z = f(x, y)

In [None]:
# Overlay the raw points, a 2-D histogram and density contours of the
# (x, y) sample on one square axes.
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
sns.scatterplot(x=x, y=y, s=5, color=".15", ax=ax)
sns.histplot(x=x, y=y, bins=50, pthresh=0.1, cmap="mako", ax=ax)
sns.kdeplot(x=x, y=y, levels=5, color="w", linewidths=1, ax=ax)
ax.set(xlabel="x", ylabel="y")
fig.tight_layout()

In [None]:
# Joint plot of the sample: scatter with a regression fit (kind="reg").
df = pd.DataFrame({"x": x, "y": y})
g = sns.jointplot(
    data=df,
    x="x",
    y="y",
    kind="reg",
    height=4,
    color="tomato",
)
g.figure.set_dpi(150)

In [None]:
# Evaluate the analytical pdf on a 500 x 500 grid covering [7, 13] on both axes.
xls = np.linspace(7, 13, 500)
yls = np.linspace(7, 13, 500)
X, Y = np.meshgrid(xls, yls)
# Stack the grids so the last axis holds the (x, y) pair: shape (500, 500, 2).
pos = np.dstack((X, Y))
rv = multivariate_normal(mean, cov)

# Draw the density as a 3-D surface.
fig = plt.figure(figsize=(4, 4), dpi=150)
ax = fig.add_subplot(projection="3d")
ax.plot_surface(X, Y, rv.pdf(pos), cmap="turbo", linewidth=0)
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
fig.tight_layout()

## With correlation, $\rho=-0.8$

In [None]:
# Bivariate normal centred on (10, 10): unit variances and covariance -0.8,
# i.e. a correlation coefficient of -0.8.
mean = [10, 10]
cov = [
    (1, -0.8),
    (-0.8, 1),
]

# Fixed seed so the 10000 samples are the same on every run.
rng = np.random.RandomState(seed=0)
x, y = np.transpose(rng.multivariate_normal(mean, cov, size=10000))
z = f(x, y)

In [None]:
# Overlay the raw points, a 2-D histogram and density contours of the
# (x, y) sample on one square axes.
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
sns.scatterplot(x=x, y=y, s=5, color=".15", ax=ax)
sns.histplot(x=x, y=y, bins=50, pthresh=0.1, cmap="mako", ax=ax)
sns.kdeplot(x=x, y=y, levels=5, color="w", linewidths=1, ax=ax)
ax.set(xlabel="x", ylabel="y")
fig.tight_layout()

In [None]:
# Joint plot of the sample: scatter with a regression fit (kind="reg").
df = pd.DataFrame({"x": x, "y": y})
g = sns.jointplot(
    data=df,
    x="x",
    y="y",
    kind="reg",
    height=4,
    color="tomato",
)
g.figure.set_dpi(150)

In [None]:
# Evaluate the analytical pdf on a 500 x 500 grid covering [7, 13] on both axes.
xls = np.linspace(7, 13, 500)
yls = np.linspace(7, 13, 500)
X, Y = np.meshgrid(xls, yls)
# Stack the grids so the last axis holds the (x, y) pair: shape (500, 500, 2).
pos = np.dstack((X, Y))
rv = multivariate_normal(mean, cov)

# Draw the density as a 3-D surface.
fig = plt.figure(figsize=(4, 4), dpi=150)
ax = fig.add_subplot(projection="3d")
ax.plot_surface(X, Y, rv.pdf(pos), cmap="turbo", linewidth=0)
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
fig.tight_layout()