In [None]:
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import probplot
import numpy as np

In [None]:
# Number of draws used for each sampled distribution in the cells below.
n_samples = 1_000
# Seed NumPy's legacy global generator so the sampled data is repeatable.
np.random.seed(seed=42)

# Q-Q Plots

This notebook builds intuition for reading Q-Q plot **results** by plotting samples from various distributions against the theoretical quantiles of the `normal` distribution.

In [None]:
# Reference sample drawn from a normal distribution with mean 0 and
# standard deviation 1 (arguments are loc, scale, size).
data_normal = np.random.normal(0, 1, n_samples)

### Normal

In [None]:
# Q-Q plot of the normal sample against the theoretical normal quantiles.
fig, ax = plt.subplots()
probplot(data_normal, dist="norm", plot=ax)
# Replace probplot's generic default title with a descriptive one so the
# figure stands alone when the notebook is skimmed.
ax.set_title('Q-Q Plot: Normal data vs Normal dist')

plt.show()

### Uniform

In [None]:
# Sample from Uniform(0, 1) and compare it with the normal distribution:
# left panel is the Q-Q plot, right panel overlays the two PDFs.
data_uniform = np.random.uniform(low=0, high=1, size=n_samples)

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_uniform, dist="norm", plot=axs[0])
axs[0].set_title('Q-Q Plot: Uniform data vs Normal dist')

x = np.linspace(-3, 3, 500)
# The notebook only imports `scipy.stats as stats` (plus `probplot`), so the
# distributions must be reached through the `stats` namespace; bare `norm` /
# `uniform` would raise NameError on a fresh kernel.
axs[1].plot(x, stats.norm.pdf(x), label='Normal PDF')
axs[1].plot(x, stats.uniform.pdf(x, loc=0, scale=1), label='Uniform PDF')

axs[1].set_title('PDFs: Normal and Uniform distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

### Exponential

In [None]:
# Exponential sample (scale 1) compared with the normal distribution.
data_exponential = np.random.exponential(scale=1, size=n_samples)

# Evaluation grid for the PDF comparison panel.
x = np.linspace(-3, 3, 500)

fig, axs = plt.subplots(1, 2, figsize=(12, 5))
ax_qq, ax_pdf = axs

# Left panel: sample quantiles against theoretical normal quantiles.
probplot(data_exponential, dist="norm", plot=ax_qq)
ax_qq.set_title('Q-Q Plot: Exponential data vs Normal dist')

# Right panel: the two densities drawn on the same grid.
normal_density = stats.norm.pdf(x)
exponential_density = stats.expon.pdf(x, loc=0, scale=1)
ax_pdf.plot(x, normal_density, label='Normal PDF')
ax_pdf.plot(x, exponential_density, label='Exponential PDF')

ax_pdf.set_title('PDFs: Normal and Exponential distributions')
ax_pdf.legend()
ax_pdf.grid(True)

plt.tight_layout()
plt.show()

### Skewed

In [None]:
# Skew-normal sample with shape parameter a = 5, compared with the normal
# distribution: Q-Q plot on the left, PDFs on the right.
a = 5
data_skewed = stats.skewnorm.rvs(a=a, size=n_samples)

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_skewed, dist="norm", plot=axs[0])
axs[0].set_title(f'Q-Q Plot: Skewed data ({a=}) vs Normal dist')

x = np.linspace(-3, 3, 500)
# `norm` is never imported on its own; go through the imported `stats`
# namespace to avoid a NameError on a fresh kernel.
axs[1].plot(x, stats.norm.pdf(x), label='Normal PDF')
axs[1].plot(x, stats.skewnorm.pdf(x, a), label=f'Skewed PDF ({a=})')

axs[1].set_title('PDFs: Normal and Skewed distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Skew-normal sample with shape parameter a = -5 (the mirror image of the
# a = 5 case), compared with the normal distribution.
a = -5
data_skewed = stats.skewnorm.rvs(a=a, size=n_samples)

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_skewed, dist="norm", plot=axs[0])
axs[0].set_title(f'Q-Q Plot: Skewed data ({a=}) vs Normal dist')

x = np.linspace(-3, 3, 500)
# `norm` is never imported on its own; go through the imported `stats`
# namespace to avoid a NameError on a fresh kernel.
axs[1].plot(x, stats.norm.pdf(x), label='Normal PDF')
axs[1].plot(x, stats.skewnorm.pdf(x, a), label=f'Skewed PDF ({a=})')

axs[1].set_title('PDFs: Normal and Skewed distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

### Bimodal

In [None]:
# Equal-weight mixture of two normal components with the same spread,
# compared with the normal distribution.
mu1, sigma1 = -2, 0.8
data_component1 = np.random.normal(loc=mu1, scale=sigma1, size=int(n_samples * 0.5))
mu2, sigma2 = 2, 0.8
data_component2 = np.random.normal(loc=mu2, scale=sigma2, size=int(n_samples * 0.5))

data_bimodal = np.concatenate((data_component1, data_component2))

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_bimodal, dist="norm", plot=axs[0])
axs[0].set_title('Q-Q Plot: Bimodal data vs Normal dist')

# Cover at least [-3, 3] for the standard normal and four standard deviations
# around each mixture component, so the mixture's tails are not cut off.
x_low = min(-3, mu1 - 4 * sigma1, mu2 - 4 * sigma2)
x_high = max(3, mu1 + 4 * sigma1, mu2 + 4 * sigma2)
x = np.linspace(x_low, x_high, 500)
# `norm` is never imported on its own; use the imported `stats` namespace.
axs[1].plot(x, stats.norm.pdf(x), label='Norm PDF')
# True mixture density: each component contributes half of the samples.
bimodal_pdf_vals = 0.5 * stats.norm.pdf(x, loc=mu1, scale=sigma1) + \
                   0.5 * stats.norm.pdf(x, loc=mu2, scale=sigma2)
# No explicit colour: 'blue' is hard to tell apart from the default colour of
# the first line, so let the colour cycle pick a contrasting one.
axs[1].plot(x, bimodal_pdf_vals, label='Bimodal PDF (true)')

# The panel shows the normal and bimodal densities (not a skewed one).
axs[1].set_title('PDFs: Normal and Bimodal distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Equal-weight mixture of a narrow left component and a wide right component,
# compared with the normal distribution.
mu1, sigma1 = -2, 0.5
data_component1 = np.random.normal(loc=mu1, scale=sigma1, size=int(n_samples * 0.5))
mu2, sigma2 = 2, 2
data_component2 = np.random.normal(loc=mu2, scale=sigma2, size=int(n_samples * 0.5))

data_bimodal = np.concatenate((data_component1, data_component2))

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_bimodal, dist="norm", plot=axs[0])
axs[0].set_title('Q-Q Plot: Bimodal data vs Normal dist')

# Cover at least [-3, 3] for the standard normal and four standard deviations
# around each mixture component, so the wide component is not cut off.
x_low = min(-3, mu1 - 4 * sigma1, mu2 - 4 * sigma2)
x_high = max(3, mu1 + 4 * sigma1, mu2 + 4 * sigma2)
x = np.linspace(x_low, x_high, 500)
# `norm` is never imported on its own; use the imported `stats` namespace.
axs[1].plot(x, stats.norm.pdf(x), label='Norm PDF')
# True mixture density: each component contributes half of the samples.
bimodal_pdf_vals = 0.5 * stats.norm.pdf(x, loc=mu1, scale=sigma1) + \
                   0.5 * stats.norm.pdf(x, loc=mu2, scale=sigma2)
# No explicit colour: 'blue' is hard to tell apart from the default colour of
# the first line, so let the colour cycle pick a contrasting one.
axs[1].plot(x, bimodal_pdf_vals, label='Bimodal PDF (true)')

# The panel shows the normal and bimodal densities (not a skewed one).
axs[1].set_title('PDFs: Normal and Bimodal distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Equal-weight mixture of a wide left component and a narrow right component,
# compared with the normal distribution.
mu1, sigma1 = -2, 2
data_component1 = np.random.normal(loc=mu1, scale=sigma1, size=int(n_samples * 0.5))
mu2, sigma2 = 2, 0.5
data_component2 = np.random.normal(loc=mu2, scale=sigma2, size=int(n_samples * 0.5))

data_bimodal = np.concatenate((data_component1, data_component2))

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

probplot(data_bimodal, dist="norm", plot=axs[0])
axs[0].set_title('Q-Q Plot: Bimodal data vs Normal dist')

# Cover at least [-3, 3] for the standard normal and four standard deviations
# around each mixture component, so the wide component is not cut off.
x_low = min(-3, mu1 - 4 * sigma1, mu2 - 4 * sigma2)
x_high = max(3, mu1 + 4 * sigma1, mu2 + 4 * sigma2)
x = np.linspace(x_low, x_high, 500)
# `norm` is never imported on its own; use the imported `stats` namespace.
axs[1].plot(x, stats.norm.pdf(x), label='Norm PDF')
# True mixture density: each component contributes half of the samples.
bimodal_pdf_vals = 0.5 * stats.norm.pdf(x, loc=mu1, scale=sigma1) + \
                   0.5 * stats.norm.pdf(x, loc=mu2, scale=sigma2)
# No explicit colour: 'blue' is hard to tell apart from the default colour of
# the first line, so let the colour cycle pick a contrasting one.
axs[1].plot(x, bimodal_pdf_vals, label='Bimodal PDF (true)')

# The panel shows the normal and bimodal densities (not a skewed one).
axs[1].set_title('PDFs: Normal and Bimodal distributions')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()