In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import container
from matplotlib.lines import Line2D
from matplotlib.collections import PolyCollection
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
%matplotlib qt
from scipy.stats import norm
import copy

In [2]:
# Compilation table read from CSV; the file is ISO-8859-1 encoded.
full_data = pd.read_csv(
    '../../Data/BHcompilation_updated.csv',
    encoding='ISO-8859-1',
)

In [3]:
# Data for fitting.
# Take an explicit copy: the rows are modified in place below, and assigning
# into a boolean-indexed slice of `full_data` raises SettingWithCopyWarning.
data = full_data.loc[full_data.SELECTED == 1].copy()

# Normalizing uncertainties to 3-sigma values:
# CONFLEVEL 1 rows are scaled by 3, CONFLEVEL 2 rows by 3/2.
data.loc[data.CONFLEVEL == 1, 'DMBH'] = data.DMBH*3
data.loc[data.CONFLEVEL == 2, 'DMBH'] = data.DMBH*(3/2)

# Reset the indices to avoid KeyError's later on
data = data.reset_index(drop=True)

In [4]:
# logistic function
def logit(x, beta0, beta1):
    """Logistic curve ``1 / (1 + exp(-(beta0 + beta1*x)))``.

    Evaluated as ``exp(min(z, 0)) / (1 + exp(-|z|))`` with
    ``z = beta0 + beta1*x``. Both exponentials have non-positive arguments,
    so the expression cannot overflow however large the coefficients are.
    For ``z >= 0`` the numerator is exactly 1, which reproduces the textbook
    form bit for bit.

    Parameters
    ----------
    x : scalar or array-like
        Predictor value(s).
    beta0, beta1 : scalar or array-like
        Intercept and slope of the linear predictor; broadcast against ``x``.

    Returns
    -------
    Same shape as the broadcast inputs, with values in [0, 1].
    """
    z = beta0 + beta1*x
    return np.exp(np.minimum(z, 0)) / (1 + np.exp(-np.abs(z)))


# log_linear function
def linear(x, gamma0, gamma1):
    """Straight line ``gamma0 + gamma1*x``."""
    return gamma0 + gamma1*x


# hurdle model functional form
def hurdle(x, beta0, beta1, gamma0, gamma1):
    """Product of the logistic term and the linear term.

    Returns ``logit(x, beta0, beta1) * linear(x, gamma0, gamma1)``.
    """
    return logit(x, beta0, beta1)*linear(x, gamma0, gamma1)


# hurdle model functional form, complementary weighting
def hurdle_brms(x, beta0, beta1, gamma0, gamma1):
    """Product of ``1 - logit`` and the linear term.

    Returns ``(1 - logit(x, beta0, beta1)) * linear(x, gamma0, gamma1)``.
    NOTE(review): presumably matches the brms convention, where the logistic
    part models the probability of the hurdle (zero) outcome - confirm.
    """
    return (1-logit(x, beta0, beta1))*linear(x, gamma0, gamma1)

# Looking at velocity dispersion spread

In [5]:
# Fit a normal distribution to the velocity dispersions of the fit sample and
# report the fitted location and scale, one per line.
mu, std = norm.fit(data.SIG)
print(mu, std, sep='\n')

# Density-normalised histogram of the same values.
plt.hist(data.SIG, bins=25, density=True, alpha=0.6)

# Overlay the fitted PDF across the x-range chosen for the histogram.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, loc=mu, scale=std)
plt.plot(x, p, 'k', linewidth=2)

title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
plt.title(title)

plt.show()

2.143089338385246
0.2660857663714269


libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open swrast: /usr/lib/dri/swrast_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: swrast


In [6]:
# Prior draws: one row per sample; the first four columns are taken as
# beta0, beta1, gamma0 and gamma1, in that order.
pydf = np.loadtxt('./prior_samples_full.txt', delimiter=" ", dtype=float)

beta0_priors, beta1_priors, gamma0_priors, gamma1_priors = (
    pydf[:, column] for column in range(4)
)

# 2x2 grid, one histogram per parameter
plt.figure(figsize=(10, 8))

# (samples, bar colour, x-position of the vertical reference line, title),
# listed in subplot order: top-left, top-right, bottom-left, bottom-right.
prior_panels = (
    (beta0_priors, 'blue', -4.03, 'Beta0 Priors'),
    (beta1_priors, 'orange', 3.98, 'Beta1 Priors'),
    (gamma0_priors, 'green', -4.85, 'Gamma0 Priors'),
    (gamma1_priors, 'red', 5.75, 'Gamma1 Priors'),
)

panel_counts = []
for slot, (samples, colour, reference_x, panel_title) in enumerate(prior_panels, start=1):
    plt.subplot(2, 2, slot)
    counts, _, _ = plt.hist(samples, color=colour, alpha=0.7)
    # Read the y-range before drawing so the reference line spans the panel
    ymin, ymax = plt.ylim()
    plt.xlim(-1000, 1000)
    plt.vlines(reference_x, ymin, ymax)
    plt.title(panel_title)
    panel_counts.append(counts)

# Per-panel bin counts, kept under their original names
hist_beta0, hist_beta1, hist_gamma0, hist_gamma1 = panel_counts

# Adjust layout for better spacing
plt.tight_layout()

# Save to disk, then display
plt.savefig('uniform_priors.png')
plt.show()

In [7]:
# Prior-only samples: rows are draws; columns are passed to hurdle() below as
# (beta0, beta1, gamma0, gamma1).
pydf = np.loadtxt('./prior_samples_full.txt', delimiter=" ", dtype=float)
n_samples = pydf.shape[0]
n_chains = 4
steps = np.arange(n_samples//n_chains)

# Evaluate the hurdle curve for every draw on a common sigma grid
sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = hurdle(sigma_plot_values, pydf[i][0], pydf[i][1], pydf[i][2], pydf[i][3])

# Pointwise median and central 95% band across draws
# (lower = 2.5% quantile, upper = 97.5% quantile)
stan_mean_full = np.median(y, axis=0)
stan_lower_full = np.quantile(y, 0.025, axis=0)
stan_upper_full = np.quantile(y, 0.975, axis=0)

# Split chains
# NOTE(review): assumes the file stacks n_chains equal-length chains one after
# another; any remainder rows beyond n_chains * chain_length are ignored - confirm.
chain_length = n_samples//n_chains
chain1 = pydf[chain_length * 0:chain_length * 1, :]
chain2 = pydf[chain_length * 1:chain_length * 2, :]
chain3 = pydf[chain_length * 2:chain_length * 3, :]
chain4 = pydf[chain_length * 3:chain_length * 4, :]


def _plot_chain_traces(chains, steps, column, symbol, filename):
    """Draw one trace panel per chain for a single parameter and save the figure.

    Parameters
    ----------
    chains : sequence of 2-D arrays
        One array per chain; rows are steps, columns are parameters.
    steps : 1-D array
        Step index used as the x-axis; must match the chain length.
    column : int
        Column of each chain array holding the parameter to plot.
    symbol : str
        Mathtext label of the parameter, used for y-labels and the title.
    filename : str
        Path the figure is written to.

    Returns
    -------
    (fig, axs) as returned by ``plt.subplots``.
    """
    fig, axs = plt.subplots(len(chains), 1, sharex=True, figsize=(8, 10))

    for number, (ax, chain) in enumerate(zip(axs, chains), start=1):
        trace = chain[:, column]
        chain_mean = np.mean(trace)
        ax.plot(steps, trace, label=f'Chain {number}')
        ax.hlines(chain_mean, steps[0], steps[-1], color='red',
                  label=f'Mean = {chain_mean:.2f}', linestyle='dashed')
        ax.set_ylabel(symbol)
        # Every panel gets its own legend (chain label + chain mean)
        ax.legend(loc='upper right')

    axs[-1].set_xlabel('Step')
    axs[0].set_title('Chain convergence for ' + symbol)

    # Save before showing: on backends where show() blocks or closes the
    # figure, saving afterwards would write an empty image.
    fig.savefig(filename)
    plt.show()
    return fig, axs


# (column in the sample table, mathtext symbol, output file) per parameter
trace_specs = (
    (0, r'$\beta_0$', 'beta0.png'),
    (1, r'$\beta_1$', 'beta1.png'),
    (2, r'$\gamma_0$', 'gamma0.png'),
    (3, r'$\gamma_1$', 'gamma1.png'),
)

for column, symbol, filename in trace_specs:
    fig, axs = _plot_chain_traces((chain1, chain2, chain3, chain4), steps, column, symbol, filename)

# Clear the current (last) figure, as the original cell did
plt.clf()

In [8]:
# Row masks over the full table, named once and combined below.
# NOTE(review): these subsets are cut from `full_data`, so their DMBH column
# holds the values as read from the CSV, not the 3-sigma-normalised values
# stored in `data` - confirm this is what the error bars should show.
in_fit = full_data.SELECTED == 1
not_in_fit = full_data.SELECTED != 1
is_upper_limit = full_data.UPPERLIMIT == 1
not_upper_limit = full_data.UPPERLIMIT == 0

from_stars = (full_data.TYPE == 'star') | (full_data.TYPE == 'stars')
from_gas = (full_data.TYPE == 'gas') | (full_data.TYPE == 'CO')
from_reverb = full_data.TYPE == 'reverb'
from_maser = full_data.TYPE == 'maser'

# Data omitted from fits
omitted_upplim = full_data.loc[not_in_fit & is_upper_limit]
omitted_true = full_data.loc[not_in_fit & not_upper_limit]

# Fit data, split by measurement type and by upper limit / non-upper limit.
# Stellar upper limits are further split into positive and exactly-zero MBH.
stars_upplim = full_data.loc[in_fit & (full_data.MBH > 0) & is_upper_limit & from_stars]
stars_upplim_zero = full_data.loc[in_fit & (full_data.MBH == 0) & is_upper_limit & from_stars]
stars_true = full_data.loc[in_fit & not_upper_limit & from_stars]

gas_upplim = full_data.loc[in_fit & is_upper_limit & from_gas]
gas_true = full_data.loc[in_fit & not_upper_limit & from_gas]

reverb_upplim = full_data.loc[in_fit & is_upper_limit & from_reverb]
reverb_true = full_data.loc[in_fit & not_upper_limit & from_reverb]

maser_upplim = full_data.loc[in_fit & is_upper_limit & from_maser]
maser_true = full_data.loc[in_fit & not_upper_limit & from_maser]

In [9]:
# Samples from './samples.txt': rows are draws; columns are passed to the
# model functions below as (beta0, beta1, gamma0, gamma1).
pydf = np.loadtxt('./samples.txt', delimiter=" ", dtype=float)

# Evaluate the full hurdle curve and its linear component for every draw
sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))
lin = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = hurdle(sigma_plot_values, pydf[i][0], pydf[i][1], pydf[i][2], pydf[i][3])
    lin[i] = linear(sigma_plot_values, pydf[i][2], pydf[i][3])

# Pointwise median and central 95% band across draws
# (lower = 2.5% quantile, upper = 97.5% quantile)
stan_mean = np.median(y, axis=0)
stan_lower = np.quantile(y, 0.025, axis=0)
stan_upper = np.quantile(y, 0.975, axis=0)

lin_mean = np.median(lin, axis=0)

# Column means of the samples, shown as the cell output
np.mean(pydf, axis=0)

array([-4.38265183,  4.31015723, -4.84220561,  5.75186808])

In [10]:
# Prior-predictive check arrays; all four files share the same text format.
_prior_pred_format = dict(delimiter=" ", dtype=float)

x_rep_arr = np.loadtxt('./prior_pred_check_x.txt', **_prior_pred_format)
y_rep_arr = np.loadtxt('./prior_pred_check_y.txt', **_prior_pred_format)
lin_arr = np.loadtxt('./prior_pred_check_lin.txt', **_prior_pred_format)
hur_arr = np.loadtxt('./prior_pred_check_hur.txt', **_prior_pred_format)

In [11]:
# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Single-panel figure; only the x-range is fixed, the y-range is left automatic
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.xlim(1.0, 2.8)

# Plotting simulated data: a random subset of the loaded draws.
# The RNG is seeded so the saved figure is the same on every run.
n_sims = 100
np.random.seed(1)
sims = np.random.randint(0, len(y_rep_arr), size=n_sims)
for i in sims:
    x_rep = x_rep_arr[i]
    lin = lin_arr[i]
    hur = hur_arr[i]

    # hur == 0 marks a simulated upper limit (triangle); hur == 1 a regular point
    upp_lim_idx = np.where(hur == 0)
    non_upp_lim_idx = np.where(hur == 1)
    plt.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color='orange', marker='v', s=50, edgecolors='black')
    plt.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color='orange', s=36)


# Plotting true data. Only the non-upper-limit errorbar of each group carries
# a label, so the legend gets exactly one entry per measurement type.
plt.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
plt.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
plt.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
plt.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

# Stellar upper limits with MBH == 0 are drawn without error bars
plt.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

plt.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
plt.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
plt.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
plt.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

plt.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
plt.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
plt.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
plt.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

plt.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
plt.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
plt.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
plt.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)

# Rows left out of the fit are drawn in grey, without error bars
plt.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
plt.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
plt.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

ax = plt.gca()

# Customizing font size for axis labels
ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

# Customizing legend to add upper limit marker and remove error bars:
# for errorbar entries keep only the marker line (element 0 of the container).
handles, labels = ax.get_legend_handles_labels()
new_handles = [
    h[0] if isinstance(h, container.ErrorbarContainer) else h
    for h in handles
]

# Proxy entries: the upper-limit marker goes first, the simulated marker last.
# Appending (rather than inserting at a fixed index) stays correct if the
# number of labelled series changes.
labels.insert(0, 'upper limit')
new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
labels.append('simulated')
new_handles.append(Line2D([0], [0], color='orange', marker='.', ls=''))

ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=False)

# Plotting and saving
plt.savefig('uniform_priors_prioronly_fake_data.png')
plt.show()

# Posterior Predictive Checks

In [12]:
def _read_floats(path):
    """Load a space-delimited table of floats from ``path``."""
    return np.loadtxt(path, delimiter=" ", dtype=float)


# Posterior-predictive check arrays (x values, linear-model values, hurdle flags).
# NOTE(review): the `_10x` / `_100x` files presumably hold draws with 10 and 100
# times as many points per simulation (the section headings below read
# 10 / 100 / 1000 points) - confirm.
x_rep_arr = _read_floats('./post_pred_check_x.txt')
lin_arr = _read_floats('./post_pred_check_lin.txt')
hur_arr = _read_floats('./post_pred_check_hur.txt')

x_rep_arr_10x = _read_floats('./post_pred_check_x_10x.txt')
lin_arr_10x = _read_floats('./post_pred_check_lin_10x.txt')
hur_arr_10x = _read_floats('./post_pred_check_hur_10x.txt')

x_rep_arr_100x = _read_floats('./post_pred_check_x_100x.txt')
lin_arr_100x = _read_floats('./post_pred_check_lin_100x.txt')
hur_arr_100x = _read_floats('./post_pred_check_hur_100x.txt')

# Seeded random row indices of the simulations to use in the checks below.
# The indices are drawn against len(lin_arr) and later applied to the 10x and
# 100x arrays as well, so those must have at least as many rows.
n_sims = 1000
np.random.seed(1)
sims = np.random.randint(low=0, high=len(lin_arr), size=n_sims)

## Histogram Comparison

### 10 points

In [39]:
# Simulated values for the selected draws, flattened to one vector each.
# Integer-array indexing already returns a fresh array, so the source arrays
# are never modified. Entries flagged as upper limits (flag == 0) are set to zero.
hur_flat = hur_arr[sims].flatten()
ul_idx_flat = np.nonzero(hur_flat == 0)
lin_flat = lin_arr[sims].flatten()
lin_flat[ul_idx_flat] = 0

# Shared bin edges so both histograms are directly comparable
bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

fig = plt.figure(figsize=(10,8), layout='constrained')
ax = plt.gca()

# Density-normalised, semi-transparent histograms: observed first, simulated on top
for values, colour, name in (
    (data.MBH, 'dodgerblue', 'Observed Data'),
    (lin_flat, '#ff9408', 'Simulated Data'),
):
    ax.hist(values, bins=bin_edges, alpha=0.5, density=True, color=colour, label=name)

ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison.png')

### 100 points

In [38]:
# Simulated values for the selected draws, flattened to one vector each.
# Integer-array indexing already returns a fresh array, so the source arrays
# are never modified. Entries flagged as upper limits (flag == 0) are set to zero.
hur_flat_10x = hur_arr_10x[sims].flatten()
ul_idx_flat_10x = np.nonzero(hur_flat_10x == 0)
lin_flat_10x = lin_arr_10x[sims].flatten()
lin_flat_10x[ul_idx_flat_10x] = 0

# Shared bin edges so both histograms are directly comparable
bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

fig = plt.figure(figsize=(10,8), layout='constrained')
ax = plt.gca()

# Density-normalised, semi-transparent histograms: observed first, simulated on top
for values, colour, name in (
    (data.MBH, 'dodgerblue', 'Observed Data'),
    (lin_flat_10x, '#ff9408', 'Simulated Data'),
):
    ax.hist(values, bins=bin_edges, alpha=0.5, density=True, color=colour, label=name)

ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison_10x.png')

### 1000 points

In [37]:
# Simulated values for the selected draws, flattened to one vector each.
# Integer-array indexing already returns a fresh array, so the source arrays
# are never modified. Entries flagged as upper limits (flag == 0) are set to zero.
hur_flat_100x = hur_arr_100x[sims].flatten()
ul_idx_flat_100x = np.nonzero(hur_flat_100x == 0)
lin_flat_100x = lin_arr_100x[sims].flatten()
lin_flat_100x[ul_idx_flat_100x] = 0

# Shared bin edges so both histograms are directly comparable
bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

fig = plt.figure(figsize=(10,8), layout='constrained')
ax = plt.gca()

# Density-normalised, semi-transparent histograms: observed first, simulated on top
for values, colour, name in (
    (data.MBH, 'dodgerblue', 'Observed Data'),
    (lin_flat_100x, '#ff9408', 'Simulated Data'),
):
    ax.hist(values, bins=bin_edges, alpha=0.5, density=True, color=colour, label=name)

ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison_100x.png')

## Simulated Data Comparison

### 10 points

In [49]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import container
import numpy as np

# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Two side-by-side panels showing the same observations and simulated draws.
# Left panel (j == 0): simulated upper limits keep their simulated value and
# get a black edge. Right panel (j == 1): simulated upper limits are moved to zero.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

# (upper-limit subset, non-upper-limit subset, colour, legend label), in draw order
observed_groups = (
    (stars_upplim, stars_true, 'darkred', 'stars'),
    (gas_upplim, gas_true, 'dodgerblue', 'gas'),
    (reverb_upplim, reverb_true, 'black', 'reverberation'),
    (maser_upplim, maser_true, 'forestgreen', 'maser'),
)

for j, ax in enumerate(axes):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        # Work on copies so zeroing upper limits never touches the loaded arrays
        x_rep = x_rep_arr[i].copy()
        lin = lin_arr[i].copy()
        hur = hur_arr[i].copy()

        upp_lim_idx = np.nonzero(hur == 0)
        non_upp_lim_idx = np.nonzero(hur == 1)

        upper_limit_style = dict(color='#ff9408', marker='v', s=50, alpha=0.3)
        if j == 1:
            lin[upp_lim_idx] = 0
        else:
            upper_limit_style['edgecolors'] = 'black'
        ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], **upper_limit_style)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color='#ff9408', s=36, alpha=0.3)

    # Plotting true data. Only the non-upper-limit errorbar of each group is
    # labelled, so the legend gets one entry per measurement type.
    for upplim, true, colour, name in observed_groups:
        ax.errorbar(upplim.SIG, upplim.MBH, upplim.DMBH, ls='', lw=0.75, color=colour, marker='.')
        ax.scatter(upplim.SIG, upplim.MBH, color=colour, marker='v', s=50)
        ax.errorbar(true.SIG, true.MBH, true.DMBH, ls='', lw=0.75, color=colour, marker='.', label=name)
        ax.scatter(true.SIG, true.MBH, color=colour, s=36)

    # Rows left out of the fit: grey, no error bars
    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Legend handles: for errorbar entries keep only the marker line
    handles, labels = ax.get_legend_handles_labels()
    new_handles = [
        h[0] if isinstance(h, container.ErrorbarContainer) else h
        for h in handles
    ]

    # Inward ticks on all four sides; fixed major/minor spacing per axis
    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    for axis, major_step, major_format, minor_step in (
        (ax.xaxis, 0.5, '{x:.1f}', 0.1),
        (ax.yaxis, 2, '{x:.0f}', 0.5),
    ):
        axis.set_major_locator(MultipleLocator(major_step))
        axis.set_major_formatter(major_format)
        axis.set_minor_locator(MultipleLocator(minor_step))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # Proxy legend entries: upper-limit marker at position 0, simulated marker at position 6
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color='#ff9408', marker='.', ls=''))

    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

# Plotting and saving
plt.savefig('comparison_posterior_fake_data.png')
plt.show()


### 100 points

In [46]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import container
import numpy as np

# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Two side-by-side panels showing the same observations and simulated draws.
# Left panel (j == 0): simulated upper limits keep their simulated value and
# get a black edge. Right panel (j == 1): simulated upper limits are moved to zero.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

# (upper-limit subset, non-upper-limit subset, colour, legend label), in draw order
observed_groups = (
    (stars_upplim, stars_true, 'darkred', 'stars'),
    (gas_upplim, gas_true, 'dodgerblue', 'gas'),
    (reverb_upplim, reverb_true, 'black', 'reverberation'),
    (maser_upplim, maser_true, 'forestgreen', 'maser'),
)

for j, ax in enumerate(axes):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        # Work on copies so zeroing upper limits never touches the loaded arrays
        x_rep = x_rep_arr_10x[i].copy()
        lin = lin_arr_10x[i].copy()
        hur = hur_arr_10x[i].copy()

        upp_lim_idx = np.nonzero(hur == 0)
        non_upp_lim_idx = np.nonzero(hur == 1)

        upper_limit_style = dict(color='#ff9408', marker='v', s=50, alpha=0.3)
        if j == 1:
            lin[upp_lim_idx] = 0
        else:
            upper_limit_style['edgecolors'] = 'black'
        ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], **upper_limit_style)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color='#ff9408', s=36, alpha=0.3)

    # Plotting true data. Only the non-upper-limit errorbar of each group is
    # labelled, so the legend gets one entry per measurement type.
    for upplim, true, colour, name in observed_groups:
        ax.errorbar(upplim.SIG, upplim.MBH, upplim.DMBH, ls='', lw=0.75, color=colour, marker='.')
        ax.scatter(upplim.SIG, upplim.MBH, color=colour, marker='v', s=50)
        ax.errorbar(true.SIG, true.MBH, true.DMBH, ls='', lw=0.75, color=colour, marker='.', label=name)
        ax.scatter(true.SIG, true.MBH, color=colour, s=36)

    # Rows left out of the fit: grey, no error bars
    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Legend handles: for errorbar entries keep only the marker line
    handles, labels = ax.get_legend_handles_labels()
    new_handles = [
        h[0] if isinstance(h, container.ErrorbarContainer) else h
        for h in handles
    ]

    # Inward ticks on all four sides; fixed major/minor spacing per axis
    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    for axis, major_step, major_format, minor_step in (
        (ax.xaxis, 0.5, '{x:.1f}', 0.1),
        (ax.yaxis, 2, '{x:.0f}', 0.5),
    ):
        axis.set_major_locator(MultipleLocator(major_step))
        axis.set_major_formatter(major_format)
        axis.set_minor_locator(MultipleLocator(minor_step))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # Proxy legend entries: upper-limit marker at position 0, simulated marker at position 6
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color='#ff9408', marker='.', ls=''))

    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

# Plotting and saving
plt.savefig('comparison_posterior_fake_data_10x.png')
plt.show()


### 1000 points

In [45]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import container
import numpy as np

# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Two side-by-side panels showing the same observations and simulated draws.
# Left panel (j == 0): simulated upper limits keep their simulated value and
# get a black edge. Right panel (j == 1): simulated upper limits are moved to zero.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

# (upper-limit subset, non-upper-limit subset, colour, legend label), in draw order
observed_groups = (
    (stars_upplim, stars_true, 'darkred', 'stars'),
    (gas_upplim, gas_true, 'dodgerblue', 'gas'),
    (reverb_upplim, reverb_true, 'black', 'reverberation'),
    (maser_upplim, maser_true, 'forestgreen', 'maser'),
)

for j, ax in enumerate(axes):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        # Work on copies so zeroing upper limits never touches the loaded arrays
        x_rep = x_rep_arr_100x[i].copy()
        lin = lin_arr_100x[i].copy()
        hur = hur_arr_100x[i].copy()

        upp_lim_idx = np.nonzero(hur == 0)
        non_upp_lim_idx = np.nonzero(hur == 1)

        upper_limit_style = dict(color='#ff9408', marker='v', s=50, alpha=0.3)
        if j == 1:
            lin[upp_lim_idx] = 0
        else:
            upper_limit_style['edgecolors'] = 'black'
        ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], **upper_limit_style)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color='#ff9408', s=36, alpha=0.3)

    # Plotting true data. Only the non-upper-limit errorbar of each group is
    # labelled, so the legend gets one entry per measurement type.
    for upplim, true, colour, name in observed_groups:
        ax.errorbar(upplim.SIG, upplim.MBH, upplim.DMBH, ls='', lw=0.75, color=colour, marker='.')
        ax.scatter(upplim.SIG, upplim.MBH, color=colour, marker='v', s=50)
        ax.errorbar(true.SIG, true.MBH, true.DMBH, ls='', lw=0.75, color=colour, marker='.', label=name)
        ax.scatter(true.SIG, true.MBH, color=colour, s=36)

    # Rows left out of the fit: grey, no error bars
    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Legend handles: for errorbar entries keep only the marker line
    handles, labels = ax.get_legend_handles_labels()
    new_handles = [
        h[0] if isinstance(h, container.ErrorbarContainer) else h
        for h in handles
    ]

    # Inward ticks on all four sides; fixed major/minor spacing per axis
    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    for axis, major_step, major_format, minor_step in (
        (ax.xaxis, 0.5, '{x:.1f}', 0.1),
        (ax.yaxis, 2, '{x:.0f}', 0.5),
    ):
        axis.set_major_locator(MultipleLocator(major_step))
        axis.set_major_formatter(major_format)
        axis.set_minor_locator(MultipleLocator(minor_step))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # Proxy legend entries: upper-limit marker at position 0, simulated marker at position 6
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color='#ff9408', marker='.', ls=''))

    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

# Plotting and saving
plt.savefig('comparison_posterior_fake_data_100x.png')
plt.show()
