# Workspace Setup

## Importing Packages

In [321]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import container
from matplotlib.lines import Line2D
from matplotlib.collections import PolyCollection
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
%matplotlib qt
from scipy.stats import norm, lognorm
import copy
from astropy.io import fits
from astropy.table import Table


In [339]:
# https://data.sdss.org/datamodel/files/SPECTRO_REDUX/galSpecLine.html

# Open the FITS file
hdul = fits.open("/home/gsasseville/Downloads/galSpecLine-dr8.fits")

# Access the BinTableHDU extension
table_hdu = hdul[1]

# Convert the BinTableHDU to a Table object
data_JHU = table_hdu.data

table = Table(data_JHU)

dispersions_balmer = np.array(table['SIGMA_BALMER'])
dispersions_forbidden = np.array(table['SIGMA_FORBIDDEN'])

hdul.close

# https://data.sdss.org/datamodel/files/SPECTRO_REDUX/galSpecInfo.html

# Open the FITS file
hdul = fits.open("/home/gsasseville/Downloads/galSpecInfo-dr8.fits")

# Access the BinTableHDU extension
table_hdu = hdul[1]

# Convert the BinTableHDU to a Table object
data_JHU = table_hdu.data

table = Table(data_JHU)

redshifts = np.array(table['Z'])

hdul.close

<bound method HDUList.close of [<astropy.io.fits.hdu.image.PrimaryHDU object at 0x7f98ad83bbb0>, <astropy.io.fits.hdu.table.BinTableHDU object at 0x7f98ad83be50>]>

In [340]:
plt.hist(dispersions_balmer, alpha=0.3, color='b', label='Balmer')
plt.hist(dispersions_forbidden, alpha=0.3, color='r', label='Forbidden')
plt.legend()

<matplotlib.legend.Legend at 0x7f98b49384f0>

In [341]:
# Find the maximum redshift Z in our datasample
max_Z = np.max(data_JHU.Z)
max_sigma = np.max(data.SIG)

# Keep relevant galaxies (redshift lower than max redshift)
dispersions = dispersions_balmer[(redshifts >= 0) & (redshifts <= max_Z)]
dispersions = np.log10(dispersions[np.where(dispersions > 0)])
dispersions = dispersions[np.where((dispersions > 0) & (dispersions <= max_sigma))]

In [342]:
# Fit a normal distribution to velocity dispersion data
mu, std = norm.fit(dispersions)

fig = plt.figure(figsize=(10,8), layout='constrained')
plt.hist(dispersions, bins=25, density=True, alpha=0.6, label='MPA-JHU')
plt.hist(data.SIG, bins=25, density=True, alpha=0.6, label='Our data')

# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
plt.xlabel('Velocity Dispersion')
plt.title("Fit results: mu = %.2f,  std = %.2f" % (mu, std))

print(f'mu = {mu}')
print(f'std = {std}')
plt.legend()

plt.show()

mu = 1.9570682048797607
std = 0.2733267545700073


## Importing and Preprocessing Data

In [328]:
full_data = pd.read_csv('../../Data/BHcompilation_updated.csv', encoding='ISO-8859-1')

In [331]:
# Data for fitting
data = full_data.loc[full_data.SELECTED == 1]

# Normalizing uncertainties to 3-sigma values
data.loc[data.CONFLEVEL == 1, 'DMBH'] = data.DMBH*3
data.loc[data.CONFLEVEL == 2, 'DMBH'] = data.DMBH*(3/2)

# Reset the indices to avoid KeyError's later on
data = data.reset_index(drop=True)

In [4]:
# Data omitted from fits
omitted_upplim = full_data.loc[(full_data.SELECTED != 1) & (full_data.UPPERLIMIT == 1)]
omitted_true = full_data.loc[(full_data.SELECTED != 1) & (full_data.UPPERLIMIT == 0)]

# Fit data
stars_upplim = full_data.loc[(full_data.SELECTED == 1) & (full_data.MBH > 0) & (full_data.UPPERLIMIT == 1) & ((full_data.TYPE == 'star') | (full_data.TYPE == 'stars'))]
stars_upplim_zero = full_data.loc[(full_data.SELECTED == 1) & (full_data.MBH == 0) & (full_data.UPPERLIMIT == 1) & ((full_data.TYPE == 'star') | (full_data.TYPE == 'stars'))]
stars_true = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 0) & ((full_data.TYPE == 'star') | (full_data.TYPE == 'stars'))]

gas_upplim = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 1) & ((full_data.TYPE == 'gas') | (full_data.TYPE == 'CO'))]
gas_true = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 0) & ((full_data.TYPE == 'gas') | (full_data.TYPE == 'CO'))]

reverb_upplim = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 1) & (full_data.TYPE == 'reverb')]
reverb_true = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 0) & (full_data.TYPE == 'reverb')]

maser_upplim = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 1) & (full_data.TYPE == 'maser')]
maser_true = full_data.loc[(full_data.SELECTED == 1) & (full_data.UPPERLIMIT == 0) & (full_data.TYPE == 'maser')]

## Function Definitions

In [6]:
# logistic function
def logit(x, beta0, beta1):
    return (1/(1+np.exp(-(beta0 + beta1*x))))
  
# log_linear function
def linear(x, gamma0, gamma1):
    return gamma0 + gamma1*x
  
# hurdle model functional form
def hurdle(x, beta0, beta1, gamma0, gamma1):
    return logit(x, beta0, beta1)*linear(x, gamma0, gamma1)

# Exploratory Data Analysis

## Data Visualization

In [7]:
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.xlabel(r'$\sigma$ log(km/s)', fontsize='x-large', fontweight='bold')
plt.ylabel(r'$M_\bullet$ log(M$_\odot$)', fontsize='x-large', fontweight='bold')
plt.scatter(data.SIG, data.MBH, label='data')
plt.show()

libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open swrast: /usr/lib/dri/swrast_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: swrast


## Velocity Dispersion Spread

In [8]:
# Fit a normal distribution to velocity dispersion data
mu, std = norm.fit(data.SIG)

fig = plt.figure(figsize=(10,8), layout='constrained')
plt.hist(data.SIG, bins=25, density=True, alpha=0.6)

# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
plt.xlabel('Velocity Dispersion')
plt.title("Fit results: mu = %.2f,  std = %.2f" % (mu, std))

print(f'mu = {mu}')
print(f'std = {std}')

plt.show()

mu = 2.143089338385246
std = 0.2660857663714269


## Preliminary Fitting

In [9]:
# Fit on precise measurements
pydf = np.loadtxt('../Exploratory_Data_analysis/obs_samples.txt', delimiter=" ", dtype=float)

sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = linear(sigma_plot_values, pydf[i][0], pydf[i][1])

stan_obs_mean = np.median(y, axis=0)
stan_obs_lower = np.quantile(y, 0.975, axis=0)
stan_obs_upper = np.quantile(y, 0.025, axis=0)

# Fit on upper limits
pydf = np.loadtxt('../Exploratory_Data_analysis/upp_samples.txt', delimiter=" ", dtype=float)

sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = linear(sigma_plot_values, pydf[i][0], pydf[i][1])

stan_upp_mean = np.median(y, axis=0)
stan_upp_lower = np.quantile(y, 0.975, axis=0)
stan_upp_upper = np.quantile(y, 0.025, axis=0)

In [10]:
# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Plot labels
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.xlim(1.0, 2.8)
plt.ylim(-0.5, 11)

plt.plot(sigma_plot_values, stan_obs_mean, color='black', label='Precise Measurements')
plt.fill_between(sigma_plot_values, stan_obs_lower, stan_obs_upper, alpha=0.15, color='black')

plt.plot(sigma_plot_values, stan_upp_mean, color='black', label='Upper Limits', linestyle='dotted')
plt.fill_between(sigma_plot_values, stan_upp_lower, stan_upp_upper, alpha=0.15, color='black')

# Plotting data
plt.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
plt.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
plt.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
plt.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

plt.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

plt.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
plt.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
plt.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
plt.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

plt.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
plt.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
plt.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
plt.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

plt.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
plt.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
plt.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
plt.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)


plt.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
plt.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
plt.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

# Customizing legend to add upper limit marker and remove error bars
ax = plt.gca()

# Customizing font size for axis labels
ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

handles, labels = ax.get_legend_handles_labels()

new_handles = []

for h in handles:
    # only needed to edit the errorbar legend entries
    if isinstance(h, container.ErrorbarContainer):
        new_handles.append(h[0])
    else:
        new_handles.append(h)
        
# Customizing tick marks
ax.tick_params(reset=True)
ax.tick_params(which='major', direction='in', length=7, top=True, right=True, labelsize=20)
ax.tick_params(which='minor', direction='in', length=2.5, top=True, right=True)
ax.xaxis.set_major_locator(MultipleLocator(0.5))
ax.xaxis.set_major_formatter('{x:.1f}')
ax.xaxis.set_minor_locator(MultipleLocator(0.1))

ax.yaxis.set_major_locator(MultipleLocator(2))
ax.yaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_minor_locator(MultipleLocator(0.5))

# Remove tick label at beginning of x axis
xticks = ax.xaxis.get_major_ticks()
xticks[1].set_visible(False)

# upper limit marker
labels.insert(2, 'upper limit') # modify the 3 depending on how many plots
new_handles.insert(2, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))


legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
plt.grid(True, which='both', alpha=0.25)

# Plotting and saving
plt.show()

# Hurdle Fit

## Chain Convergence

In [11]:
pydf = np.loadtxt('./samples_full.txt', delimiter=" ", dtype=float)
n_samples = pydf.shape[0]
n_chains = 4
steps = np.arange(n_samples//n_chains)

sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = hurdle(sigma_plot_values, pydf[i][0], pydf[i][1], pydf[i][2], pydf[i][3])

stan_mean_full = np.median(y, axis=0)
stan_lower_full = np.quantile(y, 0.975, axis=0)
stan_upper_full = np.quantile(y, 0.025, axis=0)

# Split chains
chain1 = pydf[n_samples//n_chains * 0:n_samples//n_chains * 1, :]
chain2 = pydf[n_samples//n_chains * 1:n_samples//n_chains * 2, :]
chain3 = pydf[n_samples//n_chains * 2:n_samples//n_chains * 3, :]
chain4 = pydf[n_samples//n_chains * 3:n_samples//n_chains * 4, :]

#################################
# Create subplots for first param
fig, axs = plt.subplots(4, 1, sharex=True, figsize=(8, 10))

# Plot data on each subplot
axs[0].plot(steps, chain1[:, 0], label='Chain 1')
axs[1].plot(steps, chain2[:, 0], label='Chain 2')
axs[2].plot(steps, chain3[:, 0], label='Chain 3')
axs[3].plot(steps, chain4[:, 0], label='Chain 4')

axs[0].hlines(np.mean(chain1[:, 0]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain1[:, 0]):.2f}', linestyle='dashed')
axs[1].hlines(np.mean(chain2[:, 0]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain2[:, 0]):.2f}', linestyle='dashed')
axs[2].hlines(np.mean(chain3[:, 0]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain3[:, 0]):.2f}', linestyle='dashed')
axs[3].hlines(np.mean(chain4[:, 0]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain4[:, 0]):.2f}', linestyle='dashed')

# Add labels and legends
axs[0].set_ylabel(r'$\beta_0$')
axs[1].set_ylabel(r'$\beta_0$')
axs[2].set_ylabel(r'$\beta_0$')
axs[3].set_ylabel(r'$\beta_0$')

axs[3].set_xlabel('Step')

# Add legend to the last subplot
axs[0].legend(loc='upper right')
axs[1].legend(loc='upper right')
axs[2].legend(loc='upper right')
axs[3].legend(loc='upper right')
axs[0].set_title(r'Chain convergence for $\beta_0$')
plt.show()

plt.savefig('beta0_post.png')

##################################
# Create subplots for second param
fig, axs = plt.subplots(4, 1, sharex=True, figsize=(8, 10))

# Plot data on each subplot
axs[0].plot(steps, chain1[:, 1], label='Chain 1')
axs[1].plot(steps, chain2[:, 1], label='Chain 2')
axs[2].plot(steps, chain3[:, 1], label='Chain 3')
axs[3].plot(steps, chain4[:, 1], label='Chain 4')

axs[0].hlines(np.mean(chain1[:, 1]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain1[:, 1]):.2f}', linestyle='dashed')
axs[1].hlines(np.mean(chain2[:, 1]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain2[:, 1]):.2f}', linestyle='dashed')
axs[2].hlines(np.mean(chain3[:, 1]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain3[:, 1]):.2f}', linestyle='dashed')
axs[3].hlines(np.mean(chain4[:, 1]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain4[:, 1]):.2f}', linestyle='dashed')

# Add labels and legends
axs[0].set_ylabel(r'$\beta_1$')
axs[1].set_ylabel(r'$\beta_1$')
axs[2].set_ylabel(r'$\beta_1$')
axs[3].set_ylabel(r'$\beta_1$')

axs[3].set_xlabel('Step')

# Add legend to the last subplot
axs[0].legend(loc='upper right')
axs[1].legend(loc='upper right')
axs[2].legend(loc='upper right')
axs[3].legend(loc='upper right')
axs[0].set_title(r'Chain convergence for $\beta_1$')
plt.show()

plt.savefig('beta1_post.png')

#################################
# Create subplots for third param
fig, axs = plt.subplots(4, 1, sharex=True, figsize=(8, 10))

# Plot data on each subplot
axs[0].plot(steps, chain1[:, 2], label='Chain 1')
axs[1].plot(steps, chain2[:, 2], label='Chain 2')
axs[2].plot(steps, chain3[:, 2], label='Chain 3')
axs[3].plot(steps, chain4[:, 2], label='Chain 4')

axs[0].hlines(np.mean(chain1[:, 2]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain1[:, 2]):.2f}', linestyle='dashed')
axs[1].hlines(np.mean(chain2[:, 2]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain2[:, 2]):.2f}', linestyle='dashed')
axs[2].hlines(np.mean(chain3[:, 2]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain3[:, 2]):.2f}', linestyle='dashed')
axs[3].hlines(np.mean(chain4[:, 2]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain4[:, 2]):.2f}', linestyle='dashed')

# Add labels and legends
axs[0].set_ylabel(r'$\gamma_0$')
axs[1].set_ylabel(r'$\gamma_0$')
axs[2].set_ylabel(r'$\gamma_0$')
axs[3].set_ylabel(r'$\gamma_0$')

axs[3].set_xlabel('Step')

# Add legend to the last subplot
axs[0].legend(loc='upper right')
axs[1].legend(loc='upper right')
axs[2].legend(loc='upper right')
axs[3].legend(loc='upper right')
axs[0].set_title(r'Chain convergence for $\gamma_0$')
plt.show()

plt.savefig('gamma0_post.png')

##################################
# Create subplots for fourth param
fig, axs = plt.subplots(4, 1, sharex=True, figsize=(8, 10))

# Plot data on each subplot
axs[0].plot(steps, chain1[:, 3], label='Chain 1')
axs[1].plot(steps, chain2[:, 3], label='Chain 2')
axs[2].plot(steps, chain3[:, 3], label='Chain 3')
axs[3].plot(steps, chain4[:, 3], label='Chain 4')

axs[0].hlines(np.mean(chain1[:, 3]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain1[:, 3]):.2f}', linestyle='dashed')
axs[1].hlines(np.mean(chain2[:, 3]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain2[:, 3]):.2f}', linestyle='dashed')
axs[2].hlines(np.mean(chain3[:, 3]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain3[:, 3]):.2f}', linestyle='dashed')
axs[3].hlines(np.mean(chain4[:, 3]), steps[0], steps[-1], color='red', label=f'Mean = {np.mean(chain4[:, 3]):.2f}', linestyle='dashed')

# Add labels and legends
axs[0].set_ylabel(r'$\gamma_1$')
axs[1].set_ylabel(r'$\gamma_1$')
axs[2].set_ylabel(r'$\gamma_1$')
axs[3].set_ylabel(r'$\gamma_1$')

axs[3].set_xlabel('Step')

# Add legend to the last subplot
axs[0].legend(loc='upper right')
axs[1].legend(loc='upper right')
axs[2].legend(loc='upper right')
axs[3].legend(loc='upper right')
axs[0].set_title(r'Chain convergence for $\gamma_1$')
plt.show()

plt.savefig('gamma1_post.png')

## Plotting Fit

In [12]:
samples = np.loadtxt('./samples.txt', delimiter=" ", dtype=float)

# Calculate the median for each parameter
medians = np.median(samples, axis=0)

# Calculate the 95% credible interval for each parameter
credible_interval_low = np.percentile(samples, 2.5, axis=0)
credible_interval_high = np.percentile(samples, 97.5, axis=0)

beta0 = medians[0]
beta1 = medians[1]
gamma0 = medians[2]
gamma1 = medians[3]

beta0_err = max(credible_interval_high[0] - beta0, beta0 - credible_interval_low[0])
beta1_err = max(credible_interval_high[1] - beta1, beta1 - credible_interval_low[1])
gamma0_err = max(credible_interval_high[2] - gamma0, gamma0 - credible_interval_low[2])
gamma1_err = max(credible_interval_high[3] - gamma1, gamma1 - credible_interval_low[3])

errors = np.array([beta0_err, beta1_err, gamma0_err, gamma1_err])

# Print or use the results as needed
for param, median, low, high, error in zip(range(1, 5), medians, credible_interval_low, credible_interval_high, errors):
    print(f"Parameter {param}: Median = {median}, 95% Credible Interval = ({low}, {high}, Estimated error = {error})")

Parameter 1: Median = -4.38666724688224, 95% Credible Interval = (-9.061565166842458, 0.19112920440202733, Estimated error = 4.674897919960218)
Parameter 2: Median = 4.281752420743155, 95% Credible Interval = (1.8447817706016483, 7.054578024056098, Estimated error = 2.772825603312943)
Parameter 3: Median = -4.8388185288773595, 95% Credible Interval = (-5.308153535060876, -4.385094849148747, Estimated error = 0.46933500618351687)
Parameter 4: Median = 5.751297449122225, 95% Credible Interval = (5.548970331688697, 5.955971709919481, Estimated error = 0.20467426079725648)


In [13]:
pydf = np.loadtxt('./samples.txt', delimiter=" ", dtype=float)

sigma_plot_values = np.linspace(1, 2.8, num=100)
y = np.empty((len(pydf), 100))
lin = np.empty((len(pydf), 100))

for i in range(len(pydf)):
    y[i] = hurdle(sigma_plot_values, pydf[i][0], pydf[i][1], pydf[i][2], pydf[i][3])
    lin[i] = linear(sigma_plot_values, pydf[i][2], pydf[i][3])

stan_mean = np.median(y, axis=0)
stan_lower = np.quantile(y, 0.975, axis=0)
stan_upper = np.quantile(y, 0.025, axis=0)
lin_mean = np.median(lin, axis=0)

In [40]:
# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Calculating BH masses for VDB 2016
vdb_gamma0 = -4
vdb_gamma1 = 5.35
vdb_mass = linear(sigma_plot_values, vdb_gamma0, vdb_gamma1) # coefficients from Remco Van Den Bosch 2016

# Plot labels
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.xlim(1.0, 2.8)
plt.ylim(-0.5, 11)

# Plotting average fit with previous ones
plt.plot(sigma_plot_values, stan_mean, color='black', label='Hurdle Model') # hurdle fit
plt.plot(sigma_plot_values, lin_mean, color='black', linestyle='dashed', label='Linear Portion') # linear portion
plt.plot(sigma_plot_values, vdb_mass, color='black', linestyle='dotted', label='van den Bosch 2016')
plt.fill_between(sigma_plot_values, stan_lower, stan_upper, alpha=0.15, color='black', label='95% Credible Interval') # CI

# Plotting data
plt.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
plt.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
plt.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
plt.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

plt.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

plt.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
plt.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
plt.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
plt.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

plt.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
plt.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
plt.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
plt.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

plt.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
plt.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
plt.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
plt.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)


plt.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
plt.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
plt.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

# Customizing legend to add upper limit marker and remove error bars
ax = plt.gca()

ax.text(1.60, 1.70, '$\\log(M_\\bullet) = p(%.2f^{\\pm%.1f}+%.2f^{\\pm%.1f}\\log(\\sigma))$' % (gamma0, gamma0_err, gamma1, gamma1_err), fontsize=21)
ax.text(1.80, 0.70, '$p = \\frac{1}{1+exp{(-(%.2f^{\\pm%.1f}+%.2f^{\\pm%.2f}\\log(\\sigma)))}}$' % (beta0, beta0_err, beta1, beta1_err), fontsize=22)

# Customizing font size for axis labels
ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

handles, labels = ax.get_legend_handles_labels()

new_handles = []

for h in handles:
    # only needed to edit the errorbar legend entries
    if isinstance(h, container.ErrorbarContainer):
        new_handles.append(h[0])
    else:
        new_handles.append(h)
        
# Customizing tick marks
ax.tick_params(reset=True)
ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
ax.xaxis.set_major_locator(MultipleLocator(0.5))
ax.xaxis.set_major_formatter('{x:.1f}')
ax.xaxis.set_minor_locator(MultipleLocator(0.1))

ax.yaxis.set_major_locator(MultipleLocator(2))
ax.yaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_minor_locator(MultipleLocator(0.5))

# Remove tick label at beginning of x axis
xticks = ax.xaxis.get_major_ticks()
xticks[1].set_visible(False)

# upper limit marker
labels.insert(4, 'upper limit') # modify the 3 depending on how many plots
new_handles.insert(4, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))


legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
plt.grid(True, which='both', alpha=0.25)

# Plotting and saving
# plt.savefig('../../Figures/Levy_Upper_Lims.png')
# plt.savefig('Levy_Upper_Lims.png')
plt.show()

34     9.2989
49     4.4771
59     4.3979
78     5.3979
94     7.1614
120    5.1761
126    6.0000
137    6.9445
168    7.1139
226    6.4150
257    6.3010
265    5.1761
266    5.1761
Name: MBH, dtype: float64


## Analysis

### Logistic Portion

Calculate the stellar velocity dispersion values at which the 50%, 90% and 99% probability of hosting a central BH is attained. Calculate what percentage of the data set is above these values of velocity dispersions.

In [15]:
# Probability values
P = [0.5, 0.9, 0.99]
sigma_vals = np.zeros(len(P))
mass_vals = np.zeros(len(P))
percentages = np.zeros(len(P))

# Calculate velocity dispersions and masses corresponding to P vals
for p, i in zip(P, range(len(P))):
    sigma_vals[i] = (1/beta1 * (np.log(p/(1-p)) - beta0))
    mass_vals[i] = (gamma0 + gamma1*sigma_vals[i])
    print('For p = %.2f, we have velocity dispersion = %.2f km/s and BH mass = %.2E solar masses' % (p, 10**sigma_vals[i], 10**mass_vals[i]))

print('')

# Calculate percentage of data
for i in range(len(P)):
    print('%.2f of the galaxies are above the p = %.2f probability of having a BH' % (len(np.where(data.SIG > sigma_vals[i])[0])/len(data.SIG), P[i]))

For p = 0.50, we have velocity dispersion = 10.58 km/s and BH mass = 1.13E+01 solar masses
For p = 0.90, we have velocity dispersion = 34.49 km/s and BH mass = 1.01E+04 solar masses
For p = 0.99, we have velocity dispersion = 125.22 km/s and BH mass = 1.68E+07 solar masses

1.00 of the galaxies are above the p = 0.50 probability of having a BH
0.96 of the galaxies are above the p = 0.90 probability of having a BH
0.63 of the galaxies are above the p = 0.99 probability of having a BH


### Linear Portion

Calculate the velocity dispersion and BH mass values at which our linear curve intersects VDB2016.

In [16]:
transition_sigma = (vdb_gamma0 - gamma0)/(gamma1 - vdb_gamma1)
transition_mass = gamma0 + gamma1*transition_sigma

print('Transition velocity dispersion is: %.2f' % 10**transition_sigma)
print('Transition mass is: %.2E' % 10**transition_mass)

Transition velocity dispersion is: 123.10
Transition mass is: 1.52E+07


Calculate the velocity dispersion at which BHs are ultra-massive, i.e. masses are greater than $10^{10}$

In [17]:
sigma_ultramassive = (10 - gamma0)/gamma1

print('BHs have masses greater than 10^10 at velocity dispersion: %.2f' % 10**sigma_ultramassive)

BHs have masses greater than 10^10 at velocity dispersion: 380.26


# Predictive Checks

## Prior Predictive Checks

In [18]:
# Load data
pydf = np.loadtxt('./prior_samples_full.txt', delimiter=" ", dtype=float)

beta0_priors = pydf[:, 0]
beta1_priors = pydf[:, 1]
gamma0_priors = pydf[:, 2]
gamma1_priors = pydf[:, 3]

# Create a 2x2 grid of subplots
plt.figure(figsize=(10, 8))

# Top row, first subplot
plt.subplot(2, 2, 1)
hist_beta0, _, _ = plt.hist(beta0_priors, color='blue', alpha=0.7)
ymin, ymax = plt.ylim()
# plt.xlim(-1000, 1000)
plt.vlines(beta0, ymin, ymax)
plt.title('Beta0 Priors')

# Top row, second subplot
plt.subplot(2, 2, 2)
hist_beta1, _, _ = plt.hist(beta1_priors, color='orange', alpha=0.7)
ymin, ymax = plt.ylim()
# plt.xlim(-1000, 1000)
plt.vlines(beta1, ymin, ymax)
plt.title('Beta1 Priors')

# Bottom row, first subplot
plt.subplot(2, 2, 3)
hist_gamma0, _, _ = plt.hist(gamma0_priors, color='green', alpha=0.7)
ymin, ymax = plt.ylim()
# plt.xlim(-1000, 1000)
plt.vlines(gamma0, ymin, ymax)
plt.title('Gamma0 Priors')

# Bottom row, second subplot
plt.subplot(2, 2, 4)
hist_gamma1, _, _ = plt.hist(gamma1_priors, color='red', alpha=0.7)
ymin, ymax = plt.ylim()
# plt.xlim(-1000, 1000)
plt.vlines(gamma1, ymin, ymax)
plt.title('Gamma1 Priors')

# Adjust layout for better spacing
plt.tight_layout()

# Show the plot
# plt.savefig('uniform_priors.png')
plt.show()

In [31]:
x_rep_arr = np.loadtxt('./prior_pred_check_x.txt', delimiter=" ", dtype=float)
lin_arr = np.loadtxt('./prior_pred_check_lin.txt', delimiter=" ", dtype=float)
hur_arr = np.loadtxt('./prior_pred_check_hur.txt', delimiter=" ", dtype=float)

In [32]:
# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Plot labels
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.xlim(1.0, 2.8)
# plt.ylim(-0.5, 11)

# Plotting simulated data
n_sims = 1000
sims = np.random.randint(0, len(lin_arr), size=n_sims)
x = np.array(data.SIG)
for i in sims:
    x_rep = x_rep_arr[i]
    lin = lin_arr[i]
    hur = hur_arr[i]

    upp_lim_idx = np.where(hur == 0)
    non_upp_lim_idx = np.where(hur == 1)
    plt.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color='orange', marker='v', s=50, edgecolors='black')
    plt.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color='orange', s=36)


# Plotting true data
plt.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
plt.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
plt.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
plt.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

plt.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

plt.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
plt.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
plt.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
plt.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

plt.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
plt.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
plt.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
plt.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

plt.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
plt.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
plt.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
plt.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)


plt.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
plt.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
plt.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

# Customizing legend to add upper limit marker and remove error bars
ax = plt.gca()

# Customizing font size for axis labels
ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

handles, labels = ax.get_legend_handles_labels()

new_handles = []

for h in handles:
    # only needed to edit the errorbar legend entries
    if isinstance(h, container.ErrorbarContainer):
        new_handles.append(h[0])
    else:
        new_handles.append(h)
        

# upper limit marker
labels.insert(0, 'upper limit') # modify the 3 depending on how many plots
new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
labels.insert(6, 'simulated') # modify the 3 depending on how many plots
new_handles.insert(6, Line2D([0], [0], color='orange', marker='.', ls=''))

legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
plt.grid(True, which='both', alpha=0.25)

# Plotting and saving
# plt.savefig('uniform_priors_prioronly_fake_data.png')
plt.show()

## Posterior Predictive Checks

In [302]:
x_rep_arr = np.loadtxt('./post_pred_check_x.txt', delimiter=" ", dtype=float)
lin_arr = np.loadtxt('./post_pred_check_lin.txt', delimiter=" ", dtype=float)
hur_arr = np.loadtxt('./post_pred_check_hur.txt', delimiter=" ", dtype=float)

x_rep_arr_10x = np.loadtxt('./post_pred_check_x_10x.txt', delimiter=" ", dtype=float)
lin_arr_10x = np.loadtxt('./post_pred_check_lin_10x.txt', delimiter=" ", dtype=float)
hur_arr_10x = np.loadtxt('./post_pred_check_hur_10x.txt', delimiter=" ", dtype=float)

x_rep_arr_100x = np.loadtxt('./post_pred_check_x_100x.txt', delimiter=" ", dtype=float)
lin_arr_100x = np.loadtxt('./post_pred_check_lin_100x.txt', delimiter=" ", dtype=float)
hur_arr_100x = np.loadtxt('./post_pred_check_hur_100x.txt', delimiter=" ", dtype=float)

n_sims = 1000
# np.random.seed(1)
sims = np.random.randint(0, len(lin_arr), size=n_sims)

color_sim = '#dbbc23'
# https://colorbrewer2.org/#type=sequential&scheme=OrRd&n=3

### Analyzing upper limits

Calculate between what values of velocity dispersion are situated the middle 75% of the upper limits.

In [91]:
tmp_x = np.array([])

for i in sims:
    x_rep = copy.deepcopy(x_rep_arr[i])
    lin = copy.deepcopy(lin_arr[i])
    hur = copy.deepcopy(hur_arr[i])

    upp_lim_idx = np.where(hur == 0)

    tmp_x = np.append(tmp_x, x_rep[upp_lim_idx])

sig1, sig2 = np.percentile(tmp_x, [12.5, 87.5])

print('90 percent of the simulated upper limits fall between stellar velocity dispersion values of %.1f and %.1f km/s.' % (10**sig1, 10**sig2))
print('Using our linear portion, this corresponds to BH masses of %.1E and %.1E solar masses.' % (10**linear(sig1, gamma0, gamma1), 10**linear(sig2, gamma0, gamma1)))

90 percent of the simulated upper limits fall between stellar velocity dispersion values of 36.2 and 152.1 km/s.
Using our linear portion, this corresponds to BH masses of 1.3E+04 and 5.1E+07 solar masses.


### Simulating 10 Points Per Posterior Sample

In [313]:
# Getting simulated data. Putting upper limits at zero
hur_flat = copy.deepcopy(hur_arr[sims]).flatten()
ul_idx_flat = np.where(hur_flat == 0)
lin_flat = copy.deepcopy(lin_arr[sims]).flatten()
lin_flat[ul_idx_flat] = 0

bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

# Plotting histograms with common bin edges
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.hist(data.MBH, bins=bin_edges, alpha=0.5, density=True, color='dodgerblue', label='Observed Data')
plt.hist(lin_flat, bins=bin_edges, alpha=0.5, density=True, color=color_sim, label='Simulated Data')

ax = plt.gca()
ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison.png')

In [312]:
# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Plot labels
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

for ax, j in zip(axes, range(len(axes))):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        x_rep = copy.deepcopy(x_rep_arr[i])
        lin = copy.deepcopy(lin_arr[i])
        hur = copy.deepcopy(hur_arr[i])

        upp_lim_idx = np.where(hur == 0)
        non_upp_lim_idx = np.where(hur == 1)
        
        if j == 1:
            lin[upp_lim_idx] = 0
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, alpha=0.3)
        else:
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, edgecolors='black', alpha=0.3)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color=color_sim, s=36, alpha=0.3)

    # Plotting true data
    ax.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
    ax.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
    ax.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
    ax.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

    ax.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

    ax.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
    ax.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
    ax.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
    ax.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

    ax.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
    ax.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
    ax.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
    ax.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

    ax.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
    ax.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
    ax.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
    ax.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)

    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Customizing legend to add upper limit marker and remove error bars
    handles, labels = ax.get_legend_handles_labels()

    new_handles = []

    for h in handles:
        # only needed to edit the errorbar legend entries
        if isinstance(h, container.ErrorbarContainer):
            new_handles.append(h[0])
        else:
            new_handles.append(h)

    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    ax.xaxis.set_major_locator(MultipleLocator(0.5))
    ax.xaxis.set_major_formatter('{x:.1f}')
    ax.xaxis.set_minor_locator(MultipleLocator(0.1))

    ax.yaxis.set_major_locator(MultipleLocator(2))
    ax.yaxis.set_major_formatter('{x:.0f}')
    ax.yaxis.set_minor_locator(MultipleLocator(0.5))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # upper limit marker
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color=color_sim, marker='.', ls=''))

    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

    # Add a) and b) labels
    if j == 0:
        ax.text(0.02, 1.05, f'a) Logistic Zeros Plotted As Upper Limits', transform=ax.transAxes, va='top', ha='left', fontsize=20)
    else:
        ax.text(0.02, 1.05, f'b) Logistic Zeros Plotted As Galaxies Without BHs', transform=ax.transAxes, va='top', ha='left', fontsize=20)

# Plotting and saving
plt.savefig('comparison_posterior_fake_data.png')
plt.show()

### Simulating 100 Points Per Posterior Sample

In [306]:
# Getting simulated data. Putting upper limits at zero
hur_flat_10x = copy.deepcopy(hur_arr_10x[sims]).flatten()
ul_idx_flat_10x = np.where(hur_flat_10x == 0)
lin_flat_10x = copy.deepcopy(lin_arr_10x[sims]).flatten()
lin_flat_10x[ul_idx_flat_10x] = 0

bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

# Plotting histograms with common bin edges
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.hist(data.MBH, bins=bin_edges, alpha=0.5, density=True, color='dodgerblue', label='Observed Data')
plt.hist(lin_flat_10x, bins=bin_edges, alpha=0.5, density=True, color=color_sim, label='Simulated Data')

ax = plt.gca()
ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison_10x.png')

In [311]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import container
import numpy as np

# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Plot labels
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

for ax, j in zip(axes, range(len(axes))):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        x_rep = copy.deepcopy(x_rep_arr_10x[i])
        lin = copy.deepcopy(lin_arr_10x[i])
        hur = copy.deepcopy(hur_arr_10x[i])

        upp_lim_idx = np.where(hur == 0)
        non_upp_lim_idx = np.where(hur == 1)
        
        if j == 1:
            lin[upp_lim_idx] = 0
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, alpha=0.3)
        else:
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, edgecolors='black', alpha=0.3)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color=color_sim, s=36, alpha=0.3)

    # Plotting true data
    ax.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
    ax.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
    ax.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
    ax.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

    ax.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

    ax.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
    ax.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
    ax.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
    ax.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

    ax.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
    ax.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
    ax.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
    ax.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

    ax.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
    ax.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
    ax.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
    ax.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)

    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Customizing legend to add upper limit marker and remove error bars
    handles, labels = ax.get_legend_handles_labels()

    new_handles = []

    for h in handles:
        # only needed to edit the errorbar legend entries
        if isinstance(h, container.ErrorbarContainer):
            new_handles.append(h[0])
        else:
            new_handles.append(h)
            
    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    ax.xaxis.set_major_locator(MultipleLocator(0.5))
    ax.xaxis.set_major_formatter('{x:.1f}')
    ax.xaxis.set_minor_locator(MultipleLocator(0.1))

    ax.yaxis.set_major_locator(MultipleLocator(2))
    ax.yaxis.set_major_formatter('{x:.0f}')
    ax.yaxis.set_minor_locator(MultipleLocator(0.5))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # upper limit marker
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color=color_sim, marker='.', ls=''))

    # ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=False)
    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

    # Add a) and b) labels
    if j == 0:
        ax.text(0.02, 1.05, f'a) Logistic Zeros Plotted As Upper Limits', transform=ax.transAxes, va='top', ha='left', fontsize=20)
    else:
        ax.text(0.02, 1.05, f'b) Logistic Zeros Plotted As Galaxies Without BHs', transform=ax.transAxes, va='top', ha='left', fontsize=20)


# Plotting and saving
plt.savefig('comparison_posterior_fake_data_10x.png')
plt.show()


### Simulating 1000 Points Per Posterior Sample

In [314]:
# Getting simulated data. Putting upper limits at zero
hur_flat_100x = copy.deepcopy(hur_arr_100x[sims]).flatten()
ul_idx_flat_100x = np.where(hur_flat_100x == 0)
lin_flat_100x = copy.deepcopy(lin_arr_100x[sims]).flatten()
lin_flat_100x[ul_idx_flat_100x] = 0

bin_edges = np.linspace(0, 14, num=50)  # Adjust num as needed

# Plotting histograms with common bin edges
fig = plt.figure(figsize=(10,8), layout='constrained')
plt.hist(data.MBH, bins=bin_edges, alpha=0.5, density=True, color='dodgerblue', label='Observed Data')
plt.hist(lin_flat_100x, bins=bin_edges, alpha=0.5, density=True, color=color_sim, label='Simulated Data')

ax = plt.gca()
ax.set_xlim(0, 14)
ax.set_ylim(0, 0.4)
# Customizing font size for axis labels
ax.set_xlabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)
ax.set_ylabel(r'Density', fontsize=25)
ax.legend()

plt.savefig('histogram_comparison_100x.png')

In [50]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import container
import numpy as np

# Velocity dispersion (sigma) values for plotting
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Plot labels
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8), sharey=False)

for ax, j in zip(axes, range(len(axes))):
    ax.set_xlim(1.0, 2.8)
    ax.set_ylim(-0.5, 11)

    # Plotting simulated data
    for i in sims:
        x_rep = copy.deepcopy(x_rep_arr_100x[i])
        lin = copy.deepcopy(lin_arr_100x[i])
        hur = copy.deepcopy(hur_arr_100x[i])

        upp_lim_idx = np.where(hur == 0)
        non_upp_lim_idx = np.where(hur == 1)
        
        if j == 1:
            lin[upp_lim_idx] = 0
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, alpha=0.3)
        else:
            ax.scatter(x_rep[upp_lim_idx], lin[upp_lim_idx], color=color_sim, marker='v', s=50, edgecolors='black', alpha=0.3)

        ax.scatter(x_rep[non_upp_lim_idx], lin[non_upp_lim_idx], color=color_sim, s=36, alpha=0.3)

    # Plotting true data
    ax.errorbar(stars_upplim.SIG, stars_upplim.MBH, stars_upplim.DMBH, ls='', lw=0.75, color='darkred', marker='.')
    ax.scatter(stars_upplim.SIG, stars_upplim.MBH, color='darkred', marker='v', s=50)
    ax.errorbar(stars_true.SIG, stars_true.MBH, stars_true.DMBH, ls='', lw=0.75, color='darkred', marker='.', label='stars')
    ax.scatter(stars_true.SIG, stars_true.MBH, color='darkred', s=36)

    ax.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

    ax.errorbar(gas_upplim.SIG, gas_upplim.MBH, gas_upplim.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.')
    ax.scatter(gas_upplim.SIG, gas_upplim.MBH, color='dodgerblue', marker='v', s=50)
    ax.errorbar(gas_true.SIG, gas_true.MBH, gas_true.DMBH, ls='', lw=0.75, color='dodgerblue', marker='.', label='gas')
    ax.scatter(gas_true.SIG, gas_true.MBH, color='dodgerblue', s=36)

    ax.errorbar(reverb_upplim.SIG, reverb_upplim.MBH, reverb_upplim.DMBH, ls='', lw=0.75, color='black', marker='.')
    ax.scatter(reverb_upplim.SIG, reverb_upplim.MBH, color='black', marker='v', s=50)
    ax.errorbar(reverb_true.SIG, reverb_true.MBH, reverb_true.DMBH, ls='', lw=0.75, color='black', marker='.', label='reverberation')
    ax.scatter(reverb_true.SIG, reverb_true.MBH, color='black', s=36)

    ax.errorbar(maser_upplim.SIG, maser_upplim.MBH, maser_upplim.DMBH, ls='', lw=0.75, color='forestgreen', marker='.')
    ax.scatter(maser_upplim.SIG, maser_upplim.MBH, color='forestgreen', marker='v', s=50)
    ax.errorbar(maser_true.SIG, maser_true.MBH, maser_true.DMBH, ls='', lw=0.75, color='forestgreen', marker='.', label='maser')
    ax.scatter(maser_true.SIG, maser_true.MBH, color='forestgreen', s=36)

    ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
    ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
    ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

    # Customizing font size for axis labels
    ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
    ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

    # Customizing legend to add upper limit marker and remove error bars
    handles, labels = ax.get_legend_handles_labels()

    new_handles = []

    for h in handles:
        # only needed to edit the errorbar legend entries
        if isinstance(h, container.ErrorbarContainer):
            new_handles.append(h[0])
        else:
            new_handles.append(h)

    ax.tick_params(reset=True)
    ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
    ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)
    ax.xaxis.set_major_locator(MultipleLocator(0.5))
    ax.xaxis.set_major_formatter('{x:.1f}')
    ax.xaxis.set_minor_locator(MultipleLocator(0.1))

    ax.yaxis.set_major_locator(MultipleLocator(2))
    ax.yaxis.set_major_formatter('{x:.0f}')
    ax.yaxis.set_minor_locator(MultipleLocator(0.5))

    # Remove tick label at beginning of x axis
    xticks = ax.xaxis.get_major_ticks()
    xticks[1].set_visible(False)

    # upper limit marker
    labels.insert(0, 'upper limit')  # modify the index depending on how many plots
    new_handles.insert(0, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))
    labels.insert(6, 'simulated')  # modify the index depending on how many plots
    new_handles.insert(6, Line2D([0], [0], color=color_sim, marker='.', ls=''))

    # ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=False)
    legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
    ax.grid(True, which='both', alpha=0.25)

    # Add a) and b) labels
    if j == 0:
        ax.text(0.02, 1.05, f'a) Logistic Zeros Plotted As Upper Limits', transform=ax.transAxes, va='top', ha='left', fontsize=20)
    else:
        ax.text(0.02, 1.05, f'b) Logistic Zeros Plotted As Galaxies Without BHs', transform=ax.transAxes, va='top', ha='left', fontsize=20)

# Plotting and saving
plt.savefig('comparison_posterior_fake_data_100x.png')
plt.show()