In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import container
from matplotlib.lines import Line2D
from matplotlib.collections import PolyCollection
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from scipy.stats import bernoulli, norm
%matplotlib qt

In [2]:
# Read the complete compilation table; the file is decoded as ISO-8859-1.
catalogue_path = '../../Data/BHcompilation_updated.csv'
full_data = pd.read_csv(catalogue_path, encoding='ISO-8859-1')

In [3]:
# Data for fitting: only rows flagged SELECTED == 1.
# An explicit .copy() makes `data` an independent frame, so the assignments
# below modify `data` unambiguously (no SettingWithCopyWarning) and never
# touch `full_data`.
data = full_data.loc[full_data.SELECTED == 1].copy()

# Normalizing uncertainties to 3-sigma values:
#   CONFLEVEL == 1 -> DMBH multiplied by 3
#   CONFLEVEL == 2 -> DMBH multiplied by 3/2
# Rows with any other CONFLEVEL are left unchanged.
data.loc[data.CONFLEVEL == 1, 'DMBH'] = data.DMBH*3
data.loc[data.CONFLEVEL == 2, 'DMBH'] = data.DMBH*(3/2)

# Reset the indices to avoid KeyError's later on
data = data.reset_index(drop=True)

In [4]:
# Quick-look scatter of the fitting sample (SIG on x, MBH on y).
fig, ax = plt.subplots(figsize=(10,8), layout='constrained')
axis_font = dict(fontsize='x-large', fontweight='bold')
ax.set_xlabel(r'$\sigma$ log(km/s)', **axis_font)
ax.set_ylabel(r'$M_\bullet$ log(M$_\odot$)', **axis_font)
ax.scatter(data.SIG, data.MBH, label='data')
plt.show()

libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open iris: /usr/lib/dri/iris_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: iris
libGL error: MESA-LOADER: failed to open swrast: /usr/lib/dri/swrast_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: swrast


In [5]:
# logistic function
def logit(x, beta0, beta1):
    """Logistic (sigmoid) curve ``1 / (1 + exp(-(beta0 + beta1*x)))``.

    Note: despite its name this is the *inverse* logit (it maps a linear
    predictor to a value in [0, 1]), not the log-odds transform.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the curve.
    beta0, beta1 : scalar or array-like
        Intercept and slope of the linear predictor; broadcast against ``x``.

    Returns
    -------
    Same shape as the broadcast of the inputs, values in [0, 1].
    """
    z = beta0 + beta1*x
    # 1/(1 + exp(-z)) == exp(-log(1 + exp(-z))).  np.logaddexp evaluates
    # log(exp(0) + exp(-z)) without ever taking exp() of a large positive
    # number, so strongly negative z no longer triggers an overflow warning.
    return np.exp(-np.logaddexp(0, -z))
  
# log_linear function
def linear(x, gamma0, gamma1):
    """Straight line with intercept ``gamma0`` and slope ``gamma1`` evaluated at ``x``."""
    slope_term = gamma1*x
    return gamma0 + slope_term
  
# hurdle model functional form
def hurdle(x, beta0, beta1, gamma0, gamma1):
    """Product of the logistic curve (beta0, beta1) and the straight line (gamma0, gamma1) at ``x``."""
    weight = logit(x, beta0, beta1)
    trend = linear(x, gamma0, gamma1)
    return weight*trend

# hurdle model, complementary-probability variant
def hurdle_brms(x, beta0, beta1, gamma0, gamma1):
    """Like ``hurdle`` but weights the straight line by ``1 - logit(x, beta0, beta1)``.

    NOTE(review): presumably matches a parameterisation in which the logistic
    part models the probability of a zero (as the ``brms`` suffix suggests) -
    confirm against the fitting code.
    """
    complement = 1 - logit(x, beta0, beta1)
    return complement*linear(x, gamma0, gamma1)

In [6]:
samples = np.loadtxt('./samples.txt', delimiter=" ", dtype=float)

# Column-wise posterior summaries (each row of `samples` is one draw).
# Calculate the median for each parameter
medians = np.median(samples, axis=0)

# Calculate the 95% credible interval for each parameter
credible_interval_low = np.percentile(samples, 2.5, axis=0)
credible_interval_high = np.percentile(samples, 97.5, axis=0)

# The first four columns are used as beta0, beta1, gamma0, gamma1 (in that order).
beta0, beta1, gamma0, gamma1 = medians[:4]

# Single "+/-" figure per parameter: the larger of the two distances from the
# median to the interval bounds (the interval need not be symmetric).
errors = np.maximum(credible_interval_high - medians,
                    medians - credible_interval_low)[:4]
beta0_err, beta1_err, gamma0_err, gamma1_err = errors

# Print or use the results as needed
for param, median, low, high, error in zip(range(1, 5), medians, credible_interval_low, credible_interval_high, errors):
    # The interval's closing parenthesis now sits right after {high}; it used to
    # be printed after the error value, making the error look like part of the interval.
    print(f"Parameter {param}: Median = {median}, 95% Credible Interval = ({low}, {high}), Estimated error = {error}")

Parameter 1: Median = -0.0032363818538583, 95% Credible Interval = (-7.437722307151463, 0.5874964673294943, Estimated error = 7.434485925297605)
Parameter 2: Median = 0.10404228195641, 95% Credible Interval = (0.07308558331662403, 5.97645068542675, Estimated error = 5.87240840347034)
Parameter 3: Median = 1.8054118779549666, 95% Credible Interval = (-6.5471565621515335, 3.956706320533861, Estimated error = 8.3525684401065)
Parameter 4: Median = 3.033956430311785, 95% Credible Interval = (1.6435809963364638, 6.475195837418021, Estimated error = 3.441239407106236)


In [7]:
# Boolean masks over the full table, combined below into the plotting subsets.
# NOTE(review): every subset here is cut from `full_data`, so its DMBH column has
# NOT been rescaled to 3-sigma the way `data.DMBH` was - confirm that plotting the
# raw uncertainties is intended.
in_fit = full_data.SELECTED == 1
left_out = full_data.SELECTED != 1
is_limit = full_data.UPPERLIMIT == 1
is_measured = full_data.UPPERLIMIT == 0

from_stars = full_data.TYPE.isin(['star', 'stars'])
from_gas = full_data.TYPE.isin(['gas', 'CO'])
from_reverb = full_data.TYPE == 'reverb'
from_maser = full_data.TYPE == 'maser'

# Data omitted from fits
omitted_upplim = full_data.loc[left_out & is_limit]
omitted_true = full_data.loc[left_out & is_measured]

# Fit data
# Stellar upper limits are split by whether MBH is positive or exactly zero.
stars_upplim = full_data.loc[in_fit & (full_data.MBH > 0) & is_limit & from_stars]
stars_upplim_zero = full_data.loc[in_fit & (full_data.MBH == 0) & is_limit & from_stars]
stars_true = full_data.loc[in_fit & is_measured & from_stars]

gas_upplim = full_data.loc[in_fit & is_limit & from_gas]
gas_true = full_data.loc[in_fit & is_measured & from_gas]

reverb_upplim = full_data.loc[in_fit & is_limit & from_reverb]
reverb_true = full_data.loc[in_fit & is_measured & from_reverb]

maser_upplim = full_data.loc[in_fit & is_limit & from_maser]
maser_true = full_data.loc[in_fit & is_measured & from_maser]

In [8]:
pydf = np.loadtxt('./samples_full.txt', delimiter=" ", dtype=float)
n_samples = pydf.shape[0]
n_chains = 4
chain_length = n_samples//n_chains
steps = np.arange(chain_length)

sigma_plot_values = np.linspace(1, 2.8, num=100)

# Evaluate the hurdle curve for every draw in one call: each parameter column is
# kept as shape (n_samples, 1) so it broadcasts against the (100,) sigma grid,
# giving y with shape (n_samples, 100).
beta0_draws, beta1_draws, gamma0_draws, gamma1_draws = (pydf[:, [k]] for k in range(4))
y = hurdle(sigma_plot_values, beta0_draws, beta1_draws, gamma0_draws, gamma1_draws)

# Point-wise summaries over draws. (`stan_mean_full` is a median despite its name.)
# The lower/upper bounds were previously assigned the 97.5% / 2.5% quantiles,
# i.e. swapped relative to their names.
stan_mean_full = np.median(y, axis=0)
stan_lower_full = np.quantile(y, 0.025, axis=0)
stan_upper_full = np.quantile(y, 0.975, axis=0)

# Split chains: consecutive, equally long row blocks of the sample file.
chains = [pydf[k*chain_length:(k + 1)*chain_length, :] for k in range(n_chains)]
chain1, chain2, chain3, chain4 = chains  # legacy names; relies on n_chains == 4


def plot_chain_traces(chains, steps, column, symbol, filename):
    """Draw one trace panel per chain for a single parameter and save the figure.

    Parameters
    ----------
    chains : sequence of 2-D arrays
        One array of draws per chain (rows = steps, columns = parameters).
    steps : 1-D array
        x values for the traces; same length as each chain.
    column : int
        Column of the parameter to plot.
    symbol : str
        Mathtext label used for the y axes and the title.
    filename : str
        Path the figure is written to.

    Returns
    -------
    (fig, axs) : the created Figure and the 1-D array of Axes.
    """
    fig, axs = plt.subplots(len(chains), 1, sharex=True, figsize=(8, 10))
    axs = np.atleast_1d(axs)

    for number, (ax, chain) in enumerate(zip(axs, chains), start=1):
        trace = chain[:, column]
        chain_mean = np.mean(trace)
        ax.plot(steps, trace, label=f'Chain {number}')
        ax.hlines(chain_mean, steps[0], steps[-1], color='red', label=f'Mean = {chain_mean:.2f}', linestyle='dashed')
        ax.set_ylabel(symbol)
        ax.legend(loc='upper right')

    axs[-1].set_xlabel('Step')
    axs[0].set_title(f'Chain convergence for {symbol}')

    # Save through the Figure object *before* anything is shown: with a blocking
    # plt.show() the window is already closed when control returns, and a
    # following plt.savefig() would write an empty figure.
    fig.savefig(filename)
    return fig, axs


# One trace figure per model parameter: (column, axis label, output file).
trace_specs = [
    (0, r'$\beta_0$', 'beta0_post.png'),
    (1, r'$\beta_1$', 'beta1_post.png'),
    (2, r'$\gamma_0$', 'gamma0_post.png'),
    (3, r'$\gamma_1$', 'gamma1_post.png'),
]

for column, symbol, filename in trace_specs:
    fig, axs = plot_chain_traces(chains, steps, column, symbol, filename)
    plt.show()

In [None]:
pydf = np.loadtxt('./samples.txt', delimiter=" ", dtype=float)

sigma_plot_values = np.linspace(1, 2.8, num=100)

# Keep each parameter column as shape (n_draws, 1) so a single call evaluates
# all draws on the whole sigma grid -> arrays of shape (n_draws, 100).
beta0_draws, beta1_draws, gamma0_draws, gamma1_draws = (pydf[:, [k]] for k in range(4))
y = hurdle(sigma_plot_values, beta0_draws, beta1_draws, gamma0_draws, gamma1_draws)
lin = linear(sigma_plot_values, gamma0_draws, gamma1_draws)

# Point-wise summaries over draws. (`stan_mean` / `lin_mean` are medians despite
# their names.)  The bounds were previously assigned the 97.5% / 2.5% quantiles,
# i.e. swapped relative to their names; a fill_between over the two is unchanged.
stan_mean = np.median(y, axis=0)
stan_lower = np.quantile(y, 0.025, axis=0)
stan_upper = np.quantile(y, 0.975, axis=0)
lin_mean = np.median(lin, axis=0)

In [None]:
# Grid of velocity dispersion (sigma) values on which the curves are drawn
sigma_plot_values = np.linspace(1, 2.8, num=100)

# Reference relation; coefficients from Remco Van Den Bosch 2016
y_2016 = -4 + 5.35*sigma_plot_values

# Figure, axes and fixed view limits
fig, ax = plt.subplots(figsize=(10,8), layout='constrained')
ax.set_xlim(1.0, 2.8)
ax.set_ylim(-0.5, 11)

# Model curves: hurdle fit, its linear portion, the reference relation, and the
# band between stan_lower and stan_upper
ax.plot(sigma_plot_values, stan_mean, color='black', label='Hurdle Model')
ax.plot(sigma_plot_values, lin_mean, color='black', linestyle='dashed', label='Linear Portion')
ax.plot(sigma_plot_values, y_2016, color='black', linestyle='dotted', label='van den Bosch 2016')
ax.fill_between(sigma_plot_values, stan_lower, stan_upper, alpha=0.15, color='black', label='95% Credible Interval')

# Fitted data, one colour per measurement method.
# Upper limits: error bar plus a 'v' marker; measurements: error bar plus a dot.
# Only the measurement error bars carry a legend label.
method_groups = [
    (stars_upplim, stars_true, 'darkred', 'stars'),
    (gas_upplim, gas_true, 'dodgerblue', 'gas'),
    (reverb_upplim, reverb_true, 'black', 'reverberation'),
    (maser_upplim, maser_true, 'forestgreen', 'maser'),
]

for limits, measured, colour, method in method_groups:
    ax.errorbar(limits.SIG, limits.MBH, limits.DMBH, ls='', lw=0.75, color=colour, marker='.')
    ax.scatter(limits.SIG, limits.MBH, color=colour, marker='v', s=50)
    ax.errorbar(measured.SIG, measured.MBH, measured.DMBH, ls='', lw=0.75, color=colour, marker='.', label=method)
    ax.scatter(measured.SIG, measured.MBH, color=colour, s=36)
    if method == 'stars':
        # Stellar upper limits with MBH == 0 get the marker only, no error bar
        ax.scatter(stars_upplim_zero.SIG, stars_upplim_zero.MBH, color='darkred', marker='v', s=50)

# Objects left out of the fit, in grey. The errorbar call has no yerr; it
# supplies the 'omitted' legend entry.
ax.scatter(omitted_upplim.SIG, omitted_upplim.MBH, color='grey', marker='v', s=50)
ax.errorbar(omitted_true.SIG, omitted_true.MBH, ls='', lw=0.75, color='grey', marker='.', label='omitted')
ax.scatter(omitted_true.SIG, omitted_true.MBH, color='grey', s=36)

# Fitted relation written onto the plot (medians with their +/- estimates)
mass_relation = '$\\log(M_\\bullet) = p(%.2f^{\\pm%.1f}+%.2f^{\\pm%.1f}\\log(\\sigma))$' % (gamma0, gamma0_err, gamma1, gamma1_err)
probability_relation = '$p = \\frac{1}{1+exp{(-(%.2f^{\\pm%.1f}+%.2f^{\\pm%.2f}\\log(\\sigma)))}}$' % (beta0, beta0_err, beta1, beta1_err)
ax.text(1.60, 1.70, mass_relation, fontsize=21)
ax.text(1.80, 0.70, probability_relation, fontsize=22)

# Axis labels with enlarged font
ax.set_xlabel(r'velocity dispersion $\sigma$ (log km/s)', fontsize=25)
ax.set_ylabel(r'BH mass $M_\bullet$ (log $M_\odot$)', fontsize=25)

# Legend entries: for errorbar containers keep only the first element (the data
# line), so the legend shows a plain marker without error bars
handles, labels = ax.get_legend_handles_labels()
new_handles = [
    h[0] if isinstance(h, container.ErrorbarContainer) else h
    for h in handles
]

# Tick styling: inward ticks on all four sides, fixed major/minor spacing
ax.tick_params(reset=True)
ax.tick_params(which='major', direction='in', length=10, top=True, right=True, labelsize=20)
ax.tick_params(which='minor', direction='in', length=5, top=True, right=True)

ax.xaxis.set_major_locator(MultipleLocator(0.5))
ax.xaxis.set_major_formatter('{x:.1f}')
ax.xaxis.set_minor_locator(MultipleLocator(0.1))

ax.yaxis.set_major_locator(MultipleLocator(2))
ax.yaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_minor_locator(MultipleLocator(0.5))

# Remove tick label at beginning of x axis (hides the major tick at index 1)
xticks = ax.xaxis.get_major_ticks()
xticks[1].set_visible(False)

# Extra legend entry for the upper-limit marker; position 4 must be adjusted if
# the number of labelled artists ahead of it changes
labels.insert(4, 'upper limit')
new_handles.insert(4, Line2D([0], [0], color='black', marker='v', ls='', markersize=4))

legend = ax.legend(new_handles, labels, markerscale=2.5, loc='upper left', fontsize='large', frameon=True, facecolor='white')
ax.grid(True, which='both', alpha=0.25)

# Save first, then display
plt.savefig('Levy_Upper_Lims.png')
plt.show()