In [None]:

import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import scipy.stats
# Global matplotlib styling for every figure in this notebook:
# minor ticks on both axes, ticks mirrored on the top/right spines,
# all ticks pointing into the plot area, and a larger base font.
plt.rcParams.update({
    'xtick.minor.visible': True,
    'xtick.top': True,
    'ytick.minor.visible': True,
    'ytick.right': True,
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'font.size': 18,
})

# RV data for 3 stars in Draco dwarf Galaxy from Aaronson 1983

![](./720px-Draco_Dwarf_Spheroidal_\(2024-017\).jpg)

Above: The Draco dwarf galaxy, By Science NASA, ESA, Eduardo Vitral (STScI), Roeland van der Marel (STScI), Sangmo Tony Sohn (STScI), DSS Image Processing: Joseph DePasquale (STScI) - Draco Dwarf Spheroidal, Public Domain, https://commons.wikimedia.org/w/index.php?curid=150260051

In [None]:

# Measured radial velocities of the three stars (km/s, per the axis labels
# used in the plots of this notebook).
V = np.array([-298.15,-300.2,-279.7])
n = len(V)

def logl(mu,sig): 
    '''Draco log likelihood for parameters mu, sig.

    Evaluates  -n*log(sig) - sum_i (V_i - mu)^2 / (2*sig^2)  for the module-level
    data ``V`` (``n = len(V)`` points), i.e. a Gaussian log likelihood with the
    constant -n/2*log(2*pi) term left out.

    Parameters
    ----------
    mu : float or array_like
        Mean of the Gaussian.
    sig : float or array_like
        Standard deviation of the Gaussian. Must broadcast against ``mu``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Log likelihood. A scalar for scalar inputs (same value as before);
        otherwise an array with the broadcast shape of ``mu`` and ``sig``.
    '''
    mu = np.asarray(mu, dtype=float)
    sig = np.asarray(sig, dtype=float)
    # Put the data on a new trailing axis so that mu can have any shape.
    # Without this, an array mu was either subtracted element-wise from V
    # (silently wrong when its length happened to equal n) or failed to broadcast.
    sum_sq = np.sum((V - mu[..., np.newaxis])**2, axis=-1)
    return - n * np.log(sig) - sum_sq/(2.*sig**2)


## point estimates from setting derivatives = 0


In [None]:

# Maximum-likelihood point estimates (where the derivatives of logl vanish):
# the sample mean, and the root-mean-square deviation about it.
# Note the 1/n (not 1/(n-1)) normalisation of the dispersion.
muhat = V.sum() / n
sigmahat = np.sqrt(((V - muhat) ** 2).sum() / n)
print(muhat, sigmahat)

## interval estimates 1: Cramér–Rao bound


In [None]:

# Symmetric uncertainties on the two parameters, evaluated at the point
# estimates: sigmahat/sqrt(n) for the mean and sigmahat/sqrt(2n) for the
# dispersion.
sigma_mu, sigma_sigma = sigmahat / np.sqrt(n), sigmahat / np.sqrt(2 * n)

print(f'Mean   = {muhat} +/- {sigma_mu}')
print(f'Stddev = {sigmahat} +/- {sigma_sigma}')

### grid calculation for logl


In [None]:

# Axes of the parameter grid: 100 values of the mean and 100 of the dispersion.
mulist = np.linspace(-310, -270, 100)
siglist = np.linspace(3, 30, 100)

# 'ij' indexing: the first grid axis runs over mulist, the second over siglist.
# The flattened arrays therefore reshape to (len(mulist), len(siglist)).
mu_grid, sig_grid = np.meshgrid(mulist, siglist, indexing='ij')
MU = mu_grid.flatten()
SIG = sig_grid.flatten()

# Evaluate the log likelihood one (mu, sig) pair at a time.
LOGL = np.array([logl(mu_k, sig_k) for mu_k, sig_k in zip(MU, SIG)])
    
    


### first estimate plotted over the logl grid

Note the uncertainty interval is symmetric but the likelihood clearly isn't

In [None]:
    
# Log-likelihood map (colour scale clipped below at -10) with the point
# estimate and its symmetric error bars drawn on top.
fig, ax = plt.subplots()
logl_map = ax.scatter(MU, SIG, c=LOGL, vmin=-10)
ax.errorbar([muhat], [sigmahat], yerr=[sigma_sigma], xerr=[sigma_mu])
fig.colorbar(logl_map, ax=ax)
ax.set_xlabel('mean v [km/s]')
ax.set_ylabel('velocity dispersion [km/s]')
plt.show()

## Confidence region estimate with Wilks' Theorem


In [None]:
# Wilks' theorem: 2*(max logl - logl) is compared to a chi-square with
# 2 degrees of freedom (two parameters: mu and sig).
# chi2.ppf is the percent point function, i.e. the inverse CDF.
Finv = scipy.stats.chi2.ppf(0.68, df=2)

# The 68% contour lies half of that quantile below the maximum log likelihood.
Deltal = Finv / 2

# Maximum taken over the grid points.
maxl = LOGL.max()
CL68 = maxl - Deltal

### add the Wilks' theorem estimate to the plot

Note it's broader than the first, which was at the Cramér–Rao bound (smallest possible uncertainties)

It also follows the asymmetric shape of the likelihood function (CR bound was symmetric)

In [None]:
    
# Same log-likelihood map and point estimate as before, now with the
# Wilks' theorem 68% contour overlaid.
fig, ax = plt.subplots()
logl_map = ax.scatter(MU, SIG, c=LOGL, vmin=-10)
ax.errorbar([muhat], [sigmahat], yerr=[sigma_sigma], xerr=[sigma_mu])
fig.colorbar(logl_map, ax=ax)

# The flat grid was built with 'ij' indexing, so it reshapes to (mu, sig);
# contour wants rows along y (sig) and columns along x (mu), hence the transpose.
logl_2d = LOGL.reshape(len(mulist), len(siglist)).T
ax.contour(mulist, siglist, logl_2d, levels=[CL68])

ax.set_xlabel('mean v [km/s]')
ax.set_ylabel('velocity dispersion [km/s]')
plt.show()



## Bayesian credible interval


We have to calculate the posterior $\propto$ likelihood (with uniform priors), and find the region enclosing 68% of the pdf

In [None]:

# Likelihood on the grid, normalised to sum to one over the grid points.
L = np.exp(LOGL)
sumL = L.sum()
posterior = L / sumL

# Sort the grid probabilities in ascending order and accumulate them:
# cs[k] is the total probability held by the k+1 least probable points.
sort = np.sort(posterior)
cs = np.cumsum(sort)

# Probability level at which the low-probability tail holds 32% (5%) of the
# total, so the points above it hold 68% (95%). Multiplying by sumL and taking
# the log converts that level back to a log-likelihood contour value.
CL68bayes, CL95bayes = (
    np.log(np.interp(tail, cs, sort) * sumL) for tail in (0.32, 0.05)
)

print(sumL)
for level in (CL68bayes, CL95bayes):
    print(level)
for level in (CL68bayes, CL95bayes):
    print(np.exp(level))

### add Bayesian credible region (in white)

Note the Wilks' Theorem estimate assumes a large number of data points, but here N=3. The Bayesian estimate is exact. But each has a different interpretation.

In [None]:

# Likelihood map (linear scale this time) with the point estimate, the
# Wilks 68% contour, and the Bayesian 68% (solid white) and 95% (dotted white)
# credible-region contours.
fig, ax = plt.subplots()
like_map = ax.scatter(MU, SIG, c=L)
ax.errorbar([muhat], [sigmahat], yerr=[sigma_sigma], xerr=[sigma_mu])
fig.colorbar(like_map, ax=ax)

# Reshape once: (mu, sig) from the 'ij' grid, transposed to contour's (y, x) layout.
logl_2d = LOGL.reshape(len(mulist), len(siglist)).T
ax.contour(mulist, siglist, logl_2d, levels=[CL68])
ax.contour(mulist, siglist, logl_2d, levels=[CL68bayes], colors='w')
ax.contour(mulist, siglist, logl_2d, levels=[CL95bayes], colors='w', linestyles='dotted')

ax.set_xlabel('mean v [km/s]')
ax.set_ylabel('velocity dispersion [km/s]')
plt.show()