In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
# Tick styling for every figure in this notebook: minor ticks visible,
# ticks mirrored onto the top/right edges, and all ticks pointing inwards.
plt.rcParams.update({
    'xtick.minor.visible': True,
    'xtick.top': True,
    'ytick.minor.visible': True,
    'ytick.right': True,
    'xtick.direction': 'in',
    'ytick.direction': 'in',
})

In [None]:
# Larger default font size for all subsequent figures.
plt.rcParams.update({'font.size': 18})

# Our data are measurements of the velocity dispersion ($\sigma$) and absolute magnitude $m_0$ of elliptical galaxies 

### These come from Schechter (1980), and are used to look at the Faber-Jackson relation, which says that the two properties are related through a power law, i.e. $L\propto\sigma^{\gamma}$, where $\gamma$ is unknown


We read in the data and convert the 'absolute' magnitude to a luminosity (applying a correction to put it in terms of Solar luminosity).

In [None]:
# Read the comma-separated table, skipping its single header row.
# Column 0 is taken as sigma and column 1 as m0.
df = np.loadtxt('Sigma_M0.csv', skiprows=1, delimiter=',')
sigma, m0 = df[:, 0], df[:, 1]

# Magnitude -> luminosity. The 3e14 prefactor is the "correction" mentioned in
# the text above; NOTE(review): its derivation is not shown here - confirm.
L = 3e14 * np.power(10, -m0 / 2.5)

# All fits below are done in log10 space.
log_sigma, log_L = np.log10(sigma), np.log10(L)

In [None]:
# Scatter of the sample in log-log space, drawn on the current axes.
scatter_ax = plt.gca()
scatter_ax.plot(log_sigma, log_L, '.')
scatter_ax.set_xlim(2, 2.8)
scatter_ax.set_ylim(9, 12)
scatter_ax.set_xlabel(r'$\log_{10}(\sigma/km\, s^{-1})$')
scatter_ax.set_ylabel(r'$\log_{10}(L/L_\odot)$')
plt.show()

## Fit as ordinary least squares $\log L = a + b \log \sigma$

In [None]:
# Ordinary least squares for log L = a + b log sigma, solved with the
# normal equations: ab = (A^T A)^{-1} A^T y.
# Functions that I will require:
# np.matmul() - multiply matrices
# np.linalg.inv() - invert a matrix

# Design matrix: a column of ones (intercept a) beside log_sigma (slope b).
A = np.column_stack((np.ones(len(log_sigma)), log_sigma))
# ab[0] is the intercept a, ab[1] is the slope b.
ab = np.matmul(np.matmul(np.linalg.inv(np.matmul(A.T, A)), A.T), log_L)

plt.plot(log_sigma, log_L, '.')
plt.xlim(2, 2.8)
plt.ylim(9, 12)
plt.xlabel(r'$\log_{10}(\sigma/km\, s^{-1})$')
plt.ylabel(r'$\log_{10}(L/L_\odot)$')

# Evaluate the fitted line at the two ends of the plotted x-range.
xline = np.array([2, 2.8])
yline = ab[0] + ab[1] * xline
plt.plot(xline, yline)

# Raw f-string: keeps the LaTeX backslashes literal without relying on
# invalid escape sequences (\p, \s), which newer Pythons warn about.
plt.title(rf'$L \propto \sigma^{{{ab[1]:4.2f}}}$')
plt.show()


# But also fit as $\log \sigma = c + d \log L$ 

# One might expect a very similar result...


In [None]:
# Reverse regression: log sigma = c + d log L, again via the normal
# equations cd = (A2^T A2)^{-1} A2^T y, now with log_sigma as the response.

# Design matrix: a column of ones (intercept c) beside log_L (slope d).
A2 = np.column_stack((np.ones(len(log_L)), log_L))
# cd[0] is the intercept c, cd[1] is the slope d.
cd = np.matmul(np.matmul(np.linalg.inv(np.matmul(A2.T, A2)), A2.T), log_sigma)

# Axes are flipped relative to the previous figure: log L is now horizontal.
plt.plot(log_L, log_sigma, '.')
plt.ylim(2, 2.8)
plt.xlim(9, 12)
plt.ylabel(r'$\log_{10}(\sigma/km\, s^{-1})$')
plt.xlabel(r'$\log_{10}(L/L_\odot)$')

# Names follow the first figure's axes: yline2 holds log L values (the
# horizontal axis here) and xline2 the fitted log sigma values.
yline2 = np.array([9, 12])
# Evaluate the fit at yline2, the ends of this plot's x-range (not at the
# previous cell's `yline`, which holds fitted log L values of the other fit).
xline2 = cd[0] + cd[1] * yline2

plt.plot(yline2, xline2)
# Inverting log sigma = c + d log L gives L proportional to sigma^(1/d).
# A single title carries both the exponent and the axis-flip reminder; two
# consecutive plt.title calls would only keep the last one.
plt.title(rf'$L \propto \sigma^{{{1/cd[1]:4.2f}}}$ (note flip of axes)')

plt.show()

# Why is this?

## Think about the models we are (implicitly) fitting here: 

$\log L_i = a + b \log \sigma_i + e_i$ 

where $e_i \sim N(0,\sigma_e)$ (here $\sigma_e$ is the width of the noise distribution, not the velocity dispersion $\sigma$)

OR


$\log \sigma_i = c + d \log L_i + e_i$ 

where $e_i \sim N(0,\sigma_e)$ (again, $\sigma_e$ is the width of the noise distribution, not the velocity dispersion $\sigma$)



In [None]:
# Side-by-side view of the same scatter under the two implicit noise models:
# left panel puts the error bars on log L, right panel on log sigma.
f, ax = plt.subplots(ncols=2, figsize=(12, 5))

# Make up some plausibly-sized errorbars
panels = (
    (ax[0], {'yerr': 0.5}, r'Fit $\log L = a + b\,\log \sigma$'),
    (ax[1], {'xerr': 0.1}, r'Fit $\log \sigma = c + d\,\log L$'),
)
for panel, err_kwargs, panel_title in panels:
    panel.errorbar(log_sigma, log_L, fmt='x', **err_kwargs)
    panel.set_xlim(2, 2.8)
    panel.set_ylim(9, 12)
    panel.set_xlabel(r'$\log_{10}(\sigma/km\, s^{-1})$')
    panel.set_ylabel(r'$\log_{10}(L/L_\odot)$')
    panel.set_title(panel_title)

plt.tight_layout()

plt.show()

## So, remember that this sort of approach is only valid if we can assume that the uncertainties on one value are negligible (and that this dictates what we fit against what)

I recommend looking at the online guide "Data analysis recipes: Fitting a model to data" by Hogg, Bovy \& Lang (https://arxiv.org/abs/1008.4686) if you want to look into this more.

Here, there is no direct causal link going from $\sigma$ to $L$ or vice versa: rather, both are governed by the mass of the galaxy (and extra factors). So there are no "dependent" and "independent" variables. What is fitted against what depends on the uncertainties.

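### In fact the product of the two fitted gradients (the slope of $\log L$ against $\log \sigma$ times the slope of $\log \sigma$ against $\log L$) is $r^2$, where $r$ is the sample correlation coefficient 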

There is a sad history of saying 'well, we should take the line that bisects these two as the true best fit'. This is statistically unjustified. Hogg et al. are, rightly, scathing about it. They also talk about more complete (maximum likelihood, or similar) approaches that should be taken.