# Linear Least Square Fitting

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib notebook 

# Declaring variables

Simulation data from synchroton.

In [2]:
synch_data = pd.read_csv('Synch_spectrum.txt',sep = "\s+", names = ['Frequency (Hz)','Intensity (erg cm-2 s-1 sr-1 Hz-1)'],skiprows = [0,1])
xpoints = synch_data['Frequency (Hz)']
ypoints = synch_data['Intensity (erg cm-2 s-1 sr-1 Hz-1)']

print(xpoints)

0     5.520614e+07
1     6.624737e+07
2     7.949685e+07
3     9.539622e+07
4     1.144755e+08
5     1.373706e+08
6     1.648447e+08
7     1.978136e+08
8     2.373763e+08
9     2.848516e+08
10    3.418219e+08
11    4.101863e+08
12    4.922235e+08
Name: Frequency (Hz), dtype: float64


In [3]:
#points for cubic fit
# xpoints = np.array([-3,-2,-1,0,1,1.5,2,2.5, 3]) 
# ypoints = np.array([-24,-5,2,-5,4,7,15,20,30])

xmatrix= [] #linear
xmatrix_quad = [] #quadratic
xmatrix_cub = [] #cubic
xmatrix_pow = [] #power function
yval_log = [] #y vector for power
sigma_sq = []

total = np.sum(xpoints)
avg_x = total / xpoints.size
one_arr = np.ones([xpoints.size,1]) #array of ones which is concatenated to matrices
arr = xpoints[np.arange(xpoints.size)].values.reshape((xpoints.size,1))

# Determining x matrices and y vector

Here, I am taking the x and y points and adding them to their respective matrices. Note that the 'xmatrix' is equivalent to the A matrix in Adrian's notes. For a linear or quadratic x matrix:

$$X_{linear} = \begin{bmatrix}
1 & x_{1} \\
1 & x_{2} \\
\vdots & \vdots \\
\end{bmatrix}, \quad
X_{quadratic} = \begin{bmatrix}
1 & x_{1} & x_{1}^{2}\\
1 & x_{2} & x_{2}^{2} \\
\vdots & \vdots & \vdots\\
\end{bmatrix}
$$

For a cubic function just add another column for $x^3$. While the y vector is just the y points:
$\bar{y} = \begin{bmatrix}
y_{1} \\
y_{2} \\
\vdots \\
\end{bmatrix}
$

In [4]:
for n in np.arange(xpoints.size):
    xmatrix.append([1.0,xpoints[n]]) #adding x points to x matrix for linear

for n in np.arange(xpoints.size):
    xmatrix_quad.append([1.0,xpoints[n],xpoints[n]**2]) #adding x points to x matrix for quadratic 
    
for n in np.arange(xpoints.size):
    xmatrix_cub.append([1.0,xpoints[n],xpoints[n]**2,xpoints[n]**3]) #adding x points to x matrix for cubic 
    
# changing to nparray
xmatrix = np.array(xmatrix) 
xmatrix_quad = np.array(xmatrix_quad) 
xmatrix_cub = np.array(xmatrix_cub)

# print(xmatrix_quad)

In [5]:
# a = np.array([1,xpoints[np.arange(xpoints.size)]])
# When you compute xmatrix, see if you can come up with a slick way of doing it that doesn’t require using any loops, 
# starting from xpoints. What you have is fine, but this is good practice for when you deal with lots of data. 
# (Loops are slow when you have really big arrays). Hint: np.vstack or np.hstack might be helpful.
    
xmatrix = np.hstack([one_arr, arr])
xmatrix_quad = np.hstack([xmatrix,arr**2])
xmatrix_cub = np.hstack([xmatrix_quad,arr**3])

print(xmatrix_cub)

[[1.00000000e+00 5.52061439e+07 3.04771832e+15 1.68252776e+23]
 [1.00000000e+00 6.62473727e+07 4.38871439e+15 2.90740797e+23]
 [1.00000000e+00 7.94968472e+07 6.31974872e+15 5.02400098e+23]
 [1.00000000e+00 9.53962166e+07 9.10043815e+15 8.68147369e+23]
 [1.00000000e+00 1.14475460e+08 1.31046309e+16 1.50015865e+24]
 [1.00000000e+00 1.37370552e+08 1.88706685e+16 2.59227415e+24]
 [1.00000000e+00 1.64844662e+08 2.71737627e+16 4.47944974e+24]
 [1.00000000e+00 1.97813595e+08 3.91302183e+16 7.74048915e+24]
 [1.00000000e+00 2.37376314e+08 5.63475144e+16 1.33755652e+25]
 [1.00000000e+00 2.84851577e+08 8.11404207e+16 2.31129768e+25]
 [1.00000000e+00 3.41821892e+08 1.16842206e+17 3.99392238e+25]
 [1.00000000e+00 4.10186270e+08 1.68252776e+17 6.90149788e+25]
 [1.00000000e+00 4.92223524e+08 2.42283998e+17 1.19257883e+26]]


# Power Law

This is an attempt to fit a power law in the form of $y = \beta x^\alpha$. To fit the power law, it must be linearized since, according to Adrian's notes, the "linear part of the term 'linear fit' just means linear in the parameters". One way to do that (based on online searching) is by applying log to both sides to make:

$log(y) = log (\beta x^\alpha) = log(\beta) + log(x^\alpha) = log(\beta) + \alpha log(x)$

Therefore, the linearization of $y = \beta x^\alpha$ is $log(y) = log(\beta) + \alpha log(x)$. Let $y^{'}=log(y)$ and $x^{'}=log(x)$ so that $y^{'} = log(\beta) + \alpha x^{'}$.

With this, we can pretty much proceed as with the linear case but here the x matrix and y vector will be:

$$X_{power} = \begin{bmatrix}
1 & log(x_{1}) \\
1 & log(x_{2}) \\
\vdots & \vdots \\
\end{bmatrix}, \quad
\bar{y} = \begin{bmatrix}
log(y_{1}) \\
log(y_{2}) \\
\vdots \\
\end{bmatrix}
$$

In [6]:
# for n in np.arange(xpoints.size):
#     xmatrix_pow.append([1.0, np.log(xpoints[n])]) #adding x points to x matrix for power function 
    
# #changing to nparray
# xmatrix_pow = np.array(xmatrix_pow) 
# print(xmatrix_pow)

xmatrix_pow = np.hstack([one_arr,np.log(arr)])

# for n in np.arange(ypoints.size):
#     yval_log.append(np.log(ypoints[n]))
    
# print(yval_log)

yval_log = np.log(ypoints[np.arange(ypoints.size)].values.reshape((ypoints.size,1)))
yval_log = np.hstack(yval_log)

[-39.66949574 -39.78246506 -39.89980601 -40.02139321 -40.14719978
 -40.27726338 -40.41166159 -40.55049346 -40.6938654  -40.84187993
 -40.99462639 -41.15217298 -41.31455971]


# Noise covariance matrix

This method is based on Adrian's notes to determing the $\hat{x}$ for $y^{model} = A\hat{x}$. From his notes, $\hat{x}$ is defined as $\hat{x} = [A^TN^{-1}A]^{-1}A^TN^{-1}\bar{y}$.

From the earlier code, I already found $A$ = xmatrix and $\bar{y}$. To find $N$, I first need to find the variance or $\sigma^{2}$. The $N$ matrix is as follows:
$$ N = \begin{pmatrix}
\sigma_{1}^{2} & 0 & 0 &\ldots{} \\
0 & \sigma_{2}^{2} & 0 & \ldots{} \\
0 & 0 & \sigma_{3}^{2} & \ldots{} \\
\vdots & \vdots & \ddots \\
\end{pmatrix} $$
To find $\sigma^{2}$, I used the following formula:
$$\sigma^{2} = \frac{1}{N} \sum_{i}^{N}{(x_{i} - \mu)^{2}}$$
where $N$ is the number of terms and $\mu$ is the mean.

In [7]:
# for n in np.arange(xpoints.size):
#         sigma_sq.append(((ypoints[n] - avg_x)**2)/(xpoints.size))

# sigma_sq= np.array(sigma_sq) #sigma squared or variance
# print(sigma_sq)

sigma_sq =(((ypoints[np.arange(ypoints.size)] - avg_x)**2)/(xpoints.size)).values.reshape((ypoints.size,1))
sigma_sq = np.hstack(sigma_sq)
print(sigma_sq)
noise_cov = np.diag(sigma_sq)

[3.26262682e+15 3.26262682e+15 3.26262682e+15 3.26262682e+15
 3.26262682e+15 3.26262682e+15 3.26262682e+15 3.26262682e+15
 3.26262682e+15 3.26262682e+15 3.26262682e+15 3.26262682e+15
 3.26262682e+15]


Since I have determined the values ($A, N, \bar{y}$) to find $\hat{x}$, it's just a matter of multiplying everything for $\hat{x} = [A^TN^{-1}A]^{-1}A^TN^{-1}\bar{y}$. I broke down the steps of the process:
<ol>
<li>$A^TN^{-1}$</li>
<li>$A^TN^{-1}\bar{y}$</li>
<li>$[A^TN^{-1}A]^{-1}$</li>
<li>$\hat{x} = [A^TN^{-1}A]^{-1}A^TN^{-1}\bar{y}$</li>
<li>$y^{model} = A\hat{x}$</li>
</ol>	


In [8]:
#returns y_model
def variance(m,yval):
    dot_matrix = np.dot(m.T,np.linalg.inv(noise_cov)) #Step 1
    doty_matrix = np.dot(dot_matrix,yval) #Step 2
    inv_matrix = np.linalg.inv(np.dot(dot_matrix,m)) #Step 3
    x_bar = np.dot(inv_matrix, doty_matrix) #Step 4
    y_model = np.dot(m, x_bar) #Step 5
    return y_model

# Error Covariance

$V= [A^{T}N^{-1}A]^{-1}$. The square root of the diagonal of $V$ gives the error bar of each parameter.

In [17]:
def error_cov(a_matrix):
    dot_matrix = np.dot(a_matrix.T,np.linalg.inv(noise_cov))
    error = np.linalg.inv(np.dot(dot_matrix,a_matrix))
    return error

def error_bar(a_matrix):
    return np.sqrt(np.diag(error_cov(xmatrix)))

print(error_cov(xmatrix))
print('\n',error_cov(xmatrix_quad))
print('\n',error_cov(xmatrix_cub))
print('\n',error_cov(xmatrix_pow))

print('\n',error_bar(xmatrix))
print('\n',error_bar(xmatrix_quad))
print('\n',error_bar(xmatrix_cub))
print('\n',error_bar(xmatrix_pow))

[[ 8.40785452e+14 -2.86391297e+06]
 [-2.86391297e+06  1.39060709e-02]]

 [[ 2.80242267e+15 -2.49842340e+07  4.29027501e-02]
 [-2.49842340e+07  2.63344957e-01 -4.83791089e-10]
 [ 4.29027501e-02 -4.83791089e-10  9.38321290e-19]]

 [[ 9.96136031e+15 -1.52337983e+08  6.15158988e-01 -7.13809466e-10]
 [-1.52337983e+08  2.52890141e+00 -1.06639297e-08  1.26982963e-17]
 [ 6.15158988e-01 -1.06639297e-08  4.66821478e-17 -5.70590135e-26]
 [-7.13809466e-10  1.26982963e-17 -5.70590135e-26  7.11731236e-35]]

 [[ 1.93308099e+17 -1.02035878e+16]
 [-1.02035878e+16  5.39287026e+14]]

 [2.89963007e+07 1.17924005e-01]

 [2.89963007e+07 1.17924005e-01]

 [2.89963007e+07 1.17924005e-01]

 [2.89963007e+07 1.17924005e-01]


# Calling Functions and Graphing

In [19]:
plt.figure(1)

lin_fit = variance(xmatrix, ypoints)
quad_fit = variance(xmatrix_quad, ypoints)
cub_fit = variance(xmatrix_cub, ypoints)
pow_fit = variance(xmatrix_pow, yval_log)

plt.plot(xpoints, lin_fit, label='linear')
plt.plot(xpoints, quad_fit, label='quadratic')
plt.plot(xpoints, cub_fit, label='cubic')
plt.scatter(xpoints,ypoints, color='m')

plt.legend(loc='lower left')
plt.title("Linear Best Fit")
plt.xlabel(xpoints.name)
plt.ylabel(ypoints.name)

plt.figure(2)

plt.scatter(np.log(xpoints),yval_log, color='m')
plt.plot(np.log(xpoints), pow_fit, label='power')

plt.title("Linear Best Fit for Power (log-log)")
plt.xlabel(xpoints.name)
plt.ylabel(ypoints.name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Intensity (erg cm-2 s-1 sr-1 Hz-1)')