In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import scipy.optimize as optimize
import datetime

In [None]:
# Read the seven numeric columns of the weekly Vela flux table.  Handing the
# path straight to np.loadtxt lets numpy open *and close* the file itself,
# instead of leaving an explicitly opened handle dangling.
data = np.loadtxt("../data/Vela_Flux.txt", usecols=range(7))

# This is how we pull out the data from columns in the array.

# This is the date in "Mission Elapsed Time"
# For the Fermi mission, this is defined to be the number of seconds since the start of 2001.
date_MET = data[:,0]
# This is the offset in seconds between the Fermi "MET" and the UNIX "epoch" used by matplotlib
# NOTE(review): 978336000 is 8 hours after 2001-01-01 00:00 UTC (978307200), and
# fromtimestamp() below converts to the machine's local time zone, so the plotted
# dates can shift by a few hours between machines -- confirm this is intended.
MET_To_Unix = 978336000

# These are the numbers of photons observed from Vela each week in the "low" Energy Band (100 MeV - 800 MeV)
nObs_LE = data[:,1]

# These are the number of photons expected from Vela each week, under the assumption that it is
# not varying at all, and the only differences depend on how long we spent looking at Vela
# that particular week
nExp_LE = data[:,2]

# These are the band bounds, in MeV
LE_bounds = (100., 800.)

# This is the "significance" of the variation for each week.  We will discuss this more later
signif_LE = data[:,3]

# Columns 4-6 are read the same way for the second ("HE") band: observed counts,
# expected counts and significance (presumably the high-energy analogues of the
# LE columns above -- verify against the data file header).
nObs_HE = data[:,4]
nExp_HE = data[:,5]
# Column 6, one value per week (the original `data[:6]` took the first six *rows*).
signif_HE = data[:,6]
HE_bounds = (800., 10000.)

# This converts the dates to something that matplotlib understands
dates = [datetime.datetime.fromtimestamp(date + MET_To_Unix) for date in date_MET]


In [None]:
# Weekly excess in the low-energy band: observed minus expected counts.
excess_counts = nObs_LE-nExp_LE
_ = plt.scatter(dates, excess_counts)
_ = plt.xlabel(r"Date [year]")
# Label the axis with what is actually plotted (the excess, not the raw counts).
_ = plt.ylabel(r"$n_{\rm obs} - n_{\rm exp}$ [per week]")

In [None]:
# Approximate calendar year of each measurement: the elapsed seconds are divided
# by the length of a fixed 365-day year (leap days are ignored) and offset by 2001.
date_YEAR = date_MET / (365 * 24 * 3600) + 2001

In [None]:
# Summary statistics of the observation dates, expressed in years.
print("The mean and standard deviation of the observation time are %0.2f, %0.2f"
      % (np.mean(date_YEAR), np.std(date_YEAR)))
print("The variance is the square of the standard deviation: %0.2f" % np.var(date_YEAR))

In [None]:
# Summary statistics of the weekly excess counts.
print("The mean and standard deviation of the excess counts are %0.2f, %0.2f"
      % (np.mean(excess_counts), np.std(excess_counts)))
print("The variance is the square of the standard deviation: %0.1f" % np.var(excess_counts))

### Variances and covariances:

The variance is a measure of the scatter of a quantity.

$\sigma^2 = \frac{\sum_i (x_i - \mu_x)^2}{n}$

Where $\mu_x$ is the mean of the measurements $\mu_x = \frac{\sum_i x_i}{n}$

The covariance is a measure of how well variations in one quantity match variations in a second quantity.

The equation for the covariance is quite similar to the equation for the variance:

$\sigma_{xy} = \frac{\sum_i (x_i - \mu_x) (y_i - \mu_y)}{n}$

I.e., we replace one of the factors of $(x_i - \mu_x)$ with $(y_i - \mu_y)$

Because the equations are so similar, we often compute both the variances and covariances at the same time, and we often pack both the variances and covariance into a single matrix.

In our case we have two quantities: the dates and the excess counts.  We have already computed the variance of each of those.  So let's compute it all:


In [None]:
# 2x2 matrix: variances of the date and of the excess on the diagonal, their
# covariance off the diagonal.  bias=True normalises by n rather than numpy's
# default n - 1, so the diagonal equals np.var(...) and matches the 1/n
# definitions used in this notebook.
cov = np.cov(date_YEAR, excess_counts, bias=True)

### Let's have a look at the pieces of the covariance matrix

In [None]:
# Diagonal elements are variances; their square roots are the scatter of each quantity.
print("The xx element of the covariance matrix is %.2f" %  cov[0,0])
print("The scatter of the x element (i.e., the year) is %.2f years" % np.sqrt(cov[0,0]))
print("The yy element of the covariance matrix is %.2f" %  cov[1,1])
print("The scatter of the y element (i.e., the excess counts) is %.1f counts" % np.sqrt(cov[1,1]))
# The off-diagonal element carries the product of the two units.
print("The xy element of the covariance matrix is %.2f years*counts" %  cov[0,1])

### Another way to consider correlations is to ask what part of the variance in one quantity is tied to the variance of another quantity.

To do this, we want to factor out the variances of the two quantities.

$c_{xy} = \frac{\sigma_{xy}}{\sqrt{\sigma_{xx}\sigma_{yy}}}$

Note that $c_{xx} = c_{yy} = 1$, i.e., each quantity is 100% correlated with itself.


In [None]:
# Correlation matrix between the observation date and the weekly excess
# (excess_counts is the observed-minus-expected array computed earlier).
np.corrcoef(date_MET, excess_counts)

### Interpretation

What this is saying is that about 1% of the variance in the excess is attributable to the change in time.

### Comparison with quantities that are highly correlated.

In [None]:
# Observed versus expected weekly counts; the trailing semicolon suppresses
# the text repr of the last call in the notebook output.
plt.scatter(nExp_LE, nObs_LE)
plt.xlabel(r"$n_{\rm exp}$ [per week]")
plt.ylabel(r"$n_{\rm obs}$ [per week]");

In [None]:
# Correlation matrix between the observed and the expected weekly counts.
np.corrcoef(x=nObs_LE, y=nExp_LE)

In [None]:
def gen_2d_gaussian(n, sigma_xx, sigma_yy, sigma_xy, epsilon=0.0001, rng=None):
    """Draw ``n`` samples from a zero-mean, correlated two-dimensional Gaussian.

    Parameters
    ----------
    n : int
        Number of (x, y) pairs to generate.
    sigma_xx, sigma_yy : float
        Diagonal elements of the covariance matrix (variances of x and y).
    sigma_xy : float
        Off-diagonal element of the covariance matrix.
    epsilon : float, optional
        Value added to both diagonal elements before factorising.  The default
        (0.0001) keeps the Cholesky factorisation usable when the requested
        matrix is singular (e.g. sigma_xy**2 == sigma_xx * sigma_yy); pass 0
        to use the covariance matrix exactly as given.
    rng : object with a ``normal(size=...)`` method, optional
        Random source, e.g. a ``numpy.random.RandomState``.  When omitted the
        global ``numpy.random`` state is used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, n)``: row 0 holds the x values, row 1 the y values.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the (regularised) covariance matrix is not positive definite.
    """
    K_0 = np.array([[sigma_xx, sigma_xy], [sigma_xy, sigma_yy]])
    # Small diagonal "jitter" so that perfectly correlated inputs still factorise.
    K = K_0 + epsilon * np.identity(2)
    # K = L L^T, hence L @ u has covariance K when u is white noise.
    L = np.linalg.cholesky(K)
    source = np.random if rng is None else rng
    u = source.normal(size=2 * n).reshape(2, n)
    return np.dot(L, u)

In [None]:
def draw_2d_gaussian(n, sigma_xx, sigma_yy, sigma_xy):
    """Generate ``n`` correlated 2D Gaussian samples and scatter-plot them.

    The samples come from ``gen_2d_gaussian`` and are drawn on axes fixed to
    [-5, 5] in both directions, so plots made with different covariance
    settings can be compared directly.

    Parameters
    ----------
    n : int
        Number of points to draw.
    sigma_xx, sigma_yy, sigma_xy : float
        Covariance-matrix elements forwarded to ``gen_2d_gaussian``.

    Returns
    -------
    The sample array produced by ``gen_2d_gaussian`` (row 0 is plotted on the
    x axis, row 1 on the y axis), so callers that assign the result get the
    plotted data rather than ``None``.
    """
    vals = gen_2d_gaussian(n, sigma_xx, sigma_yy, sigma_xy)
    _ = plt.xlim(-5, 5)
    _ = plt.ylim(-5, 5)
    _ = plt.scatter(vals[0], vals[1])
    return vals

In [None]:
# Uncorrelated case: unit variances and zero covariance.
vals = draw_2d_gaussian(n=1000, sigma_xx=1, sigma_yy=1, sigma_xy=0)

In [None]:
# Fully correlated case: the covariance equals the (unit) variances.
vals = draw_2d_gaussian(n=1000, sigma_xx=1, sigma_yy=1, sigma_xy=1)

In [None]:
# Fully anti-correlated case: the covariance is minus the (unit) variances.
vals = draw_2d_gaussian(n=1000, sigma_xx=1, sigma_yy=1, sigma_xy=-1)

In [None]:
# Partially correlated case: covariance of 0.5 with unit variances.
vals = draw_2d_gaussian(n=1000, sigma_xx=1, sigma_yy=1, sigma_xy=0.5)