In [1]:
import numpy as np
import pandas as pd
from scipy import stats

Synthetic Data

In [2]:
x = np.arange(-5,6,1)
x

array([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5])

Generate y values with $\epsilon \sim N(0,1)$

In [3]:
y = 2 - 4*x + 2*np.random.normal(size =x.shape)
y

array([ 25.23177037,  21.9553629 ,  13.2803119 ,   9.51970464,
         2.59314017,   2.35549707,  -0.57031543,  -6.04664543,
        -6.7873656 , -13.20695451, -19.54909957])

(1) Estimate the correlation between $x$ and $y$.

$$Corr(x,y) = \dfrac{Cov(x,y)}{\sqrt{Var(x)Var(y)}}$$

$$Cov(x,y) = E(xy) - E(x) E(y)$$

$$Cov(x,y) = \dfrac{1}{n-1} [ \Sigma_{i-1}^n x_i y_i - n E(x) E(y)$$
where

$$E(X) = \dfrac{1}{n} \Sigma_{i-1}^n x_i $$

In [4]:
n = len(x)

In [5]:
cov_xy = (1/(n-1))*( np.sum(x*y) - n*np.mean(x)*np.mean(y) )
cov_xy

-45.90528076248061

In [6]:
corr_xy = cov_xy / np.sqrt(np.var(x, ddof = 1) * np.var(y, ddof = 1))
corr_xy

-0.988081812595407

(2) Find the estimators of $a$ and $b$.

In [7]:
b_hat = cov_xy / np.var(x, ddof = 1)
b_hat

-4.173207342043692

In [8]:
a = np.mean(y) - b_hat*np.mean(x)
a

2.6159460467327715

(3) Find the estimators of $\sigma^2$.

In [9]:
y_pred = a+b_hat*x
sigma2 = (1/(n-2)) * np.sum((y - y_pred)**2)
sigma2

5.165932805107845

(4) What is the distribution of $\hat b$?

$$ Var(\hat b) = \dfrac{\sigma^2}{\Sigma_{i-1}^n x_i^2 - n\bar x^2}$$

In [10]:
var_b = sigma2 / ((np.sum(x**2)  - n*np.mean(x)**2))
var_b

0.04696302550098041

(5) Find $t$ statistic.

$$t_{n-2} = \dfrac{\hat \beta \hspace{3 mm} \sqrt { \Sigma_{i=1}^n x_i^2 - n \bar x^2}}{\hat\sigma}$$

In [11]:
(b_hat * np.sqrt((np.sum(x**2)  - n*np.mean(x)**2)))/ np.sqrt(sigma2)

-19.257146259715455

$$t_{v} = \dfrac{z}{\sqrt {\dfrac{\chi_v^2}{v}}}$$

In [22]:
b_null = 0
z = (b_hat - b_null) / np.sqrt(var_b)
v_df = n - 2
y_pred = a+b_hat*x
chi_v = np.sum((y - y_pred)**2) / sigma2
tv = z / np.sqrt( chi_v  / v_df)
tv

-19.257146259715455