# Chapter 12 - Covariance and Random Vectors

In [2]:
import numpy as np

## Exercises

### 1. Suppose the pair $(X,Y)'$ has mean vector $(0,2)$ and covariance matrix $\begin{pmatrix} 1 & 2 \\ 2 & 6 \end{pmatrix}$. Find the covariance matrix of the pair $U = (X+Y, X-2Y)'$

### Ans

$$
\begin{equation}
    \begin{aligned}
        U &= \begin{pmatrix} 
            X+Y \\
            X-2Y
            \end{pmatrix} \\
        &= \begin{pmatrix} 
            1 & 1 \\
            1 & -2
            \end{pmatrix} \begin{pmatrix}
            X \\
            Y
            \end{pmatrix} \\
        &= AW \\
        \text{Cov}(AW) &= A\text{Cov}(W)A' \\
        &= \begin{pmatrix} 
            1 & 1 \\
            1 & -2
            \end{pmatrix} \begin{pmatrix}
            1 & 2 \\
            2 & 6 \\
            \end{pmatrix} \begin{pmatrix} 
            1 & 1 \\
            1 & -2
            \end{pmatrix} \\
        &= \begin{pmatrix} 
            11 & -13\\
            -13 & 17
            \end{pmatrix}
    \end{aligned}
\end{equation}
$$

In [3]:
# Numerical check of Cov(U) = A Cov(W) A' for Exercise 1.
# A = [[1, 1], [1, -2]] is symmetric, so the third factor (A') is written out
# as the same matrix as the first factor.
np.array([
        [1, 1], 
        [1, -2]
    ]).dot([
        [1, 2], 
        [2, 6]
    ]).dot([
        [1, 1], 
        [1, -2]
    ])

array([[ 11, -13],
       [-13,  17]])

### 2. Show that $$ \rho (aX+b,cY+d) = \rho(X,Y) $$  for any constants a,b,c and d.

$$
\begin{equation}
\begin{aligned}
    \rho(X,Y) &= \frac{\text{Cov}(X,Y)}{\sqrt{\text{Var}(X)}\sqrt{\text{Var}(Y)}} \\
    &= \frac{E(XY)-E(X)E(Y)}{
        \sqrt{E(X^2)-E(X)^2}\sqrt{E(Y^2)-E(Y)^2}
       } \\
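    \rho(aX+b,cY+d) &= \frac{\text{Cov}(aX+b,cY+d)}{\sqrt{\text{Var}(aX+b)}\sqrt{\text{Var}(cY+d)}} \\
    &= \frac{ac\,\text{Cov}(X,Y)}{\sqrt{a^2\text{Var}(X)}\sqrt{c^2\text{Var}(Y)}} && \text{additive constants drop out, scale factors pull out} \\
    &= \frac{ac}{|a||c|} \cdot \frac{\text{Cov}(X,Y)}{\sqrt{\text{Var}(X)}\sqrt{\text{Var}(Y)}} \\
    &= \rho(X,Y) && \text{when $a$ and $c$ have the same sign; the sign flips if they differ}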
\end{aligned}
\end{equation}
$$

### 3. Suppose $X$, $Y$ and $Z$ are "i.i.d." (independent, identically distributed) random variables, with $E(X^k)$ being denoted by $v_k$, $k = 1,2,3$. Find $Cov(XY,XZ)$ in terms of the $v_k$.

$$
\begin{equation}
\begin{aligned}
    Cov(XY, XZ) &= E(XYXZ) - E(XY)E(XZ) \\
    &= E(X^2YZ) - E(X)E(Y)E(X)E(Z) && \text{X & Y, X & Z are independent} \\
    &= E(X^2)E(YZ) - E(X)^2E(Y)E(Z) && \text{independence} \\
    &= E(X^2)E(Y)E(Z) - E(X)^2E(Y)E(Z) && \text{independence} \\
    &= (E(X^2) - E(X)^2)E(Y)E(Z) \\
    &= (E(X^2) - E(X)^2)E(X)E(X) && \text{identically-distributed assumption} \\
    &= (E(X^2) - E(X)^2)E(X)^2 \\
    &= (v_2 - v_1^2)v_1^2
\end{aligned}
\end{equation}
$$

### 4. Using the properties of covariance in Section 12.1.1, show that for any random variables $X$ and $Y$, $Cov(X+Y,X-Y) = Var(X) - Var(Y)$.

$$
\begin{equation}
\begin{aligned}
    Cov(X+Y,X-Y) &= E[(X+Y)(X-Y)] - E(X+Y)E(X-Y) \\
    &= E(X^2-Y^2)-[(EX + EY)(EX-EY)] \\
    &= E(X^2-Y^2)-[E(X)^2-E(Y)^2] \\
    &= E(X^2) - E(X)^2 - [E(Y^2) - E(Y)^2] \\
    &= Var(X) - Var(Y)
\end{aligned}
\end{equation}
$$


### 5. Suppose we wish to predict a random variable Y by using another random variable, $X$. We may consider predictors of the form $cX + d$ for constants $c$ and $d$. Show that the values of $c$ and $d$ that minimize the mean squared prediction error, $E[(Y-cX-d)^2]$ are $c = \frac{E(XY) - EX \cdot EY}{Var(X)}$, $d = \frac{E(X^2)EY - EX \cdot E(XY)}{Var(X)}$

$$
\begin{equation}
\begin{aligned}
    E[(Y-cX-d)^2] &= E\big(\big[Y - X\frac{Cov(X,Y)}{Var(X)} - \frac{E(X^2) \cdot EY - EX \cdot E(XY)}{Var(X)}\big]^2\big) && \text{substitute, use defn. of Cov} \\
    &= E\big(\big[Y - X\frac{Cov(X,Y)}{Var(X)} - \frac{E(X^2) \cdot EY - EX \cdot (Cov(X,Y) + EX \cdot EY)}{Var(X)}\big]^2\big) && \text{substitute } E(XY) = Cov(X,Y) + E(X)E(Y) \\
    &= E\big(\big[Y - X\frac{Cov(X,Y)}{Var(X)} - \frac{E(X^2) \cdot EY - EX \cdot Cov(X,Y) - E(X)^2 \cdot EY}{Var(X)}\big]^2\big) \\
    &= E\big(\big[Y - X\frac{Cov(X,Y)}{Var(X)} - \frac{[E(X^2) - E(X)^2 ] \cdot EY - EX \cdot Cov(X,Y)}{Var(X)}\big]^2\big) && \text{regroup} \\ 
    &= E\big(\big[Y - X\frac{Cov(X,Y)}{Var(X)} - \frac{Var(X) \cdot EY - EX \cdot Cov(X,Y)}{Var(X)}\big]^2\big) && \text{definition of Var(X)} \\ 
    &= E\big(\big[Y - EY - X\frac{Cov(X,Y)}{Var(X)} + \frac{ EX \cdot Cov(X,Y)}{Var(X)}\big]^2\big) && \text{simplify} \\
    &= E\big(\big[Y - EY + \frac{(EX-X)Cov(X,Y)}{Var(X)} \big]^2\big) && \text{simplify} \\
    &= E\big(Y^2 - Y \cdot EY - \frac{Y(X-EX)(Cov(X,Y))}{Var(X)}- Y \cdot EY + E(Y)^2 + \frac{EY(X-EX)Cov(X,Y)}{Var(X)} \\ &\quad - \frac{Y(X-EX)Cov(X,Y)}{Var(X)} + \frac{EY(X-EX)Cov(X,Y)}{Var(X)} + \frac{(X-EX)^2Cov(X,Y)^2}{Var(X)^2} \big) && \text{distribute} \\
    &= E(Y^2) - 2E(Y)^2 - \frac{2(E(YX) - EY \cdot EX)Cov(X,Y)}{Var(X)} + E(Y)^2 \\ &\quad + \frac{[2EY \cdot EX - 2EY \cdot EX]Cov(X,Y)}{Var(X)} + \frac{E((X-EX)^2)Cov(X,Y)^2}{Var(X)^2} \\
    &= Var(Y) - \frac{2Cov(X,Y)^2}{Var(X)} + \frac{Cov(X,Y)^2}{Var(X)} && \text{simplify}\\
    &= Var(Y) - \frac{Cov(X,Y)^2}{Var(X)} && \text{simplify}\\
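    &= Var(Y)\big[1 - \rho(X,Y)^2\big] && \text{definition of } \rho(X,Y)
\end{aligned}
\end{equation}
$$

This is the mean squared error attained by the stated $c$ and $d$. It is not zero in general (it vanishes only when $\rho(X,Y) = \pm 1$), so we cannot simply set the error to 0. To show that these values are the minimizers, set the partial derivatives of $E[(Y-cX-d)^2]$ with respect to $d$ and $c$ to zero:

$$
\begin{equation}
\begin{aligned}
    \frac{\partial}{\partial d}E[(Y-cX-d)^2] &= -2E(Y-cX-d) = 0 && \Rightarrow d = EY - c \cdot EX \\
    \frac{\partial}{\partial c}E[(Y-cX-d)^2] &= -2E[X(Y-cX-d)] = 0 && \Rightarrow E(XY) - cE(X^2) - d \cdot EX = 0 \\
    E(XY) - cE(X^2) - (EY - c \cdot EX)EX &= 0 && \text{substitute } d \\
    c &= \frac{E(XY) - EX \cdot EY}{E(X^2) - E(X)^2} = \frac{E(XY) - EX \cdot EY}{Var(X)} \\
    d &= EY - c \cdot EX = \frac{E(X^2)EY - EX \cdot E(XY)}{Var(X)}
\end{aligned}
\end{equation}
$$

The objective is a quadratic in $(c,d)$ whose Hessian $\begin{pmatrix} 2E(X^2) & 2EX \\ 2EX & 2 \end{pmatrix}$ has determinant $4Var(X) > 0$, so this stationary point is the unique minimum. Thus, the stated values for $c$ and $d$ minimize the mean squared error.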


### 6. Programs A and B consist of $r$ and $s$ modules, respectively, of which $c$ modules are common to both. As a simple model, assume that each module has probability $p$ of being correct, with the modules acting independently. Let $X$ and $Y$ denote the numbers of correct modules in A and B respectively. Find the correlation $\rho(X,Y)$ as a function of $r,s,c$ and $p$.

Hint: Write $X = X_1 + ... + X_r$ where $X_i$ is 1 or 0 depending on whether module $i$ of $A$ is correct. Of those, let $X_1, ..., X_c$ correspond to the modules in common to $A$ and $B$. Similarly, write $Y=Y_1 + ... + Y_s$, for the modules in $B$, again having the first $c$ of them correspond to the modules in common. Do the same for $B$, and for the set of common modules.

$$
\begin{equation}
\begin{aligned}
    \rho(X,Y) &= \frac{Cov(X,Y)}{\sqrt{Var(X)}\sqrt{Var(Y)}} && \text{Definition} \\
    &= \frac{Cov(X_1 + ... + X_r, Y_1 + ... + Y_s)}{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{Sum of Bernoulli R.V.} \\
    &= \frac{\sum_{i=1}^{r}\sum_{j=1}^{s}Cov(X_i, Y_j)}{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{Linearity of Sums} \\
    &= \frac{\sum_{k=1}^{c}Cov(X_k, Y_k) + \big[\sum_{i=1}^{r}\sum_{j=1}^{s}Cov(X_i, Y_j) - \sum_{k=1}^{c}Cov(X_k, Y_k)\big] }{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{Separate out dependent and independent variables, remove duplications} \\
    &= \frac{\sum_{k=1}^{c}Cov(X_k, Y_k)}{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{Indep. vars. have 0 covariance} \\
    &= \frac{\sum_{k=1}^{c}Var(X_k)}{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{$X_k$ is the same as $Y_k$} \\
    &= \frac{cp(1-p)}{\sqrt{Var(X_1 + ... + X_r)}\sqrt{Var(Y_1 + ... + Y_s)}} && \text{Simplify} \\
    &= \frac{cp(1-p)}{\sqrt{rp(1-p)}\sqrt{sp(1-p)}} && \text{Simplify} \\
    &= \frac{cp(1-p)}{\sqrt{rs}p(1-p)} && \text{Simplify} \\
    &= \frac{c}{\sqrt{rs}}
\end{aligned}
\end{equation}
$$


In [87]:
def simulate_correlated_program_modules_corrcoeff(
    num_common=10,
    p_proba=0.4,
    num_r_components=20,
    num_s_components=30,
    num_experiments=10000,
    seed=None
):
    """
    Estimate by simulation the correlation between the numbers of correct
    modules in two programs that share some of their modules.

    Each module is an independent 0/1 draw that equals 1 with probability
    ``p_proba``. Program A has ``num_r_components`` modules, program B has
    ``num_s_components`` modules, and the first ``num_common`` modules of A
    are reused as the first ``num_common`` modules of B.

    Parameters
    ----------
    num_common : int
        Number of modules shared by both programs.
    p_proba : float
        Probability that a single module is correct.
    num_r_components : int
        Number of modules in program A.
    num_s_components : int
        Number of modules in program B.
    num_experiments : int
        Number of simulated (A, B) pairs.
    seed : int or None
        When given, a private ``np.random.RandomState(seed)`` is used so the
        result is reproducible; when None, NumPy's global random state is used.

    Returns
    -------
    numpy.ndarray
        The 2x2 matrix returned by ``np.corrcoef`` for the per-experiment
        counts of correct modules in A and in B.

    Raises
    ------
    ValueError
        If ``num_common`` is negative or exceeds the size of either program.
    """
    # Without this check, num_common > num_r_components silently produced a
    # program B with fewer than num_s_components modules.
    if not 0 <= num_common <= min(num_r_components, num_s_components):
        raise ValueError(
            "num_common must be between 0 and min(num_r_components, "
            "num_s_components); got {}".format(num_common)
        )

    rng = np.random if seed is None else np.random.RandomState(seed)
    false_true_probas = [1 - p_proba, p_proba]

    # One row per experiment, one column per module of A. The first
    # num_common columns double as the shared modules of B.
    r_components = rng.choice(
        [0, 1],
        size=(num_experiments, num_r_components),
        p=false_true_probas
    )
    # Modules that belong to B only.
    s_only_components = rng.choice(
        [0, 1],
        size=(num_experiments, num_s_components - num_common),
        p=false_true_probas
    )

    X = r_components.sum(axis=1)
    Y = r_components[:, :num_common].sum(axis=1) + s_only_components.sum(axis=1)

    return np.corrcoef(X, Y)

# Estimate rho(X, Y) by simulation for c=10, r=20, s=30. The derivation above
# gives c/sqrt(r*s), which does not depend on p.
simulate_correlated_program_modules_corrcoeff(
    p_proba=0.9,
    num_common=10,
    num_r_components=20,
    num_s_components=30,
    num_experiments=100000
)       
        

array([[ 1.        ,  0.40945345],
       [ 0.40945345,  1.        ]])

In [86]:
def analytical_correlated_program_modules_corrcoeff(
    p=0.4,
    c=10,
    r=20,
    s=30
):
    """
    Closed-form correlation between the numbers of correct modules in two
    programs: c / sqrt(r * s).

    ``c`` is the number of shared modules, ``r`` and ``s`` are the sizes of
    the two programs. ``p`` is accepted but never used in the computation.
    """
    root_of_size_product = (r * s) ** 0.5
    return c / root_of_size_product

# Evaluate the closed form c/sqrt(r*s) for c=10, r=20, s=30 to compare with the
# simulated estimate; the function body does not use p.
analytical_correlated_program_modules_corrcoeff(
    p=0.4,
    c=10,
    r=20,
    s=30
)       
        

0.408248290463863

### 7. Suppose we have random variables $X$ and $Y$, and define the new random variable $Z=8Y$. Then which of the following is correct? $(i)$ $\rho(X,Z) = \rho(X,Y)$. $(ii)$ $\rho(X,Z)=0$. $(iii)$ $\rho(Y,Z)=0$. $(iv)$ $\rho(X,Z) = 8\rho(X,Y)$. $(v)$ $\rho(X,Z) = \frac{1}{8}\rho(X,Y)$. $(vi)$ There is no special relationship.

$$
\begin{equation}
\begin{aligned}
    \rho(X,Z) &= \frac{Cov(X,Z)}{\sqrt{Var(X)}\sqrt{Var(Z)}} \\ 
    &= \frac{Cov(X,8Y)}{\sqrt{Var(X)}\sqrt{Var(8Y)}} \\ 
    &= \frac{E(8XY) - E(X)E(8Y)}{\sqrt{Var(X)}\sqrt{Var(8Y)}} \\ 
    &= \frac{8E(XY) - 8E(X)E(Y)}{\sqrt{Var(X)}\sqrt{Var(8Y)}} \\ 
    &= \frac{8(E(XY) - E(X)E(Y))}{\sqrt{Var(X)}\sqrt{8^2Var(Y)}} \\ 
    &= \frac{Cov(X,Y)}{\sqrt{Var(X)}\sqrt{Var(Y)}} \\ 
    &= \rho(X,Y)
\end{aligned}
\end{equation}
$$

Thus, $(i)$ is true. Can't assume $\rho(X,Z) = 0$, so $(ii)$ is false. 

$$
\begin{equation}
\begin{aligned}
    \rho(Y,Z) &= \frac{Cov(Y,8Y)}{\sqrt{Var(Y)}\sqrt{Var(8Y)}} \\
    &= \frac{E(8Y^2)-E(Y)E(8Y)}{\sqrt{Var(Y)}\sqrt{Var(8Y)}} \\
    &= \frac{8E(Y^2)-E(Y)8E(Y)}{\sqrt{Var(Y)}\sqrt{Var(8Y)}} \\
    &= \frac{8(E(Y^2)-E(Y)E(Y))}{\sqrt{Var(Y)}\sqrt{8^2Var(Y)}} \\
    &= \frac{(E(Y^2)-E(Y)E(Y))}{\sqrt{Var(Y)}\sqrt{Var(Y)}} \\
    &= \frac{Var(Y)}{Var(Y)} \\
    &= 1
\end{aligned}
\end{equation}
$$

Thus, $(iii)$ is incorrect.

$(iv)$, $(v)$, and $(vi)$ are false, since $\rho(X,Z)=\rho(X,Y)$ as seen earlier.


### 8. Derive (12.3). Hint: A constant, $q$ here, is a random variable, trivially, with 0 variance.

$$
\begin{equation}
\begin{aligned}
    Cov(X,Y+q) &= Cov(X,Y) + Cov(X,q) && \text{Linearity of Sums} \\
    &= Cov(X,Y) + E(Xq) - E(X)E(q) \\
    &= Cov(X,Y) + qE(X) - qE(X) \\
    &= Cov(X,Y)
\end{aligned}
\end{equation}
$$


### 9. Consider a three-card hand drawn from a 52-card deck. Let $X$ and $Y$ denote the number of hearts and diamonds, respectively. Find $\rho(X, Y )$.

In [44]:
def corr_three_cards(num_experiments=10000, seed=None):
    """
    Estimate by simulation the correlation between the number of hearts and
    the number of diamonds in a three-card hand from a 52-card deck.

    Parameters
    ----------
    num_experiments : int
        Number of hands to draw.
    seed : int or None
        When given, a private ``np.random.RandomState(seed)`` is used so the
        result is reproducible; when None, NumPy's global random state is used.

    Returns
    -------
    numpy.ndarray
        The 2x2 matrix returned by ``np.corrcoef`` for the per-hand heart and
        diamond counts.
    """
    num_cards_drawn = 3
    heart_val = 1
    diamond_val = 2
    spades_val = 3
    clubs_val = 4

    rng = np.random if seed is None else np.random.RandomState(seed)

    # 13 cards per suit; each card is represented only by its suit code.
    deck = np.repeat([heart_val, diamond_val, spades_val, clubs_val], 13)

    hearts = np.empty(num_experiments, dtype=int)
    diamonds = np.empty(num_experiments, dtype=int)
    for i in range(num_experiments):
        # replace=False: a hand is dealt without replacement.
        drawn_cards = rng.choice(deck, replace=False, size=num_cards_drawn)
        hearts[i] = np.count_nonzero(drawn_cards == heart_val)
        diamonds[i] = np.count_nonzero(drawn_cards == diamond_val)

    return np.corrcoef(hearts, diamonds)

# Estimate rho(X, Y) from 10000 simulated three-card hands; the off-diagonal
# entry of the returned matrix is the estimate.
res = corr_three_cards(num_experiments=10000)
res

array([[ 1.        , -0.33057586],
       [-0.33057586,  1.        ]])

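Thus the correlation between $X$ and $Y$ is about -0.33. This agrees with the exact value: for a hand drawn without replacement, $Cov(X,Y) = -\frac{49}{272}$ and $Var(X) = Var(Y) = \frac{147}{272}$, so $\rho(X,Y) = -\frac{1}{3}$.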

### 10. Consider the lightbulb example in Section 9.1. Use the “mailing tubes” on $Var()$ and $Cov()$ to find $\rho(X_1, T_2)$.

$$
\begin{equation}
\begin{aligned}
    T_r &= X_1 + ... + X_r && \text{Given} \\
    T_2 &= X_1 + X_2 \\
    \rho(X_1, T_2) &= \frac{Cov(X_1, T_2)}{\sqrt{Var(X_1)Var(T_2)}} \\
    &= \frac{Cov(X_1, X_1 + X_2)}{\sqrt{Var(X_1)Var(X_1+X_2)}} \\
    &= \frac{Cov(X_1, X_1) + Cov(X_1, X_2)}{\sqrt{Var(X_1)Var(X_1+X_2)}} && \text{Linearity of Sums}\\
    &= \frac{Cov(X_1, X_1)}{\sqrt{Var(X_1)Var(X_1+X_2)}} && \text{Independence}\\
    &= \frac{Var(X_1)}{\sqrt{Var(X_1)Var(X_1+X_2)}} && \text{Simplify}\\    
    &= \frac{Var(X_1)}{\sqrt{Var(X_1)[Var(X_1) + Var(X_2)]}} && \text{Independence}\\
    &= \frac{Var(X_1)}{\sqrt{2 \cdot Var(X_1)^2}} && \text{$X_1$ and $X_2$ are identically distributed} \\
    &= \frac{1}{\sqrt{2}}
\end{aligned}
\end{equation}
$$


In [100]:
# Numerical value of the analytical result 1/sqrt(2), for comparison with the simulation.
1/2**0.5

0.7071067811865475

In [102]:
def simulate_lightbulb_corrcoeff(
    num_experiments=100000,
    seed=None
):
    """
    Estimate by simulation the correlation between X_1 and T_2 = X_1 + X_2,
    where X_1 and X_2 are independent draws from ``exponential()`` with its
    default scale.

    Parameters
    ----------
    num_experiments : int
        Number of simulated (X_1, X_2) pairs.
    seed : int or None
        When given, a private ``np.random.RandomState(seed)`` is used so the
        result is reproducible; when None, NumPy's global random state is used.

    Returns
    -------
    numpy.ndarray
        The 2x2 matrix returned by ``np.corrcoef`` for X_1 and T_2.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw all lifetimes at once: column 0 is X_1, column 1 is X_2.
    lifetimes = rng.exponential(size=(num_experiments, 2))
    X_1 = lifetimes[:, 0]
    T_2 = lifetimes.sum(axis=1)

    return np.corrcoef(X_1, T_2)

# Simulated estimate of rho(X_1, T_2) with the default number of experiments.
simulate_lightbulb_corrcoeff()

array([[ 1.        ,  0.71319282],
       [ 0.71319282,  1.        ]])

### 11. Find the following quantities for the dice example in Section 12.3.2.1:

(a) $Cov(X,2S)$

(b) $Cov(X,S+Y)$

(c) $Cov(X+2Y,3X-Y)$

(d) $p_{X,S}(3,8)$

In [114]:
# Variance of one fair die, computed as E(X^2) - E(X)^2 over the faces 1..6.
(np.arange(1,7)**2).mean() - np.arange(1,7).mean()**2

2.9166666666666661

(a) $Cov(X,2S)$

$$
\begin{equation}
\begin{aligned}
    Cov(X,2S) &= Cov(X, 2(X+Y)) && S=X+Y \\
    &= Cov(X, 2X) + Cov(X, 2Y) \\
    &= 2Var(X) + 2Cov(X,Y) \\
    &= 2Var(X) + 0 && \text{X and Y are independent, so } Cov(X,Y) = 0 \\
    &\approx 2 \cdot 2.917 \\
    &\approx 5.833
\end{aligned}
\end{equation}
$$


In [115]:
# The two dice are independent, so Cov(X, Y) = 0 and
# Cov(X, 2S) = 2*Var(X) + 2*Cov(X, Y) = 2*Var(X).
2*((np.arange(1,7)**2).mean() - np.arange(1,7).mean()**2)

5.8333333333333321

(b) $Cov(X,S+Y)$

$$
\begin{equation}
\begin{aligned}
    Cov(X,S+Y) &= Cov(X, X+Y + Y) \\
    &= Cov(X, X+2Y) \\
    &= Cov(X,X) + 2Cov(X,Y) \\
    &= Var(X) + 2 \cdot 0 && \text{X and Y are independent, so } Cov(X,Y) = 0 \\
    &= Var(X) \\
    &\approx 2.917
\end{aligned}
\end{equation}
$$

(c) $Cov(X+2Y,3X-Y)$

$$
\begin{equation}
\begin{aligned}
    Cov(X+2Y, 3X-Y) &= 3Cov(X,X) - Cov(X,Y) + 6Cov(X,Y) - 2Cov(Y,Y) \\
    &= 3Cov(X,X) + 5Cov(X,Y) - 2Cov(Y,Y) \\
    &= 3Var(X) + 0 - 2Var(X) && \text{independence gives } Cov(X,Y) = 0 \text{; identical distributions give } Var(Y) = Var(X) \\
    &= Var(X) \\
    &\approx 2.917
\end{aligned}
\end{equation}
$$

(d) $p_{X,S}(3,8)$

$$
\begin{equation}
\begin{aligned}
    p_{X,S}(3,8) &= P(X=3, S=8) \\
    &= P(X=3,Y=5) && S-X = Y\\
    &= P(X=3)P(Y=5) && \text{independent} \\
    &= \frac{1}{6} \cdot \frac{1}{6} \\
    &= \frac{1}{36}
\end{aligned}
\end{equation}
$$


### 12. Suppose $X_i, i=1,2,3,4,5$ are independent and each have mean 0 and variance 1. Let $Y_i = X_{i+1} - X_i, i=1,2,3,4$. Using the material in Section 12.4, find the covariance matrix of $Y=(Y_1,Y_2,Y_3,Y_4)$.

$$
\begin{equation}
\begin{aligned}
    Y &= AW \\
    \begin{pmatrix} -X_1 + X_2 \\
    -X_2 + X_3 \\
    -X_3 + X_4 \\
    -X_4 + X_5
    \end{pmatrix} &= \begin{pmatrix} 
    -1 & 1 & 0 & 0 & 0 \\
    0 & -1 & 1 & 0 & 0 \\
    0 & 0 & -1 & 1 & 0 \\
    0 & 0 & 0 & -1 & 1 \\
    \end{pmatrix} \times \begin{pmatrix}
    X_1 \\
    X_2 \\
    X_3 \\
    X_4 \\
    X_5
    \end{pmatrix} \\
    Cov(Y) &= ACov(W)A' \\
    &= \begin{pmatrix} 
    -1 & 1 & 0 & 0 & 0 \\
    0 & -1 & 1 & 0 & 0 \\
    0 & 0 & -1 & 1 & 0 \\
    0 & 0 & 0 & -1 & 1 \\
    \end{pmatrix} \times \begin{pmatrix}
    1 & 0 & 0 & 0 & 0 \\
    0 & 1 & 0 & 0 & 0 \\
    0 & 0 & 1 & 0 & 0 \\
    0 & 0 & 0 & 1 & 0 \\
    0 & 0 & 0 & 0 & 1 \end{pmatrix} \times
    \begin{pmatrix}
    -1 & 0 & 0 & 0 \\
    1 & -1 & 0 & 0 \\
    0 & 1 & -1 & 0 \\
    0 & 0 & 1 & -1 \\
    0 & 0 & 0 &  1 
    \end{pmatrix} \\
    &= \begin{pmatrix}
     2 & -1 &  0 &  0 \\
    -1 &  2 & -1 &  0 \\
     0 & -1 &  2 & -1 \\
     0 &  0 & -1 &  2 \\
    \end{pmatrix}
\end{aligned}
\end{equation}
$$


In [117]:
# Cov(W): the X_i are independent with variance 1, so their covariance matrix is the 5x5 identity.
np.identity(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [118]:
def calculate_Y():
    """
    Return the 4x4 matrix A I A', where A is the 4x5 first-difference matrix
    (row i has -1 in column i and +1 in column i+1) and I is the 5x5 identity.
    """
    differencing = np.array([
        [-1, 1, 0, 0, 0],
        [0, -1, 1, 0, 0],
        [0, 0, -1, 1, 0],
        [0, 0, 0, -1, 1],
    ])
    cov_w = np.identity(5)

    left_product = np.dot(differencing, cov_w)
    return np.dot(left_product, differencing.T)

# Despite its name, calculate_Y returns the matrix product A I A' (the covariance matrix of Y), not Y itself.
calculate_Y()

array([[ 2., -1.,  0.,  0.],
       [-1.,  2., -1.,  0.],
       [ 0., -1.,  2., -1.],
       [ 0.,  0., -1.,  2.]])