# Biol 359A  | Linear Algebra
### Spring 2022, Week 8
<hr>

Objectives:
-  Intuition about matrix multiplication  
-  Relationship between eigenvectors and types of matrices
-  Introduce the covariance matrix 



In [None]:
!pip install palmerpenguins

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns 
import sklearn as sk
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as pl
import ipywidgets as widgets
from palmerpenguins import load_penguins

%matplotlib inline

import warnings


def warn(*_args, **_kwargs):
    """Accept any arguments and do nothing; silent stand-in for warnings.warn."""
    return None


# Route every later warnings.warn(...) call to the no-op above so that
# warning messages do not clutter the notebook output.
warnings.warn = warn

# Shared plot-styling constants. Only FIG_SIZE is referenced in this part of
# the notebook; the others are presumably used by later cells (TODO confirm).
TITLE_FONT = 20
LABEL_FONT = 16
TICK_FONT = 16
FIG_SIZE = (10,10)
COLORS= ["#008080","#CA562C"]

# Apply seaborn's defaults first (font scale 1, FIG_SIZE as the default figure
# size); the two calls below then override individual settings, so the order
# of these three statements matters.
sns.set(font_scale=1, rc={'figure.figsize':FIG_SIZE}) 
# White background with a 2-wide black axes outline.
sns.set_style("white",  {'axes.linewidth': 2, 'axes.edgecolor':'black'})
# Hide the top and right axes spines.
plt.rc("axes.spines", top=False, right=False)

In [None]:
def colorizer(x, y):
    """
    Map x-y coordinates to a rgb color

    Red grows and blue shrinks as y increases; green grows with x. Every
    channel is clamped to the range [0, 1] so the result is always a valid
    RGB triple, even for points that lie outside the -4..4 by -3..3 grid
    used by the plots in this notebook.

    Parameters
    ----------
    x, y : float
        Coordinates of the point.

    Returns
    -------
    tuple
        (r, g, b) with each component between 0 and 1.
    """
    def clamp(channel):
        # Keep a channel value inside the valid color range.
        return max(0, min(1, channel))

    r = clamp(1 + y / 3)
    b = clamp(1 - y / 3)
    g = clamp(1 / 4 + x / 16)
    return (r, g, b)

def _draw_eigen_arrows(eigen_vectors):
    """Draw the first two eigenvector columns as a red and a blue arrow from the origin."""
    origin = [0, 0]
    for vector, color in zip(eigen_vectors.T, ("r", "b")):
        plt.quiver(*origin, *vector, color=[color], scale=21)


def gen_basic_plot(A, eigen=False):
    """
    Plot a colored grid of points before and after multiplying it by A.

    Two figures are shown: the original 9 x 7 grid (x from -4 to 4, y from
    -3 to 3) and the same points after the transformation ``A @ point``.
    Each point keeps its color in both figures so it can be tracked.

    Parameters
    ----------
    A : array-like
        Matrix applied to the grid points (2x2 in this notebook).
    eigen : bool, default False
        If True, print the eigenvalues and eigenvectors of A and draw the
        eigenvectors as arrows on both figures. When the eigenvectors are
        complex they are still printed, but no arrows are drawn because a
        complex vector has no direction in the real plane.
    """
    xvals = np.linspace(-4, 4, 9)
    yvals = np.linspace(-3, 3, 7)
    # 2 x 63 array: row 0 holds the x coordinates, row 1 the y coordinates.
    xygrid = np.column_stack([[x, y] for x in xvals for y in yvals])

    uvgrid = np.dot(A, xygrid)
    # Map grid coordinates to colors
    colors = list(map(colorizer, xygrid[0], xygrid[1]))

    # Plot grid points
    plt.scatter(xygrid[0], xygrid[1], s=40, c=colors, edgecolor="none")
    plt.grid(True)
    plt.axis("equal")
    plt.title("Original Grid")

    # Stays None unless eigen is requested AND the eigenvectors are real.
    real_eigen_vectors = None
    if eigen:
        eigen_values, eigen_vectors = np.linalg.eig(A)
        np.set_printoptions(precision=3)
        # Eigenvectors are the columns of eigen_vectors, hence the transpose.
        for value, vector in zip(eigen_values[:2], eigen_vectors.T):
            print(f"Eigen Vector: {vector} - Eigen Value: {value:.2f}")

        if np.iscomplexobj(eigen_vectors):
            print("Eigen vectors are complex; no arrows drawn.")
        else:
            real_eigen_vectors = eigen_vectors
            _draw_eigen_arrows(real_eigen_vectors)

    plt.show()

    plt.scatter(uvgrid[0], uvgrid[1], s=40, c=colors, edgecolor="none")
    plt.grid(True)
    plt.title("Transformed Grid")
    if real_eigen_vectors is not None:
        _draw_eigen_arrows(real_eigen_vectors)
    plt.axis("equal")
    plt.show()
    

def plot_wrapper(a=2, b=1, c=-1, d=1, eigen=False):
    """
    Build the 2x2 matrix [[a, b], [c, d]] and plot its effect on the grid.

    Parameters
    ----------
    a, b : number
        First row of the matrix.
    c, d : number
        Second row of the matrix.
    eigen : bool, default False
        Forwarded to gen_basic_plot.
    """
    # Row-major layout, so the matrix equals the one written in the notebook
    # text. np.column_stack([[a, b], [c, d]]) would build the transpose, which
    # has the same eigenvalues but different eigenvectors unless b == c.
    A = np.array([[a, b], [c, d]])
    gen_basic_plot(A, eigen=eigen)



We are going to start with the following matrix:

$$\begin{bmatrix} 2 & 1 \\ -1 & 1 \\ \end{bmatrix}$$

(This matrix has no real eigenvectors)

In [None]:
# Show the grid before and after the transformation; eigen=False skips the
# eigenvector printout and arrows.
plot_wrapper(a=2,b=1,c=-1,d=1, eigen=False)

This matrix has real eigenvectors:

$$\begin{bmatrix} 5 & 1 \\ 4 & 2 \\ \end{bmatrix}$$

In [None]:
# eigen=True additionally prints the eigenvalues/eigenvectors and draws the
# eigenvectors as arrows on both plots.
plot_wrapper(a=5, b=1, c=4,d=2, eigen=True)

This matrix has real and orthogonal eigenvectors:
$$\begin{bmatrix} 3 & 4 \\ 4 & 2 \\ \end{bmatrix}$$

In [None]:
# b == c here, so the matrix is symmetric; eigen=True prints the
# eigenvalues/eigenvectors and draws the eigenvectors as arrows.
plot_wrapper(a=3, b=4, c=4,d=2, eigen=True)

Introducing the Covariance Matrix:

$$ var(x) = E[(x-\mu_{x})^2] $$
Recall when we defined the covariance:

$$ cov(x,y) = E[(x-\mu_{x})(y-\mu_{y})] $$

For example, given a data matrix with $N$ observations and 3 features ($a$, $b$, and $c$), the covariance matrix $\Sigma$ is defined as:
 
$$ \Sigma = \begin{bmatrix} var(a) & cov(a,b) & cov(a,c)\\ cov(b,a) & var(b)& cov(b,c) \\ cov(c,a) & cov(c,b) & var(c) \end{bmatrix}$$

Which we usually estimate with:

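$$ \hat{\Sigma} = \frac{1}{N-1} X^T X $$

where $X$ is the $N \times 3$ data matrix (one row per observation) with each column's mean subtracted.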

These matrices will be incredibly important next week.

In [None]:
# Load the Palmer penguins table and discard every row that contains a
# missing value in any column.
penguins = load_penguins().dropna()

# Measurement columns used for the covariance matrix in a later cell.
features = [
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
]

# Last expression of the cell: displays the cleaned table.
penguins

In [None]:
# Display pandas' summary statistics for the cleaned table.
penguins.describe()

In [None]:
# Covariance matrix of the selected features. rowvar=False tells NumPy that
# each column is a variable and each row is one observation.
cov = np.cov(penguins[features].to_numpy(), rowvar=False)

# Annotated heatmap with the feature names on both axes.
sns.heatmap(
    cov,
    annot=True,
    fmt='.2f',
    xticklabels=features,
    yticklabels=features,
    cmap='YlGnBu',
)
plt.show()