# M2 Genomics & High-Dimensional Data

#### Requirements

In [1]:
import numpy as np
from scipy import stats

In [2]:
# 2x2 matrix with entries +-1/sqrt(2); its columns are the unit vectors
# along (1, 1) and (1, -1). The matrix is symmetric, so P equals P.T.
P = np.array([[1, 1], [1, -1]]) / np.sqrt(2)

print(P.shape)

print(P)

(2, 2)
[[ 0.70710678  0.70710678]
 [ 0.70710678 -0.70710678]]


In [11]:
# 2x2 identity matrix, used as the middle factor D of the product P D P.
D = np.identity(2)

print(D)

[[1. 0.]
 [0. 1.]]


In [12]:
# Triple product (P D) P, evaluated left to right.
# NOTE(review): a symmetric eigen-decomposition is normally written
# P D P^T; the two agree here only because this P is symmetric.
E_in = P @ D @ P

In [13]:
# Left-multiply E_in by the row vector (-1, 0): the result is
# -1 times the first row of E_in plus 0 times its second row.
np.array([[-1, 0]]) @ E_in

array([[-1.00000000e+00,  2.23711432e-17]])

## Visualization of High-Dimensional Data

### Principal Component Analysis (PCA)

In [13]:
# Two observations (rows) of two variables (columns); both columns
# already have zero mean.
X = np.array([[0, 1], [0, -1]])

print(X.shape)
print(X)


(2, 2)
[[ 0  1]
 [ 0 -1]]


In [14]:
# Scatter matrix X^T X scaled by 1/(n - 1), with n the number of rows.
# No mean is subtracted, so this equals the sample covariance only when
# the columns of X are already centred.
S = (1 / (len(X) - 1)) * (X.T @ X)

print(S.shape)

print(S)

(2, 2)
[[0. 0.]
 [0. 2.]]


In [15]:
# Eigen-decomposition of S: the result pairs eigenvalues[i] with the
# column eigenvectors[:, i]. The eigenvalues are not returned sorted.
np.linalg.eig(S)

EigResult(eigenvalues=array([0., 2.]), eigenvectors=array([[1., 0.],
       [0., 1.]]))

In [16]:
# Four observations (rows) of two variables (columns); each column
# sums to zero, so the data are already centred.
X = np.array([[0, 2], [0, -2], [1, 1], [-1, -1]])

print(X.shape)

print(X)

(4, 2)
[[ 0  2]
 [ 0 -2]
 [ 1  1]
 [-1 -1]]


In [18]:
# Covariance of the columns of X (rowvar=False: each column is a variable),
# normalised by n rather than n - 1 (ddof=0 is the same as bias=True).
S = np.cov(X, rowvar=False, ddof=0)

print(S.shape)

print(S)

(2, 2)
[[0.5 0.5]
 [0.5 2.5]]


In [20]:
# Keep the eigen-decomposition of S so that its eigenvalues and
# eigenvectors can be inspected separately.
e_decom = np.linalg.eig(S)

In [21]:
# Eigenvalues in the order np.linalg.eig returned them (not guaranteed sorted).
e_decom.eigenvalues

array([0.38196601, 2.61803399])

In [23]:
# Column 0 is the eigenvector paired with eigenvalues[0]. np.linalg.eig does
# not sort its output, so index 0 is not necessarily the direction of largest
# variance. NOTE(review): in the recorded run eigenvalues[0] is the smaller
# of the two, so confirm that this is the intended component.
e_decom.eigenvectors[:,0]

array([-0.97324899,  0.22975292])

In [17]:
# Three observations (rows) of two variables (columns). These data are
# not centred: the column means are 1 and 2.
X = np.array([[1, 2], [3, 4], [-1, 0]])

print(X.shape)
print(X)


(3, 2)
[[ 1  2]
 [ 3  4]
 [-1  0]]


In [18]:
# Column vector of three ones (integer dtype), shape (3, 1).
I1 = np.ones((3, 1), dtype=int)

print(I1)

print(I1.shape)


[[1]
 [1]
 [1]]
(3, 1)


In [20]:
# Centering matrix H = I - (1/3) * 1 1^T for three observations:
# multiplying H by a 3-row matrix subtracts each column's mean.
H = np.eye(3) - (I1 @ I1.T) / 3

In [22]:
# Covariance of the columns of X via the centering matrix:
# S = (1/3) * X^T H X, i.e. normalised by n = 3.
S = 1/3 * (X.T @ H @ X)

print(S.shape)

print(S)

(2, 2)
[[2.66666667 2.66666667]
 [2.66666667 2.66666667]]


In [23]:
# Direction along which the variance is measured, as a (2, 1) column vector.
# `u` was not defined anywhere above, so this cell failed with a NameError
# on a fresh kernel.
# NOTE(review): the value is reconstructed from the recorded output
# (shape (1, 1), value 4.8). The unit vector (1, 2) / sqrt(5) reproduces it,
# but so does (2, 1) / sqrt(5) -- confirm against the original exercise.
u = np.array([[1], [2]]) / np.sqrt(5)

# Variance of the data projected onto u: u^T S u.
uS = u.T @ S @ u

print(uS.shape)

print(uS)

(1, 1)
[[4.8]]
