In [2]:
import numpy as np
import fractions

# np.set_printoptions(formatter={'all':lambda x: str(fractions.Fraction(x).limit_denominator())}) # display as fractions (1/3)
np.set_printoptions(formatter={'all':lambda x: str(x)}) # display as decimal numbers (0.33333...)

# Lineare Regression

In [3]:
# Datenmatrix X und Vektor y der Zielgrössen

X = np.array([
    [1, 2],
    [1, 3],
    [1, 5],
    [1, 7],
    [1, 11],
])

y = np.array([
    [4],
    [2],
    [0],
    [-1],
    [-3],
])

## "$ X^TX $"

In [4]:
X.T @ X

array([[5, 28],
       [28, 208]])

In [5]:
X.T @ y

array([[2],
       [-26]])

## Vektor der Regressionskoeffizienten "$ \hat{b} = (X^TX)^{-1}X^Ty $"

In [21]:
bhat = np.linalg.inv(X.T @ X) @ X.T @ y
bhat

array([[2.1824104234527684],
       [0.5146579804560261]])

## Projektionsmatrix "$ P = X(X^TX)^{-1}X^T $"

In [22]:
P = X @ np.linalg.inv(X.T @ X) @ X.T
P

array([[0.5130293159609121, 0.4625407166123779, 0.15960912052117263,
        -0.042345276872964216, -0.09283387622149843],
       [0.4625407166123779, 0.4201954397394137, 0.16612377850162866,
        -0.0032573289902280422, -0.04560260586319222],
       [0.15960912052117265, 0.16612377850162868, 0.2052117263843648,
        0.23127035830618892, 0.23778501628664492],
       [-0.04234527687296414, -0.0032573289902279867,
        0.23127035830618892, 0.38762214983713356, 0.4267100977198697],
       [-0.09283387622149833, -0.04560260586319215, 0.23778501628664495,
        0.4267100977198697, 0.4739413680781759]])

## Residualmatrix "$ Q = I - P $"

In [23]:
I = np.identity(P.shape[0])
Q = I - P
Q

array([[0.48697068403908794, -0.4625407166123779, -0.15960912052117263,
        0.042345276872964216, 0.09283387622149843],
       [-0.4625407166123779, 0.5798045602605864, -0.16612377850162866,
        0.0032573289902280422, 0.04560260586319222],
       [-0.15960912052117265, -0.16612377850162868, 0.7947882736156352,
        -0.23127035830618892, -0.23778501628664492],
       [0.04234527687296414, 0.0032573289902279867, -0.23127035830618892,
        0.6123778501628665, -0.4267100977198697],
       [0.09283387622149833, 0.04560260586319215, -0.23778501628664495,
        -0.4267100977198697, 0.5260586319218241]])

## Prädiktionsvektor "$ \hat{y} = Py $"

In [24]:
yhat = P @ y
yhat

array([[-0.39087947882736207],
       [0.12377850162866413],
       [3.211726384364821],
       [5.270358306188925],
       [5.78501628664495]])

## Fehlervektor "$ \mathbf{e} = y - \hat{y} = Qy $"

In [25]:
# entweder
e = y - yhat

# oder
e = Q @ y

e

array([[-1.6091205211726378],
       [1.876221498371336],
       [-0.21172638436482055],
       [-0.2703583061889243],
       [0.2149837133550494]])

## SQR (Summe der Quadrate der Restabweichungen) "$ e^Te $"

In [26]:
SQR = e.T @ e
SQR

array([[6.273615635179153]])

## SQT (Summe der Quadrate der Totalen Abweichungen)

### Zentrierende Matrix "$ M = I - \frac{1}{m}\mathbf{1}\mathbf{1}^T $"

In [27]:
m, _ = y.shape
M = np.eye (m) - np.ones ((m, m)) / m
print(f"m={m}")
M

m=5


array([[0.8, -0.2, -0.2, -0.2, -0.2],
       [-0.2, 0.8, -0.2, -0.2, -0.2],
       [-0.2, -0.2, 0.8, -0.2, -0.2],
       [-0.2, -0.2, -0.2, 0.8, -0.2],
       [-0.2, -0.2, -0.2, -0.2, 0.8]])

## SQT ausrechnen "$ y^TMy $"

In [28]:
SQT = y.T @ M @ y
SQT

array([[38.800000000000004]])

## Bestimmtheitsmass "$ R^2 = 1 - \frac{SQR}{SQT} $"

In [29]:
R2 = 1 - SQR/SQT
R2

array([[0.8383088753819806]])

# Hauptkomponentenanalyse

In [14]:
# Datenmatrix x-tilde
Xtilde = np.array([
    [-1, 8],
    [3, 7],
    [5, 1],
    [7, 5],
    [11, -1],
])

## Zentrierende Datenmatrix "$ M = I - \frac{1}{m}\mathbf{1}\mathbf{1}^T $"

In [15]:
m, _ = Xtilde.shape
M = np.eye (m) - np.ones ((m, m)) / m
print(f"m={m}")
M

m=5


array([[0.8, -0.2, -0.2, -0.2, -0.2],
       [-0.2, 0.8, -0.2, -0.2, -0.2],
       [-0.2, -0.2, 0.8, -0.2, -0.2],
       [-0.2, -0.2, -0.2, 0.8, -0.2],
       [-0.2, -0.2, -0.2, -0.2, 0.8]])

## Zentrierte Datenmatrix "$ X = M\tilde{X} $"

In [16]:
X = M @ Xtilde
X

array([[-6.0, 4.0],
       [-2.0, 3.0],
       [5.551115123125783e-17, -3.0],
       [2.0, 1.0],
       [6.0, -5.0]])

## Kovarianzmatrix "$ C = \frac{1}{m-1}X^TX $"

In [19]:
C = (1 / (m-1)) * X.T @ X
C

array([[20.0, -14.5],
       [-14.5, 15.0]])

## Eigenwertzerlegung / Spektralzerlegung der Kovarianzmatrix "$ \mathbf{C} $"

## Eigenwerte und Eigenvektoren berechnen

In [20]:
ew, ev = np.linalg.eig(C)
print(ew)
print()
print(ev)

[32.213938969562165 2.786061030437841]

[[0.7648225011424699 0.6442410587242766]
 [-0.6442410587242766 0.7648225011424699]]


### Matrizen zusammensetzen: $ \mathbf{C}=\mathbf{V}\Lambda\mathbf{V}^\intercal $

- $ \mathbf{V} $: Spalten = Eigenvektoren
- $ \Lambda $: Diagonalmatrix mit Eigenwerten
- $ \mathbf{V}^\intercal $: Zeilen = Eigenvektoren

## Singulärwertzerlegung der zentrierten Datenmatrix $ \mathbf{X} $

In [6]:
X = np.array([
    [1,0,0],
    [1,0,0],
    [0,2,1]
])

U, D, VT = np.linalg.svd(X, full_matrices=False)
Sigma = np.diag (D)

print(U)
print()
print(Sigma)
print()
print(VT)

[[0.0 -0.7071067811865472 0.7071067811865475]
 [0.0 -0.7071067811865475 -0.7071067811865475]
 [-1.0 0.0 0.0]]

[[2.23606797749979 0.0 0.0]
 [0.0 1.4142135623730951 0.0]
 [0.0 0.0 0.0]]

[[-0.0 -0.8944271909999159 -0.4472135954999579]
 [-1.0 -0.0 -0.0]
 [0.0 -0.4472135954999579 0.8944271909999159]]


# Eigenwerte / Eigenvektoren

In [11]:
B= X.T@X
B

array([[2, 0, 0],
       [0, 4, 2],
       [0, 2, 1]])

In [10]:
A = np.array([
    [3, 7],
    [7, 2],
])

np.linalg.eig(B)

(array([0.0, 5.0, 2.0]),
 array([[0.0, 0.0, 1.0],
        [0.4472135954999579, -0.8944271909999159, 0.0],
        [-0.8944271909999159, -0.4472135954999579, 0.0]]))

# Projektion auf einen Vektor (eindimensionaler Teilraum)

In [21]:
# Aufgabenstellung: Projizieren Sie den Vektor b auf eine Gerade durch a.

b = np.array([
    [6],
    [8]
])

a = np.array([
    [2],
    [5]
])

## "$ \mathbf{a}^\intercal\mathbf{a} $"

In [22]:
ata = a.T @ a
ata

array([[29]])

## Projektionsmatrix "$ P = \frac{1}{a^Ta} $"

In [26]:
P = (1 / ata) * a @ a.T
P

array([[0.13793103448275862, 0.3448275862068966],
       [0.3448275862068966, 0.8620689655172414]])

## Projektion von $ \mathbf{b} $ "$ Pb $"

In [24]:
Pb = P @ b
Pb

array([[3.586206896551724],
       [8.965517241379311]])

## Fehlervektor "$ e = b -Pb $"

In [25]:
e = b - Pb
e

array([[2.413793103448276],
       [-0.9655172413793114]])

## Check ob Fehlervektor orthogonal zu "$ \mathbf{a} $" ist

In [71]:
# falls 0 => orthogonal
a.T @ e

array([[0]])

# Projektion auf einen mehrdimensionalen Teilraum

In [72]:
# Aufgabenstellung: Projizieren Sie den Vektor v auf die Ebene A

b = np.array([
    [3],
    [4],
    [4],
])

A = np.array([
    [2, 1],
    [2, 0],
    [1, 0],
])

## "$ \mathbf{A}^\intercal\mathbf{A} $"

In [73]:
ATA = A.T @ A
ATA

array([[9, 2],
       [2, 1]])

## Projektionsmatrix "$ P = A(A^TA)^{-1}A^T $"

In [75]:
P = A @ np.linalg.inv(ATA) @ A.T
P

array([[1, 0, 0],
       [0, 4/5, 2/5],
       [0, 2/5, 1/5]])

## Projektion von $ \mathbf{b} $ "$ Pb $"

In [76]:
Pb = P @ b
Pb

array([[3],
       [24/5],
       [12/5]])

## Fehlervektor "$ \mathbf{e} $"

In [77]:
e = b - Pb
e

array([[0],
       [-4/5],
       [8/5]])

## Check ob Fehlervektor orthogonal zu den Spaltenvektoren von "$ \mathbf{A} $" ist

In [81]:
a1 = A[:, 0]
a2 = A[:, 1]

array([1, 0, 0])

In [82]:
# falls 0 => orthogonal
a1.T @ e

array([0])

In [83]:
# falls 0 => orthogonal
a2.T @ e

array([0])

# Schnittwinkel zweier Geraden

$ \cos{\alpha} = \frac{\vec{u} * \vec{v}}{\lvert \vec{u} \rvert * \lvert \vec{v} \rvert}$

In [89]:
u = np.array([4, 2, -1])  # Richtungsvektor erste Gerade
v = np.array([5, -2, 3])  # Richtungsvektor zweite Gerade

c = np.dot(u, v) / np.linalg.norm(u) / np.linalg.norm(v)  # Kosinus des Winkels
angle_rad = np.arccos(np.clip(c, -1, 1)) # Winkel in rad
angle_deg = np.rad2deg(angle_rad) # Winkel in grad
angle_deg


62.60030804589705