In [2]:
import numpy as np
from tqdm import tqdm  # type: ignore


def de_mean(data: np.ndarray) -> np.ndarray:
    mean = np.mean(data, axis=0)
    return data - mean


def direction(w: np.ndarray) -> np.ndarray:
    mag = np.linalg.norm(w)
    return w / mag


def directional_variance(data: np.ndarray, w: np.ndarray) -> float:
    w_dir = direction(w)
    return sum(np.dot(x, w_dir) ** 2 for x in data)


def directional_variance_gradient(data: np.ndarray, w: np.ndarray) -> np.ndarray:
    w_dir = direction(w)
    ans= np.array(
        [sum(2 * np.dot(x, w_dir) * x[i] for x in data) for i in range(len(w))]
    )
    return ans


def gradient_step(v: np.ndarray, direction: np.ndarray, step_size: float) -> np.ndarray:
    return v + step_size * direction


In [45]:

def first_principal_component(
    data: np.ndarray, n: int = 1000, step_size: float = 0.001
) -> np.ndarray:
    guess = np.random.random(data.shape[1])
    with tqdm(total=n) as t:
        for _ in range(n):  # 这个循环确保了多次迭代
            dv = directional_variance(data, guess)
            gradient = directional_variance_gradient(data, guess)
            guess = gradient_step(guess, gradient, step_size)
            t.update(1)
            t.set_description(f"directional variance: {dv:.3f}")
    return direction(guess)


In [39]:


def project(v: np.ndarray, w: np.ndarray) -> np.ndarray:
    w_dir = direction(w)
    projection_length = np.dot(v, w_dir)
    return projection_length * w_dir


def remove_projection_from_vector(v: np.ndarray, w: np.ndarray) -> np.ndarray:
    return v - project(v, w)


def remove_projection(data: np.ndarray, w: np.ndarray) -> np.ndarray:
    return np.array([remove_projection_from_vector(v, w) for v in data])


def principal_component_analysis(data: np.ndarray, num_components: int) -> np.ndarray:
    components = []
    for _ in range(num_components):
        component = first_principal_component(data, n=1000, step_size=0.01)
        components.append(component)
        print('Test',directional_variance(data, component))
        data = remove_projection(data, np.array(component))
    return np.array(components)


def transform_vector(v: np.ndarray, components: np.ndarray) -> np.ndarray:
    return np.array([np.dot(v, w) for w in components])


def transform(data: np.ndarray, components: np.ndarray) -> np.ndarray:
    return np.array([transform_vector(v, components) for v in data])




In [12]:

"""
data = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
components = principal_component_analysis(data, data.shape[1] - 1)
ans = transform(data, components)
for v in ans:
    print(*[f"{i:8.3f}" for i in v], sep=", ")
"""

'\ndata = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])\ncomponents = principal_component_analysis(data, data.shape[1] - 1)\nans = transform(data, components)\nfor v in ans:\n    print(*[f"{i:8.3f}" for i in v], sep=", ")\n'

In [50]:

X = np.array([
    [1, 2, 3,1],
    [2, 3, 4,1],
    [3, 4, 5,1],
    [4, 5, 6,1],
    [5, 6, 7,1]
])
X = de_mean(X)
print(X)

components = principal_component_analysis(X, 2)
print(components)
ans1 = transform(X, components)
for v in ans1:
     print(*[f"{i:8.3f}" for i in v], sep=", ")

[[-2. -2. -2.  0.]
 [-1. -1. -1.  0.]
 [ 0.  0.  0.  0.]
 [ 1.  1.  1.  0.]
 [ 2.  2.  2.  0.]]


directional variance: 30.000: 100%|██████████| 1000/1000 [00:01<00:00, 750.15it/s]


Test 29.999938684675598


directional variance: 0.000: 100%|██████████| 1000/1000 [00:01<00:00, 740.29it/s]

Test 1.4150874739938597e-05
[[0.57661802 0.57756733 0.57786369 0.00109404]
 [0.16769666 0.21044933 0.92613041 0.26433193]]
  -3.464,   -2.609
  -1.732,   -1.304
   0.000,    0.000
   1.732,    1.304
   3.464,    2.609



