In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size, input_size),
            nn.LeakyReLU()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [7]:
import numpy as np
 
def PCA(X, num_components, return_error=False):
    X_meaned = X - np.mean(X, axis=0)
    cov_mat = np.cov(X_meaned, rowvar=False)
    eigen_values, eigen_vectors = np.linalg.eigh(cov_mat)

    sorted_index = np.argsort(eigen_values)[::-1]
    sorted_eigenvectors = eigen_vectors[:, sorted_index]

    eigenvector_subset = sorted_eigenvectors[:, 0:num_components]

    X_reduced = np.dot(eigenvector_subset.transpose(), X_meaned.transpose()).transpose()
    if return_error:
        X_restored = np.dot(X_reduced, eigenvector_subset.transpose()) + np.mean(X, axis=0)
        reconstruction_error = np.mean(np.square(X - X_restored))
        return X_reduced, reconstruction_error
    else:
        return X_reduced

In [8]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler

data = load_diabetes()
X = data.data
y = data.target

In [9]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
 
X_pca, pca_error = PCA(X_scaled, 2, return_error=True)
print("Ошибка для PCA:", pca_error)

Ошибка для PCA: 0.44834695722485207


In [10]:
input_size = X_scaled.shape[1]
hidden_size = 2

autoencoder = Autoencoder(input_size, hidden_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

num_epochs = 100
batch_size = 16

X_torch = torch.tensor(X_scaled, dtype=torch.float32)

for epoch in range(num_epochs):
    for i in range(0, len(X_torch), batch_size):
        batch_x = X_torch[i:i+batch_size]
        output = autoencoder(batch_x)
        loss = criterion(output, batch_x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

with torch.no_grad():
    X_torch_restored = autoencoder(X_torch).numpy()

autoencoder_error = np.mean((X_scaled - X_torch_restored) ** 2)

print("Ошибка для автоэнкодера:", autoencoder_error)

Ошибка для автоэнкодера: 0.7443582631929421
