In [None]:
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
from skimage.metrics import structural_similarity as ssim


In [None]:
def plot_digits(X, title):
    """Plot up to 100 digits on a 10x10 grid of greyscale images.

    Parameters
    ----------
    X : iterable of arrays
        Images to draw. Each item is reshaped to (16, 16), so it must hold
        exactly 256 values. Items beyond the first 100 are ignored.
    title : str
        Figure-level title.
    """
    fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(8, 8))
    axes = axs.ravel()
    # Hide every axis up front so grid cells that receive no image stay blank
    # instead of showing an empty ticked frame when X has fewer than 100 rows.
    for ax in axes:
        ax.axis("off")
    # zip stops at the shorter input: at most one image per grid cell.
    for img, ax in zip(X, axes):
        ax.imshow(img.reshape((16, 16)), cmap="Greys")
    fig.suptitle(title, fontsize=24)

def SSIM_Batch(X, X_true):
    """Return the mean SSIM over corresponding rows of ``X`` and ``X_true``.

    Row ``i`` of each array is reshaped to a 16x16 image and the pair is
    scored with ``ssim(..., data_range=1.0)``. The scores for all
    ``X.shape[0]`` rows are summed and divided by the row count.

    Parameters
    ----------
    X : 2-D array
        Images to evaluate, one flattened 16x16 image per row.
    X_true : array
        Reference images, indexed row-by-row in step with ``X``.
    """
    n_images, _n_features = X.shape

    def pair_score(i):
        # First argument comes from X, second from the reference set.
        candidate = X[i].reshape((16, 16))
        reference = X_true[i].reshape((16, 16))
        return ssim(candidate, reference, data_range=1.0)

    # Built-in sum accumulates from 0 in row order.
    return sum(pair_score(i) for i in range(n_images)) / n_images


In [None]:
# Download OpenML dataset 41082 as arrays (not a DataFrame): features X, labels y.
# NOTE(review): presumably the USPS handwritten-digits set — confirm against OpenML.
X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True)
# Rescale each feature column to MinMaxScaler's default range; SSIM_Batch
# later scores images with data_range=1.0.
X = MinMaxScaler().fit_transform(X)

In [None]:
# Named once so the split seed, the noise seed and the noise level cannot
# drift apart between the train and test branches.
SEED = 0
NOISE_STD = 0.25

# Stratified hold-out: 1000 training rows and 100 test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=SEED, train_size=1_000, test_size=100
)

# One seeded generator for both draws. The draw order (test first, then
# train) determines the actual noise values, so it must not be swapped.
rng = np.random.RandomState(SEED)
noise = rng.normal(scale=NOISE_STD, size=X_test.shape)
X_test_noisy = X_test + noise

noise = rng.normal(scale=NOISE_STD, size=X_train.shape)
X_train_noisy = X_train + noise

In [None]:
# Show the clean test digits, then the corrupted ones with their pixel-wise MSE.
plot_digits(X_test, "Uncorrupted test images")
plot_digits(
    X_test_noisy,
    f"Noisy test images\nMSE: {np.square(X_test - X_test_noisy).mean():.2f}",
)

# Kernel PCA - Kernel Principal Component Analysis

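We will build on PCA, but this time we use its kernelized variant. To perform the denoising, we will do the following:

1. Fit a Kernel PCA model (RBF kernel) on the noisy training images, learning an approximate inverse transform at the same time.
2. Project the noisy test images onto the leading $r$ kernel principal components.
3. Map those projections back to pixel space and compare the reconstructions with the uncorrupted test images.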


# Calculate Kernel PCA

In [None]:
# Passed as `gamma` to every KernelPCA(kernel="rbf", ...) in this notebook.
GAMMA = 1e-3
# Passed as `alpha` to KernelPCA, which is always built with
# fit_inverse_transform=True here.
ALPHA = 1e-4
# Number of components (`n_components`) used for the first fit.
r_val = 32

In [None]:
# Build the RBF Kernel PCA model and fit it on the noisy training images.
# fit() returns the estimator itself, so it can be chained.
kernel_pca = KernelPCA(
    kernel="rbf",
    gamma=GAMMA,
    alpha=ALPHA,
    n_components=r_val,
    fit_inverse_transform=True,
).fit(X_train_noisy)

# Denoise: project the noisy test images, then map back to pixel space.
Z_test_noisy = kernel_pca.transform(X_test_noisy)
X_test_kpca = kernel_pca.inverse_transform(Z_test_noisy)

# Reconstruction error against the clean test images.
print('MSE Error: ', np.square(X_test_kpca - X_test).mean())

In [None]:
# Three figures for visual comparison: clean, noisy, and reconstructed test
# digits. Each MSE is measured against the clean test images.
plot_digits(X_test, "Uncorrupted test images")
plot_digits(
    X_test_noisy, f"Noisy test images\nMSE: {np.mean((X_test - X_test_noisy) ** 2):.3f}"
)
plot_digits(
    X_test_kpca,
    # The f-string interpolates r_val itself, so no extra .format() pass is
    # applied; one would only risk errors if the rendered text held braces.
    f"PCA reconstruction, k = {r_val}, \nMSE: {np.mean((X_test - X_test_kpca) ** 2):.3f}",
)

## Find the best $r$ value for this denoising method

In [None]:
# Candidate component counts: every integer from 1 to 256 inclusive.
k_vals = np.arange(start=1, stop=257)

In [None]:
# Sweep the number of components: for each k, refit Kernel PCA on the noisy
# training set, denoise the noisy test set, and record both metrics against
# the clean test images. Entry i of each list corresponds to k_vals[i].
MSE = []
SSIM_array = []
for k in k_vals:
    kernel_pca = KernelPCA(
        kernel="rbf",
        gamma=GAMMA,
        alpha=ALPHA,
        n_components=k,
        fit_inverse_transform=True,
    ).fit(X_train_noisy)

    # Project, then reconstruct in pixel space.
    Z_test_noisy = kernel_pca.transform(X_test_noisy)
    X_test_kpca = kernel_pca.inverse_transform(Z_test_noisy)

    MSE.append(np.square(X_test_kpca - X_test).mean())
    SSIM_array.append(SSIM_Batch(X_test_kpca, X_test))

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single-panel figure with a secondary y-axis available; only the MSE curve
# is drawn, on the primary axis.
fig = make_subplots(specs=[[dict(secondary_y=True)]])
fig.add_trace(
    go.Scatter(name='MSE', x=k_vals, y=MSE),
    secondary_y=False,
)
# SSIM curve on the secondary axis is currently disabled:
# fig.add_trace(go.Scatter(x=k_vals, y=SSIM_array, name='SSIM'), secondary_y=True)

fig.update_layout(
    width=1000,   # figure width
    height=400,   # figure height
    xaxis_title='Principal Components',
    yaxis_title='Loss',
)

fig.show()

In [None]:
# NOTE(review): 31 is presumably the component count read off the sweep
# above — confirm, or derive it from the recorded metrics.
k = 31

# Refit with the selected k; fit() returns the estimator, so it is chained.
kernel_pca = KernelPCA(
    kernel="rbf",
    gamma=GAMMA,
    alpha=ALPHA,
    n_components=k,
    fit_inverse_transform=True,
).fit(X_train_noisy)

# Denoise the noisy test images and report the error vs. the clean ones.
Z_test_noisy = kernel_pca.transform(X_test_noisy)
X_test_kpca = kernel_pca.inverse_transform(Z_test_noisy)

print('MSE Error: ', np.square(X_test_kpca - X_test).mean())

In [None]:
def plot_digits_plotly(X):
    """Plot up to 64 digits on an 8x8 grid of plotly heatmaps.

    Parameters
    ----------
    X : sequence of arrays
        Images to draw. Only ``X[:64]`` is used, and each item is reshaped to
        (16, 16), so it must hold exactly 256 values.

    Returns
    -------
    fig
        The figure built by ``make_subplots`` with one heatmap trace per
        image, filled row by row.
    """
    fig = make_subplots(rows=8, cols=8)

    # One heatmap per image; grid positions are 1-based.
    for idx, img in enumerate(X[:64]):
        grid_row, grid_col = divmod(idx, 8)
        fig.add_trace(
            go.Heatmap(
                z=img.reshape((16, 16)),
                colorscale='Greys',
                showscale=False,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )

    # Style all axes in one figure-wide call each instead of once per subplot.
    # Subplots left without an image (fewer than 64 inputs) are cleaned up too.
    fig.update_xaxes(showticklabels=False, showgrid=False)
    # Reversed y-range so the first image row is drawn at the top.
    fig.update_yaxes(autorange="reversed", showticklabels=False, showgrid=False)

    fig.update_layout(
        width=800,
        height=800,
        showlegend=False,
        margin=dict(t=0, l=0, r=0, b=0),
    )

    return fig


In [None]:
# First 64 uncorrupted test images (reference for the two grids that follow).
plot_digits_plotly(X_test)


In [None]:
# First 64 test images after the Gaussian noise was added.
plot_digits_plotly(X_test_noisy)

In [None]:
# First 64 reconstructions from the most recently fitted Kernel PCA model.
plot_digits_plotly(X_test_kpca)

In [None]:
# Mean SSIM of the latest reconstruction against the clean test images.
SSIM_Batch(X_test_kpca, X_test)