In [20]:
from keras.datasets import mnist
import numpy as np

# Fetch the MNIST train/test splits (image arrays plus their digit labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Cast to float32, flatten every image into a 784-element row vector, and
# divide by 255 so the pixel intensities are rescaled before feature extraction.
X_train = X_train.astype(np.float32).reshape(-1, 784) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 784) / 255.0

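| gamma      | 0.005 | 0.01  | 0.1   | 0.05  |
|------------|-------|-------|-------|-------|
| latent_dim | 50    | 200   | 200   | 200   |
| accuracy   | 0.096 | 0.108 | 0.088 | 0.088 |

Each column is one run. Every accuracy is close to 0.1, which is chance level for ten digit classes.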

- **gamma**: controls the smoothness of the RBF kernel. A higher gamma makes the kernel more local, so similarity is sensitive to small changes in the input; a smaller gamma makes it smoother and captures more global similarity.
- **latent_dim**: the number of random Fourier features. A larger value gives a better approximation of the true RBF kernel, but is slower and uses more memory.

In [21]:
from sklearn.kernel_approximation import RBFSampler

# RBF kernel parameter and number of random Fourier features (the latent size).
gamma = 0.01
latent_dim = 200

# Random-Fourier-feature map approximating an RBF kernel; the fixed
# random_state keeps the sampled projection identical between runs.
rff = RBFSampler(n_components=latent_dim, gamma=gamma, random_state=42)

# Fit on the training pixels and project them in one call, then push the
# test split through the same fitted map.
X_train_latent = rff.fit_transform(X_train)
X_test_latent = rff.transform(X_test)

print(f"RFF latent space shape: {X_train_latent.shape}")


RFF latent space shape: (60000, 200)


In [22]:
from sklearn.linear_model import Ridge

# Approximate decoder: ridge regression from latent features back to pixels.
# Ridge.fit returns the estimator itself, so construction and fitting chain.
inverse_regressor = Ridge(alpha=1.0).fit(X_train_latent, X_train)

# Encode-decode sanity check on the first training sample; slicing (rather
# than indexing) keeps the 2-D shape that predict() expects.
z_sample = X_train_latent[:1]
x_recon = inverse_regressor.predict(z_sample)

print(f"Reconstructed pixel shape: {x_recon.shape}")

Reconstructed pixel shape: (1, 784)


In [None]:
from sklearn.linear_model import LinearRegression

# Dedicated, seeded generator so the noise draws -- and therefore the fitted
# denoiser and the sampled latent below -- are identical on every
# Restart & Run All (the RFF map above is already pinned via random_state).
denoise_rng = np.random.default_rng(42)

# Clean latent codes produced by the RFF encoder: (n_samples, latent_dim).
Z_clean = X_train_latent

# Corrupt every latent code with unit-variance Gaussian noise. This is the
# only noise draw: an earlier scale=0.1 draw was overwritten before use and
# has been removed.
noise = denoise_rng.normal(0.0, 1.0, size=Z_clean.shape)
Z_noisy = Z_clean + noise

# Noise-prediction objective: the regressor sees the corrupted code
# (input: Z_noisy) and is trained to output the noise added to it
# (target: noise).
denoiser = LinearRegression()
denoiser.fit(Z_noisy, noise)

print("✅ Denoiser is now fitted!")

# Iterative denoising of a single latent drawn from a standard normal.
T = 100  # number of denoising steps
z = denoise_rng.normal(size=(1, latent_dim))

for t in range(T):
    predicted_noise = denoiser.predict(z)
    z = z - predicted_noise  # update rule: remove predicted noise

In [None]:
m = 100        # samples per digit
n_digits = 10  # digit classes 0-9

# Seeded generator so the generated dataset is reproducible across runs.
gen_rng = np.random.default_rng(0)

# Draw every starting latent at once: one row per generated sample.
Z0 = gen_rng.normal(size=(n_digits * m, latent_dim))

# NOTE(review): placeholder -- the latents are decoded as drawn. No diffusion /
# denoising steps are applied and nothing conditions a latent on its digit, so
# the labels attached below are independent of the generated pixels. A
# classifier scored on this set is therefore expected to sit at chance (~0.1).
# Replace this assignment with the actual (label-conditioned) diffusion steps.
ZT = Z0

# One batched decode instead of one predict() call per sample.
X_decoded = inverse_regressor.predict(ZT)

# Labels in the same order as the original nested loop: m zeros, m ones, ...
labels = np.repeat(np.arange(n_digits), m)

# Keep the downstream format: a list of (flat pixel vector, int digit) pairs.
Dn = [(pixels, int(digit)) for pixels, digit in zip(X_decoded, labels)]

print(f"Total generated samples: {len(Dn)}")


Total generated samples: 1000


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Split the generated (pixels, label) pairs into a feature matrix and a
# label vector.
X_gen = np.array([pixels for pixels, _ in Dn])
y_gen = np.array([label for _, label in Dn])

# Reference classifier fitted on the real MNIST training split.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Label the generated images with the real-data classifier and compare
# against the labels they were generated under.
y_pred = clf.predict(X_gen)

print("Classifier accuracy on generated data:", accuracy_score(y_gen, y_pred))


Classifier accuracy on generated data: 0.104
