In [20]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models


In [21]:
# Load the MNIST dataset (one flattened 784-pixel row per image) from OpenML.
# as_frame=True pins the return type to a pandas DataFrame instead of relying
# on the library's version-dependent default, because a later cell calls
# X.to_numpy(), which only exists on DataFrames.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X = mnist.data.astype('float32') / 255.0  # Normalize the data between 0 and 1
print(f"Shape of dataset: {X.shape}")


Shape of dataset: (70000, 784)


In [22]:
# Convert X to a float32 NumPy array before reshaping.
# np.asarray accepts both a pandas DataFrame and a plain ndarray, so this cell
# works whichever container the loader returned (X.to_numpy() would raise
# AttributeError on an ndarray).
X_np = np.asarray(X, dtype='float32')

# Each 784-value row becomes a 28x28 image with a single color channel (grayscale);
# -1 lets NumPy infer the number of images.
X_reshaped = X_np.reshape(-1, 28, 28, 1)

print(f"Shape after reshaping: {X_reshaped.shape}")


Shape after reshaping: (70000, 28, 28, 1)


In [23]:
# Convolutional autoencoder for 28x28 single-channel images.
input_img = layers.Input(shape=(28, 28, 1))

# Encoder: two (3x3 conv -> 2x2 max-pool) stages, 16 then 8 filters.
x = layers.Conv2D(filters=16, kernel_size=(3, 3),
                  activation='relu', padding='same')(input_img)
x = layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x)
x = layers.Conv2D(filters=8, kernel_size=(3, 3),
                  activation='relu', padding='same')(x)
# `encoded` is the bottleneck representation produced by the encoder.
encoded = layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x)

# Decoder: mirrors the encoder with (3x3 conv -> 2x2 upsampling) stages,
# 8 then 16 filters.
x = layers.Conv2D(filters=8, kernel_size=(3, 3),
                  activation='relu', padding='same')(encoded)
x = layers.UpSampling2D(size=(2, 2))(x)
x = layers.Conv2D(filters=16, kernel_size=(3, 3),
                  activation='relu', padding='same')(x)
x = layers.UpSampling2D(size=(2, 2))(x)
# A single-filter sigmoid convolution maps back to one channel with values in (0, 1).
decoded = layers.Conv2D(filters=1, kernel_size=(3, 3),
                        activation='sigmoid', padding='same')(x)

# Tie input and reconstruction together, then compile with Adam and a
# per-pixel binary cross-entropy reconstruction loss.
autoencoder = models.Model(inputs=input_img, outputs=decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')
autoencoder.summary()


In [27]:
# Prepare the autoencoder input from the MNIST data loaded earlier in the notebook.
# X (pixel values already scaled by 1/255) is reused here rather than fetching
# and parsing the whole dataset from OpenML a second time.

# Explicitly convert to a C-contiguous float32 NumPy array
X_np = np.ascontiguousarray(X, dtype='float32')

# Check for NaNs or infinite values before reshaping
assert not np.isnan(X_np).any(), "NaN values found in the dataset!"
assert not np.isinf(X_np).any(), "Infinite values found in the dataset!"

# Reshape the data for the autoencoder: (samples, 28, 28, 1).
# X_np is already float32, so no further cast (and extra full-size copy) is needed.
X_reshaped = X_np.reshape(-1, 28, 28, 1)

print(f"Shape after reshaping: {X_reshaped.shape}")

Shape after reshaping: (70000, 28, 28, 1)


In [28]:
# Report whether the array holds any NaN entries, then any infinite entries
for find_bad_values in (np.isnan, np.isinf):
    print(find_bad_values(X_np).any())



False
False


In [29]:
# Show the element type of the array that will be fed to the model
input_dtype = X_reshaped.dtype
print(input_dtype)  # Should be 'float32'

float32


In [31]:
# Use only a small subset of the data for a quick test of the training loop
SMALL_SUBSET_SIZE = 1000  # number of leading samples to train on
X_small = X_reshaped[:SMALL_SUBSET_SIZE]

# Train the autoencoder on the smaller subset. Input and target are the same
# array because the network is trained to reconstruct its input.
# validation_split=0.2 holds out 20% of X_small for validation.
# The History object returned by fit() is kept so the per-epoch losses can be
# inspected or plotted afterwards instead of being discarded.
# NOTE(review): the run recorded below stopped with "ValueError: object __array__
# method not producing an array". X_small is a float32 NumPy array at this point,
# so this looks like a NumPy/TensorFlow version mismatch rather than a data
# problem — confirm the installed versions are compatible.
history_small = autoencoder.fit(X_small, X_small,
                                epochs=50,
                                batch_size=64,
                                shuffle=True,
                                validation_split=0.2)

ValueError: object __array__ method not producing an array

In [32]:
# Wrap the full image array in a TensorFlow tensor
X_tensor = tf.convert_to_tensor(X_reshaped)

# Fit the autoencoder to reconstruct its own input (x and y are the same tensor)
training_options = {
    'epochs': 50,
    'batch_size': 64,
    'shuffle': True,
    'validation_split': 0.2,
}
autoencoder.fit(X_tensor, X_tensor, **training_options)

ValueError: object __array__ method not producing an array