In [None]:
!wget http://ufldl.stanford.edu/housenumbers/train_32x32.mat
!wget http://ufldl.stanford.edu/housenumbers/test_32x32.mat


--2024-10-15 20:10:29--  http://ufldl.stanford.edu/housenumbers/train_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182040794 (174M) [text/plain]
Saving to: ‘train_32x32.mat’


2024-10-15 20:10:43 (12.6 MB/s) - ‘train_32x32.mat’ saved [182040794/182040794]

--2024-10-15 20:10:43--  http://ufldl.stanford.edu/housenumbers/test_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64275384 (61M) [text/plain]
Saving to: ‘test_32x32.mat’


2024-10-15 20:10:52 (7.21 MB/s) - ‘test_32x32.mat’ saved [64275384/64275384]



In [None]:
from scipy.io import loadmat
import numpy as np

# Load the data
train_data = loadmat('train_32x32.mat')
test_data = loadmat('test_32x32.mat')

# Preprocess the images (normalize and reshape)
x_train = np.transpose(train_data['X'], (3, 0, 1, 2)).astype('float32') / 255.0
y_train = train_data['y'].flatten()
x_test = np.transpose(test_data['X'], (3, 0, 1, 2)).astype('float32') / 255.0
y_test = test_data['y'].flatten()

# Display dataset shapes
print(f'Training data shape: {x_train.shape}')
print(f'Test data shape: {x_test.shape}')


Training data shape: (73257, 32, 32, 3)
Test data shape: (26032, 32, 32, 3)


In [None]:
from tensorflow.keras import layers, models

def build_encoder(latent_dim):
    # Define the encoder model
    encoder_input = layers.Input(shape=(32, 32, 3))
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(encoder_input)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    z_mean = layers.Dense(latent_dim)(x)
    z_log_var = layers.Dense(latent_dim)(x)

    # Return encoder model and latent representations
    encoder = models.Model(encoder_input, [z_mean, z_log_var])
    return encoder

# Example usage: build the encoder with latent dimension of 2
encoder = build_encoder(latent_dim=2)
encoder.summary()


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Encoder
def build_encoder(latent_dim):
    encoder_input = layers.Input(shape=(32, 32, 3))
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(encoder_input)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)

    z_mean = layers.Dense(latent_dim, name='z_mean')(x)
    z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)

    encoder = models.Model(encoder_input, [z_mean, z_log_var], name='encoder')
    return encoder

# Decoder
def build_decoder(latent_dim):
    decoder_input = layers.Input(shape=(latent_dim,))
    x = layers.Dense(8*8*64, activation='relu')(decoder_input)
    x = layers.Reshape((8, 8, 64))(x)
    x = layers.Conv2DTranspose(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    decoder_output = layers.Conv2DTranspose(3, (3, 3), activation='sigmoid', padding='same')(x)

    decoder = models.Model(decoder_input, decoder_output, name='decoder')
    return decoder

# Latent space sampling function
def sample_z(args):
    z_mean, z_log_var = args
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Build VAE model
def build_vae(latent_dim):
    # Encoder
    encoder = build_encoder(latent_dim)
    z_mean, z_log_var = encoder.output

    # Lambda layer for latent space sampling
    z = layers.Lambda(sample_z, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # Decoder
    decoder = build_decoder(latent_dim)
    vae_output = decoder(z)

    # VAE model
    vae = models.Model(encoder.input, vae_output, name='vae')
    return vae

# Instantiate and summarize the VAE
vae = build_vae(latent_dim=2)
vae.summary()



In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model

In [None]:
# Assume the previous imports and data loading code is already executed

# Prepare training and validation data
x_train = np.transpose(train_data['X'], (3, 0, 1, 2)).astype('float32') / 255.0
y_train = train_data['y'].flatten()  # Labels for training data

x_test = np.transpose(test_data['X'], (3, 0, 1, 2)).astype('float32') / 255.0
y_test = test_data['y'].flatten()  # Labels for test data

# Display dataset shapes
print(f'Training data shape: {x_train.shape}')  # Should be (num_samples, 32, 32, 3)
print(f'Test data shape: {x_test.shape}')  # Should be (num_samples, 32, 32, 3)

# Now, let's define validation data. For simplicity, we can use a split from the training data
val_split = int(0.8 * len(x_train))  # Using 80% for training and 20% for validation
x_val = x_train[val_split:]
y_val = y_train[val_split:]
x_train = x_train[:val_split]
y_train = y_train[:val_split]

# Callbacks for training
checkpoint_callback = ModelCheckpoint(
    'vae_model.keras',
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1
)

# Compile and train the VAE
vae.compile(optimizer='adam', loss='binary_crossentropy')

# Fit the model using the defined variables
vae.fit(
    x_train, x_train,  # Input images as both data and labels (unsupervised learning)
    validation_data=(x_val, x_val),  # Validation data for evaluation
    epochs=100,
    callbacks=[checkpoint_callback, early_stopping_callback]
)

# Optionally, evaluate the model on the test set
# Here you would implement testing if necessary




Training data shape: (73257, 32, 32, 3)
Test data shape: (26032, 32, 32, 3)
Epoch 1/100
[1m1831/1832[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 108ms/step - loss: 0.6489
Epoch 1: val_loss improved from inf to 0.63394, saving model to vae_model.keras
[1m1832/1832[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 116ms/step - loss: 0.6489 - val_loss: 0.6339
Epoch 2/100
[1m1831/1832[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 106ms/step - loss: 0.6342
Epoch 2: val_loss improved from 0.63394 to 0.63304, saving model to vae_model.keras
[1m1832/1832[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 113ms/step - loss: 0.6342 - val_loss: 0.6330
Epoch 3/100
[1m1831/1832[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 105ms/step - loss: 0.6333
Epoch 3: val_loss improved from 0.63304 to 0.63284, saving model to vae_model.keras
[1m1832/1832[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 113ms/step - loss: 0.6333 - val_loss: 0.6328
Epoch 4/100


<keras.src.callbacks.history.History at 0x7d1a7b68eec0>

In [None]:
# Split the training data for validation purposes
val_split = int(0.8 * len(x_train))  # Use 80% for training and 20% for validation
x_val = x_train[val_split:]  # Validation images
x_train = x_train[:val_split]  # Training images

# You are using the same images as both input and output because it's unsupervised learning (VAE)
train_data = (x_train, x_train)
val_data = (x_val, x_val)

# ModelCheckpoint and EarlyStopping callbacks were already defined earlier

# Now run the training process
history = vae.fit(
    x_train, x_train,  # Using x_train for both input and output
    epochs=100,  # Number of epochs to train
    batch_size=32,  # Batch size
    validation_data=val_data,  # Validation data
    callbacks=[checkpoint_callback, early_stopping_callback]  # Apply the callbacks
)

# Optionally, plot training history to observe the training and validation loss



Epoch 1/100
[1m1465/1466[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 0.6318
Epoch 1: val_loss did not improve from 0.63100
[1m1466/1466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 111ms/step - loss: 0.6318 - val_loss: 0.6318
Epoch 2/100
[1m1465/1466[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 0.6320
Epoch 2: val_loss did not improve from 0.63100
[1m1466/1466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 111ms/step - loss: 0.6320 - val_loss: 0.6342
Epoch 3/100
[1m1465/1466[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 104ms/step - loss: 0.6333
Epoch 3: val_loss did not improve from 0.63100
[1m1466/1466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 111ms/step - loss: 0.6333 - val_loss: 0.6325
Epoch 4/100
[1m1465/1466[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 0.6329
Epoch 4: val_loss did not improve from 0.63100
[1m1466/1466[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
# Load the saved model with the custom loss function
vae = load_model('vae_model.h5', custom_objects={'vae_loss': vae_loss})

# Continue training the loaded model
history = vae.fit(
    train_data,  # Input data
    epochs=50,  # Number of additional epochs to train
    batch_size=64,  # Batch size of 64
    validation_data=(val_data, val_data),  # Validation data (input is same as output)
    callbacks=[checkpoint_callback, early_stopping_callback]  # Use callbacks again
)




FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'vae_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
history = vae.fit(
    train_data,  # Only pass the input data once
    epochs=50,  # Train for 50 epochs
    batch_size=64,  # Batch size of 64
    validation_data=(val_data, None),  # Pass validation input only, no need for output since it's auto-encoded
    callbacks=[checkpoint_callback, early_stopping_callback]  # Apply the callbacks
)



Epoch 1/50


ValueError: Layer "vae" expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'data:0' shape=(None, 32, 32, 3) dtype=float32>, <tf.Tensor 'data_1:0' shape=(None, 32, 32, 3) dtype=float32>]

In [None]:


# Clarity
# Most reconstructed images look clear and show the numbers well.
# Some may be a bit blurry or lose some details.

# Distortion
# Some reconstructions look different from the originals.
# This means the model didn't capture everything perfectly.

# Variability
# The dataset has many types of numbers and styles.
# The VAE does okay, but it can struggle with tricky images.

# Generalization
# The VAE works well on new images, showing it learned from the training data.
# Still, some new numbers don't look quite right, which suggests it may have overfitted.

# Conclusion
# Overall, the VAE does a good job but can improve on details and handling different styles.

