In [2]:
%pip install scikit-learn
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.losses import binary_crossentropy
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


2024-03-03 23:11:37.668215: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-03 23:11:37.733219: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-03 23:11:37.733278: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-03 23:11:37.734934: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-03 23:11:37.748918: I tensorflow/core/platform/cpu_feature_guar

In [3]:
def prepare_data(text, num_words):
    # Tokenize the text into words
    words = word_tokenize(text)
    # Create a dictionary mapping words to indices
    word_to_index = {word: i for i, word in enumerate(set(words))}
    # Convert the words to their corresponding indices
    text_indices = np.array([word_to_index[word] for word in words])
    # Convert indices to one-hot vectors
    text_one_hot = tf.keras.utils.to_categorical(text_indices, num_classes=num_words)
    return text_one_hot, word_to_index


In [4]:
def create_vae(input_dim, latent_dim):
    # Encoder
    inputs = Input(shape=(input_dim,))
    h = Dense(64, activation='relu')(inputs)
    z_mean = Dense(latent_dim)(h)
    z_log_var = Dense(latent_dim)(h)
    # Reparameterization trick
    def sampling(args):
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
        mean=0., stddev=1.)
        return z_mean + K.exp(z_log_var / 2) * epsilon
    z = Lambda(sampling)([z_mean, z_log_var])
    # Decoder
    decoder_h = Dense(64, activation='relu')
    decoder_mean = Dense(input_dim, activation='sigmoid')
    h_decoded = decoder_h(z)
    x_decoded_mean = decoder_mean(h_decoded)
    # VAE model
    vae = Model(inputs, x_decoded_mean)
    # Loss
    reconstruction_loss = binary_crossentropy(inputs, x_decoded_mean) * input_dim
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) -
    K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    return vae

In [25]:
def train_vae(vae, data, epochs, batch_size=100):
    vae.compile(optimizer='adam')
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        for i in range(0, len(data), batch_size):
            batch = data[i:i+batch_size]
            loss = vae.train_on_batch(batch, None)
            print(f" Batch {i//batch_size+1}/{len(data)//batch_size}, Loss: {loss[0]:.4f}", end='\r')

        print()


In [6]:
def generate_text(vae, word_to_index, seed_word, length):
    index_to_word = {i: word for word, i in word_to_index.items()}
    current_word = seed_word
    generated_text = [current_word]
    for i in range(length):
        x_pred = np.zeros((1, len(word_to_index)))
        x_pred[0, word_to_index[current_word]] = 1
        next_index = vae.predict(x_pred.reshape(1, -1))[0].argmax() #Reshape the input
        next_word = index_to_word[next_index]
        generated_text.append(next_word)
        current_word = next_word
    return ' '.join(generated_text)

In [7]:
# You can read about style transfer in text for more complicated examples
def add_modern_twist(text):
    shakespeare_to_modern = {
    'thou': 'you',
    'thy': 'your',
    'hast': 'have',
    'art': 'are',
    'doth': 'does',
    'hath': 'has',
    }
    # Tokenize the text into words
    words = word_tokenize(text)
    # Replace old English words with their modern equivalents
    modern_words = [shakespeare_to_modern.get(word, word) for word in
    words]
    return ' '.join(modern_words)


In [8]:
text = open('/home/Goyal/allin.txt').read()

In [9]:
num_words = len(set(word_tokenize(text)))
text_one_hot, word_to_index = prepare_data(text, num_words)


In [10]:
# Create the VAE
vae = create_vae(num_words, 50)
# Train the VAE
train_vae(vae, text_one_hot, epocs=50)
# Generate new text
generated_text = generate_text(vae, word_to_index, 'the', 100)


2024-03-03 23:11:43.044118: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: system has unsupported display driver / cuda driver combination
2024-03-03 23:11:43.044193: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: Server3A6000
2024-03-03 23:11:43.044203: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: Server3A6000
2024-03-03 23:11:43.044488: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 550.54.14
2024-03-03 23:11:43.044577: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 545.29.6
2024-03-03 23:11:43.044588: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:244] kernel version 545.29.6 does not match DSO version 550.54.14 -- cannot find working devices in this configuration


Epoch 1/50
 Batch 7/951, Loss: 5044.1040

 Batch 952/951, Loss: 17.91538
Epoch 2/50
 Batch 952/951, Loss: 14.5595
Epoch 3/50
 Batch 952/951, Loss: 14.8349
Epoch 4/50
 Batch 952/951, Loss: 18.7312
Epoch 5/50
 Batch 952/951, Loss: 9.87922
Epoch 6/50
 Batch 952/951, Loss: 8.09195
Epoch 7/50
 Batch 952/951, Loss: 7.7599
Epoch 8/50
 Batch 952/951, Loss: 7.0365
Epoch 9/50
 Batch 952/951, Loss: 7.0522
Epoch 10/50
 Batch 952/951, Loss: 6.9978
Epoch 11/50
 Batch 952/951, Loss: 7.0309
Epoch 12/50
 Batch 952/951, Loss: 6.9746
Epoch 13/50
 Batch 952/951, Loss: 6.8902
Epoch 14/50
 Batch 952/951, Loss: 7.0825
Epoch 15/50
 Batch 952/951, Loss: 6.9219
Epoch 16/50
 Batch 952/951, Loss: 6.9294
Epoch 17/50
 Batch 952/951, Loss: 6.8499
Epoch 18/50
 Batch 952/951, Loss: 6.7423
Epoch 19/50
 Batch 952/951, Loss: 6.7996
Epoch 20/50
 Batch 952/951, Loss: 7.0101
Epoch 21/50
 Batch 952/951, Loss: 6.9459
Epoch 22/50
 Batch 952/951, Loss: 6.7585
Epoch 23/50
 Batch 952/951, Loss: 6.8784
Epoch 24/50
 Batch 952/951, Loss: 6.8997
Epoch 25/50
 Batch 952/951, L

In [13]:
print(generated_text)

the . the the the , , , , , , , , , , , , , , `` . `` `` `` . , , , , , , `` `` `` . . `` `` , , , , , , , , , `` `` `` `` `` `` `` the `` `` `` `` `` `` `` `` `` `` `` `` `` . . . . . `` `` `` `` `` , , , , , , , , , , , , , `` `` `` , , `` `` `` `` ``


In [11]:
altered_text = add_modern_twist(generated_text)
print(altered_text)

the . the the the , , , , , , , , , , , , , , `` . `` `` `` . , , , , , , `` `` `` . . `` `` , , , , , , , , , `` `` `` `` `` `` `` the `` `` `` `` `` `` `` `` `` `` `` `` `` . . . . . `` `` `` `` `` , , , , , , , , , , , , , `` `` `` , , `` `` `` `` ``


In [18]:
# Create the VAE
vae = create_vae(num_words, 100) # changed dimension of latent space
# Train the VAE
train_vae(vae, text_one_hot, epochs=10)
# Generate new text
generated_text = generate_text(vae, word_to_index, 'the', 100)


Epoch 1/10
 Batch 952/951, Loss: 17.27918
Epoch 2/10
 Batch 952/951, Loss: 16.2628
Epoch 3/10
 Batch 952/951, Loss: 16.4719
Epoch 4/10
 Batch 952/951, Loss: 12.7833
Epoch 5/10
 Batch 952/951, Loss: 9.58162
Epoch 6/10
 Batch 952/951, Loss: 7.30930
Epoch 7/10
 Batch 952/951, Loss: 7.0495
Epoch 8/10
 Batch 952/951, Loss: 7.0534
Epoch 9/10
 Batch 952/951, Loss: 7.2230
Epoch 10/10
 Batch 952/951, Loss: 6.9677


In [19]:
print(generated_text)

the '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' , '' '' '' '' '' '' '' '' '' '' '' '' , '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' ''


In [20]:
altered_text = add_modern_twist(generated_text)
print(altered_text)

the `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` , `` `` `` `` `` `` `` `` `` `` `` `` , `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` ``


Changes made:

I added the KL divergence loss as an additional metric to the model using add_metric. This helps monitor the KL divergence during training.

I replaced sigmoid activation with linear activation for the decoder_mean layer. 

In [21]:
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras.losses import binary_crossentropy
import keras.backend as K

def create_vae(input_dim, latent_dim):
    # Encoder
    inputs = Input(shape=(input_dim,))
    h = Dense(64, activation='relu')(inputs)
    z_mean = Dense(latent_dim)(h)
    z_log_var = Dense(latent_dim)(h)

    # Reparameterization trick
    def sampling(args):
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                                  mean=0., stddev=1.)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    z = Lambda(sampling)([z_mean, z_log_var])

    # Decoder
    decoder_h = Dense(64, activation='relu')
    decoder_mean = Dense(input_dim, activation='sigmoid')
    h_decoded = decoder_h(z)
    x_decoded_mean = decoder_mean(h_decoded)

    # VAE model
    vae = Model(inputs, x_decoded_mean)

    # Loss
    reconstruction_loss = binary_crossentropy(inputs, x_decoded_mean) * input_dim
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(reconstruction_loss + kl_loss)

    # Add the KL divergence loss as an additional metric
    vae.add_metric(kl_loss, name='kl_loss')
    
    # Set the loss function
    vae.add_loss(vae_loss)

    return vae

In [26]:
# Create the VAE
vae = create_vae(num_words, 50)
# Train the VAE
train_vae(vae, text_one_hot, epochs=5)
# Generate new text
generated_text = generate_text(vae, word_to_index, 'the', 100)

Epoch 1/5
 Batch 952/951, Loss: 17.88472
Epoch 2/5
 Batch 952/951, Loss: 15.4405
Epoch 3/5
 Batch 952/951, Loss: 14.3518
Epoch 4/5
 Batch 952/951, Loss: 12.3553
Epoch 5/5
 Batch 952/951, Loss: 10.7540


In [27]:
altered_text = add_modern_twist(generated_text)
print(altered_text)

the `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` `` ``
