In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import numpy as np
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity




In [None]:
# ---- Configuration ---------------------------------------------------------
# Seeds: one fixed seed for the database vectors, and an inclusive range of
# seeds (QUERY_SEED_START..QUERY_SEED_END) that yields one query vector each.
DB_SEED_NUMBER = 42
QUERY_SEED_START = 0
QUERY_SEED_END = 1000

# Data shape: number of database vectors and the width of every vector.
size = 1_000_000
DIMENSION = 70

# Model: bottleneck width of the autoencoder and where the trained model is
# written; the filename embeds `size` so runs on different sizes do not clash.
LATENT_DIM = 20
MODEL_PATH = f"autoencoder_model_{size}.keras"

In [None]:
# Database vectors: `size` rows of DIMENSION float32 samples drawn from a
# generator seeded with DB_SEED_NUMBER, so the dataset is reproducible.
db_shape = (size, DIMENSION)
rng = np.random.default_rng(seed=DB_SEED_NUMBER)
vectors = rng.random(size=db_shape, dtype=np.float32)

In [None]:
# One (1, DIMENSION) query vector per seed in QUERY_SEED_START..QUERY_SEED_END
# (end inclusive). Each query gets its own freshly seeded generator, so any
# single query can be regenerated from its seed alone.
query_vectors = [
    np.random.default_rng(query_seed).random((1, DIMENSION), dtype=np.float32)
    for query_seed in range(QUERY_SEED_START, QUERY_SEED_END + 1)
]

In [None]:
# Collapse the per-seed (1, DIMENSION) query arrays into a single 2-D array.
query_vectors = np.vstack(query_vectors)

# Append the query rows after the database rows so the model trains on both.
# Only the first `size` rows of `vectors` are kept before appending: on the
# first run that is the whole database, and on a re-run of this cell it drops
# the previously appended queries instead of stacking a second copy of them.
vectors = np.vstack([vectors[:size], query_vectors])

In [None]:
# Hidden-layer widths, listed in the order the layers are applied.
encoder_widths = (240, 220, 180, 150, 120, 100, 80, 60, 40)
decoder_widths = (40, 60, 80, 100, 120, 150, 180, 220, 240)

# Encoder: input -> encoder_dense_0 .. encoder_dense_8 -> latent_space.
input_layer = layers.Input(shape=(DIMENSION,), name="input_layer")
encoded = input_layer
for layer_idx, width in enumerate(encoder_widths):
    encoded = layers.Dense(
        width, activation='relu', name=f"encoder_dense_{layer_idx}"
    )(encoded)
encoded = layers.Dense(LATENT_DIM, activation='relu', name="latent_space")(encoded)

# Decoder: latent_space -> decoder_dense_1 .. decoder_dense_9 -> output_layer.
# Decoder layer names are numbered from 1 (the encoder's start at 0); later
# code looks these layers up by name, so the numbering must stay as is.
decoded = encoded
for layer_idx, width in enumerate(decoder_widths, start=1):
    decoded = layers.Dense(
        width, activation='relu', name=f"decoder_dense_{layer_idx}"
    )(decoded)
decoded = layers.Dense(DIMENSION, activation='sigmoid', name="output_layer")(decoded)

# Full autoencoder (input -> reconstruction) and the encoder-only view that
# shares the same layers (input -> latent code).
autoencoder = models.Model(input_layer, decoded)
encoder = models.Model(input_layer, encoded)

# Only the autoencoder is compiled; it is trained to reproduce its input.
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the autoencoder to reconstruct its own input (input and target are
# the same array), reshuffling the rows every epoch.
autoencoder.fit(
    x=vectors,
    y=vectors,
    batch_size=256,
    epochs=25,
    shuffle=True,
)

In [None]:
# Persist the trained autoencoder to MODEL_PATH and confirm where it went.
autoencoder.save(filepath=MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")

In [None]:
# Run every vector through the encoder to get its latent-space representation.
encoded_vectors = encoder.predict(x=vectors)

# Report the shapes before and after compression.
print(f"Original Vectors Shape: {vectors.shape}")
print(f"Compressed Vectors Shape (Latent Space): {encoded_vectors.shape}")

In [None]:
# Reload the saved autoencoder, split it into standalone encoder and decoder
# models, and sanity-check one encode -> decode round trip on a random vector.

# Load from MODEL_PATH (the path the model was saved to) instead of repeating
# the filename as a literal, so the two cannot drift apart if `size` changes.
autoencoder = tf.keras.models.load_model(MODEL_PATH)

for layer in autoencoder.layers:
    print(layer.name)

# Extract the encoder model from the autoencoder: original input up to the
# bottleneck layer.
latent_layer = autoencoder.get_layer('latent_space')
encoder = tf.keras.models.Model(autoencoder.input, latent_layer.output)

# Extract the decoder model from the autoencoder. The decoder needs its own
# input; its width is read from the loaded bottleneck layer rather than
# hard-coded, so it always matches the model that was actually loaded.
decoder_input = tf.keras.layers.Input(shape=(latent_layer.units,))

# Re-apply the trained decoder layers, in order, on top of the new input.
decoder_layer_names = [f'decoder_dense_{i}' for i in range(1, 10)] + ['output_layer']
decoder_output = decoder_input
for decoder_layer_name in decoder_layer_names:
    decoder_output = autoencoder.get_layer(decoder_layer_name)(decoder_output)

# Create the full decoder model
decoder = tf.keras.models.Model(decoder_input, decoder_output)

# Pick a random vector for testing; reshape to a batch of one row.
random_vector_index = np.random.randint(0, vectors.shape[0])
test_vector = vectors[random_vector_index].reshape(1, -1)

print("Original Vector:")
print(test_vector)

# Encode the vector
encoded_vector = encoder.predict(test_vector)
print("\nEncoded Vector:")
print(encoded_vector)

# Decode the encoded vector using the decoder
decoded_vector = decoder.predict(encoded_vector)
print("\nDecoded Vector:")
print(decoded_vector)

# Measure the similarity between the original and decoded vectors using cosine similarity
similarity = cosine_similarity(test_vector, decoded_vector)
print(f"\nCosine Similarity between Original and Decoded Vector: {similarity[0][0]}")

# NOTE: the second line reports the shape of the single encoded test vector
# (one row), while the first reports the shape of the whole `vectors` array.
print(f"Original Vectors Shape: {vectors.shape}")
print(f"Compressed Vectors Shape (Latent Space): {encoded_vector.shape}")
