In [1]:
import numpy as np, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os, numpy as np, pandas as pd

KeyboardInterrupt: 

In [3]:
folder = "tep"
files = sorted([f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))])

X_list = []
for f in files:
    temp = pd.read_csv(f"{folder}/{f}").to_numpy()
    temp = temp[:, 1:] # ensure matrix, no header row
    X_list.append(temp)

X = np.stack(X_list, axis=0)
X = X[..., None]
print(X.shape)

(800, 300, 50, 1)


In [4]:
inp = keras.Input(shape=(X.shape[1], X.shape[2], 1), name="encoder_input")

# Encoder (all CNN)
x = layers.Conv2D(32, 3, strides=2, padding="same", activation="relu")(inp)
x = layers.Conv2D(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2D(128,3, strides=2, padding="same", activation="relu")(x)
latent_map = layers.Conv2D(256,3, strides=2, padding="same", activation="relu", name="latent_map")(x)

# Decoder
y = layers.Conv2DTranspose(128,3, strides=2, padding="same", activation="relu")(latent_map)
y = layers.Conv2DTranspose(64, 3, strides=2, padding="same", activation="relu")(y)
y = layers.Conv2DTranspose(32, 3, strides=2, padding="same", activation="relu")(y)
y = layers.Conv2DTranspose(1,  3, strides=2, padding="same")(y)
y = layers.Activation("sigmoid", name="recon_prob")(y)
y = layers.CenterCrop(X.shape[1], X.shape[2])(y)

auto = keras.Model(inp, y, name="autoencoder")


auto.compile(optimizer=keras.optimizers.Adam(1e-3),
             loss=keras.losses.BinaryCrossentropy(from_logits=False),
             metrics=[keras.metrics.BinaryAccuracy(threshold=0.5,name="bin_acc")])

auto.summary()

history = auto.fit(
    X, X,
    epochs=30,
    batch_size=min(64, X.shape[0]),
    validation_split=0.2,
    verbose=1
)


enc_out = auto.get_layer("latent_map").output
encoder_flat = keras.Model(
    inputs=auto.input,
    outputs=layers.Flatten(name="latent_flat")(enc_out),
    name="encoder_flat",
)

# Z = encoder_flat.predict(X, verbose=0)


Epoch 1/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - bin_acc: 0.7883 - loss: 0.6875 - val_bin_acc: 0.8328 - val_loss: 0.5827
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - bin_acc: 0.8987 - loss: 0.4986 - val_bin_acc: 0.8410 - val_loss: 0.4794
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - bin_acc: 0.9050 - loss: 0.3511 - val_bin_acc: 0.8410 - val_loss: 0.4091
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2s/step - bin_acc: 0.9047 - loss: 0.2864 - val_bin_acc: 0.8410 - val_loss: 0.3692
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step - bin_acc: 0.9051 - loss: 0.2605 - val_bin_acc: 0.8414 - val_loss: 0.3587
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - bin_acc: 0.9077 - loss: 0.2479 - val_bin_acc: 0.8489 - val_loss: 0.3434
Epoch 7/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [5]:
def latent_l2_distance(A: np.ndarray, B: np.ndarray, encoder: keras.Model) -> float:
    """
    Compute L2 distance between latent representations of two 2D binary arrays.
    A, B shape: (m, v). Encoder is from build_and_train_cnn_autoencoder.
    """
    assert A.ndim == 2 and B.ndim == 2, "A and B must be 2D arrays"
    A_in = A.astype(np.float32)[None, ..., None]  # (1, m, v, 1)
    B_in = B.astype(np.float32)[None, ..., None]  # (1, m, v, 1)
    zA = encoder.predict(A_in, verbose=0)[0]
    zB = encoder.predict(B_in, verbose=0)[0]
    return float(np.linalg.norm(zA - zB, ord=2))

In [37]:
latent_l2_distance(X[10, :, :, 0], X[550, :, :, 0], encoder_flat)

245.53582763671875