In [1]:
import random
import lr_scheduler
import matplotlib.pyplot as plt
#from func import *
import func
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
from PIL import Image
import cv2
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.initializers import GlorotUniform
from PIL import Image, ImageEnhance

folder_path = "F:/code/barlow/UR5"
file_pairs = func.load_file_pairs(folder_path)

images = []
npy_file = []
for jpg_path, npy_path in file_pairs:
    images.append(func.read_jpg_files(jpg_path))
    npy_file.append(func.read_and_parse_npy_file(npy_path))

In [2]:
AUTO = tf.data.AUTOTUNE
CROP_TO = 32
SEED = 42

PROJECT_DIM = 2048
BATCH_SIZE = 32
EPOCHS = 10

### Traning

In [3]:
# Augmentation Functions

def augment_image(image, crop_ratio=0.05):
    h, w = image.shape
    # Calculate crop dimensions
    crop_h = int(h * crop_ratio)
    crop_w = int(w * crop_ratio)

    # cropping
    cropped_image = image[crop_h:h-crop_h, crop_w:w-crop_w]

    # Resize back to original
    resized_image = cv2.resize(cropped_image, (w, h), interpolation=cv2.INTER_LINEAR)

    return resized_image

# No2. Augmentation Functions
def adjust_brightness(image, factor=1.1):
    """
    scale pixel values
    """
    adjusted = np.clip(image * factor, 0, 255).astype(np.uint8)
    return adjusted

def add_noise_vector(vector,alpha= 0.01):
    noise = np.random.normal(0, alpha, size=len(vector)) # alpha standard deviation
    augmented_vector = [v + n for v, n in zip(vector, noise)]
    #augmented_vector = [i.astype(float) for i in augmented_vector]
    return augmented_vector


In [4]:
STEPS_PER_EPOCH = 597 // BATCH_SIZE
TOTAL_STEPS = STEPS_PER_EPOCH * EPOCHS
WARMUP_EPOCHS = int(EPOCHS * 0.1)
WARMUP_STEPS = int(WARMUP_EPOCHS * STEPS_PER_EPOCH)

In [5]:
# create train loss data
scaler = StandardScaler()

# image
image1 = [augment_image(img) for img in images]
image2 = [adjust_brightness(img, factor=1.2) for img in images]

# noise
npy_file_1 = [add_noise_vector(i,alpha= 0.05) for i in npy_file]
npy_file_2 = [add_noise_vector(i,alpha= 0.10) for i in npy_file]

# Normalize
image1 = [(i.astype("float32") / 255.0) for i in image1]
image2 = [(i.astype("float32") / 255.0) for i in image2]
npy_file_1 = scaler.fit_transform(npy_file_1)
npy_file_2 = scaler.fit_transform(npy_file_2)


img_ds_1 = tf.data.Dataset.from_tensor_slices(image1).batch(BATCH_SIZE)
img_ds_2 = tf.data.Dataset.from_tensor_slices(image2).batch(BATCH_SIZE)
npy_ds_1 = tf.data.Dataset.from_tensor_slices(npy_file_1).batch(BATCH_SIZE)
npy_ds_2 = tf.data.Dataset.from_tensor_slices(npy_file_2).batch(BATCH_SIZE)

# Combine image và vector thành input pairs
BL_ds = tf.data.Dataset.zip((img_ds_1, img_ds_2, npy_ds_1, npy_ds_2)).prefetch(tf.data.AUTOTUNE)
BL_ds_test = BL_ds

In [6]:
for batch in BL_ds_test.take(1):
    (x1, x2, y1, y2) = batch
    print("Image 1 shape:", x1.shape)  
    print("Image 2 shape:", x2.shape)  
    print("Vector 1 shape:", y1.shape)  
    print("Vector 2 shape:", y2.shape)  

Image 1 shape: (32, 128, 128)
Image 2 shape: (32, 128, 128)
Vector 1 shape: (32, 8)
Vector 2 shape: (32, 8)


In [7]:
# Barlow Twin Loss
def off_diagonal(x):
    n = tf.shape(x)[0]
    flattened = tf.reshape(x, [-1])[:-1]
    off_diagonals = tf.reshape(flattened, (n - 1, n + 1))[:, 1:]
    return tf.reshape(off_diagonals, [-1])

def normalize_repr(z):
    z_norm = (z - tf.reduce_mean(z, axis=0)) / (tf.math.reduce_std(z, axis=0) + 1e-8)
    return z_norm

def compute_loss(z_a, z_b, lambd = 5e-3):
    batch_size = tf.cast(tf.shape(z_a)[0], z_a.dtype)
    repr_dim = tf.shape(z_a)[1]

    # Normalize the representations along the batch dimension.
    z_a_norm = normalize_repr(z_a)
    z_b_norm = normalize_repr(z_b)

    # Cross-correlation matrix.
    c = tf.matmul(z_a_norm, z_b_norm, transpose_a=True) / batch_size

    # Loss.
    on_diag = tf.linalg.diag_part(c) + (-1)
    on_diag = tf.reduce_sum(tf.pow(on_diag, 2))
    off_diag = off_diagonal(c)
    off_diag = tf.reduce_sum(tf.pow(off_diag, 2))
    loss = on_diag + (lambd * off_diag)
    return loss   


In [8]:
def network_1():
    inputs = layers.Input(shape=(128, 128, 1), name="image_input") 
    x = tf.keras.layers.Conv2D(128, (3, 3), activation="relu")(inputs)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.3)(x)

    x = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(inputs)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.3)(x)

    x = tf.keras.layers.Flatten()(x)
    outputs = tf.keras.layers.Dense(30, activation="relu")(x)  # Image embedding
    return tf.keras.Model(inputs, outputs, name="ImageNetwork")

def network_2():
    inputs = layers.Input(shape=(8,), name="npy_input")
    outputs = tf.keras.layers.Dense(10, activation="relu")(inputs)  # Joint embedding
    return tf.keras.Model(inputs, outputs, name="JointStateNetwork")


def combined_model():
    image_input = layers.Input(shape=(128, 128, 1), name="image_input")
    npy_input = layers.Input(shape=(8,), name="npy_input")
    
    image_output = network_1()(image_input)
    npy_output = network_2()(npy_input)
    
    combined = layers.Concatenate()([image_output, npy_output])
    latent_space = layers.Dense(20, activation="relu", name="latent_space")(combined)
    
    return tf.keras.Model(inputs=[image_input, npy_input], outputs=latent_space, name="CombinedModel")

@tf.function
def train_step(image1, image2, npy1, npy2, network1, network2, optimizer, lambda1, lambda2):
    with tf.GradientTape() as tape:
        # Forward pass for images
        z_img1 = network1(image1, training=True)
        z_img2 = network1(image2, training=True)
        loss1 = compute_loss(z_img1, z_img2)

        # Forward pass for joints
        z_npy1 = network2(npy1, training=True)
        z_npy2 = network2(npy2, training=True)
        loss2 = compute_loss(z_npy1, z_npy2)

        # Total loss
        # with lambda = 1/Dimension
        total_loss = loss1 * lambda1 + loss2 * lambda2
    
    gradients = tape.gradient(total_loss, network1.trainable_variables + network2.trainable_variables)
    optimizer.apply_gradients(zip(gradients, network1.trainable_variables + network2.trainable_variables))
    
    return {"loss_total": total_loss, "loss1": loss1, "loss2": loss2}


In [9]:
lr_decayed_fn = lr_scheduler.WarmUpCosine(
    learning_rate_base=1e-4,
    total_steps=EPOCHS * STEPS_PER_EPOCH,
    warmup_learning_rate=0.0,
    warmup_steps=WARMUP_STEPS
)

#lr_decayed_fn = 5e-4     # 0.005 = 5e-3

model = combined_model()
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_decayed_fn, momentum=0.9)
lambd = 5e-3 
EPOCHS = 20
lambda1 = 1/16
lambda2 = 1
network1 = network_1()
network2 = network_2()

for epoch in range(EPOCHS):
    for image1_batch, image2_batch, npy1_batch, npy2_batch in BL_ds_test:
        losses = train_step(image1_batch, image2_batch, npy1_batch, npy2_batch, network1, network2, optimizer, lambda1, lambda2)
    
    print(f"Epoch {epoch+1}, Total Loss: {losses['loss_total']:.4f}, Loss1: {losses['loss1']:.4f}, Loss2: {losses['loss2']:.4f}")


Epoch 1, Total Loss: 10.6736, Loss1: 42.1321, Loss2: 8.0403
Epoch 2, Total Loss: 8.9237, Loss1: 16.4785, Loss2: 7.8938
Epoch 3, Total Loss: 8.5998, Loss1: 15.5495, Loss2: 7.6279
Epoch 4, Total Loss: 8.0723, Loss1: 12.8648, Loss2: 7.2683
Epoch 5, Total Loss: 7.6492, Loss1: 11.7760, Loss2: 6.9132
Epoch 6, Total Loss: 7.3995, Loss1: 12.0388, Loss2: 6.6471
Epoch 7, Total Loss: 7.2920, Loss1: 12.9254, Loss2: 6.4841
Epoch 8, Total Loss: 7.2225, Loss1: 11.8166, Loss2: 6.4840
Epoch 9, Total Loss: 7.3166, Loss1: 13.3224, Loss2: 6.4839
Epoch 10, Total Loss: 7.2230, Loss1: 11.8260, Loss2: 6.4839
Epoch 11, Total Loss: 7.2197, Loss1: 11.7726, Loss2: 6.4839
Epoch 12, Total Loss: 7.2756, Loss1: 12.6673, Loss2: 6.4839
Epoch 13, Total Loss: 7.2228, Loss1: 11.8222, Loss2: 6.4839
Epoch 14, Total Loss: 7.2778, Loss1: 12.7019, Loss2: 6.4839
Epoch 15, Total Loss: 7.2844, Loss1: 12.8075, Loss2: 6.4839
Epoch 16, Total Loss: 7.2224, Loss1: 11.8156, Loss2: 6.4839
Epoch 17, Total Loss: 7.2293, Loss1: 11.9254, Lo

In [10]:
# func
def process_data(data_list):
    a = []
    b = []
    for jpg_path, npy_path in data_list:
        a.append(func.read_jpg_files(jpg_path))
        b.append(func.read_and_parse_npy_file(npy_path))
    return a,b

# create train test dataset
train_similar, train_dissimilar, test_similar, test_dissimilar = func.split_and_shuffle_pairs(file_pairs, folder_path)

xtrain1 = process_data(train_similar)
xtrain2 = process_data(train_dissimilar)
xtest1 = process_data(test_similar)
xtest2 = process_data(test_dissimilar)

# label 1 - similar, 0 - disimilar
ytrain1 = [1] * 500
ytrain2 = [0] * 500
ytest1 = [1] *79
ytest2 = [0] *79

#
t1 = np.array([np.expand_dims(item, axis=-1) for item in xtrain1[0]])  # Expand dims
t1 = t1.astype("float32") / 255.0  # Normalize to [0, 1]
t2 = np.array([item for item in xtrain1[1]])  # NPY input
t2 = scaler.fit_transform(t2)  # Standardize to mean=0, std=1
train_ds1 = tf.data.Dataset.from_tensor_slices(((t1, t2), ytrain1))

#
t3 = np.array([np.expand_dims(item, axis=-1) for item in xtrain2[0]])  # Expand dims
t3 = t3.astype("float32") / 255.0  # Normalize to [0, 1]
t4 = np.array([item for item in xtrain2[1]])  # NPY input
t4 = scaler.transform(t4)  # Sử dụng scaler đã fit từ trước
train_ds2 = tf.data.Dataset.from_tensor_slices(((t3, t4), ytrain2))


# concat
train_ds = train_ds1.concatenate(train_ds2)
train_ds = train_ds.shuffle(buffer_size=1000, seed=226)
train_ds = train_ds.batch(32).prefetch(tf.data.AUTOTUNE)

#
t5 = np.array([np.expand_dims(item, axis=-1) for item in xtest1[0]])  # Expand dims
t5 = t5.astype("float32") / 255.0  # Normalize to [0, 1]
t6 = np.array([item for item in xtest1[1]])  # NPY input
t6 = scaler.transform(t6)  # Sử dụng scaler đã fit từ dữ liệu training
test_ds1 = tf.data.Dataset.from_tensor_slices(((t5, t6), ytest1))

#
t7 = np.array([np.expand_dims(item, axis=-1) for item in xtest2[0]])  # Expand dims
t7 = t7.astype("float32") / 255.0  # Normalize to [0, 1]
t8 = np.array([item for item in xtest2[1]])  # NPY input
t8 = scaler.transform(t8)  # Sử dụng scaler đã fit từ dữ liệu training
test_ds2 = tf.data.Dataset.from_tensor_slices(((t7, t8), ytest2))

#
test_ds = test_ds1.concatenate(test_ds2)
test_ds = test_ds.shuffle(buffer_size=1000, seed=226)
test_ds = test_ds.batch(32).prefetch(tf.data.AUTOTUNE)

In [11]:
class FrozenConcatenate(tf.keras.layers.Layer):
    """Custom layer to concatenate tensors with trainable=False."""
    def __init__(self, **kwargs):
        super(FrozenConcatenate, self).__init__(**kwargs)
        self.trainable = False  # Set trainable=False explicitly
    
    def call(self, inputs):
        return tf.concat(inputs, axis=-1)

def create_downstream_task_model(freeze=True):
    # Load pretrained sub-networks
    image_network = network_1()
    joint_network = network_2()

    # Freeze hoặc unfreeze các lớp của pretrained models
    if freeze:
        image_network.trainable = False
        joint_network.trainable = False
    else:
        image_network.trainable = True
        joint_network.trainable = True

    # Input layers
    image_input = layers.Input(shape=(128, 128, 1), name="image_input")
    joint_input = layers.Input(shape=(8,), name="joint_input")

    # Feature extraction
    image_embedding = image_network(image_input)
    joint_embedding = joint_network(joint_input)

    # Concatenate embeddings using custom frozen layer
    combined_embedding = FrozenConcatenate(name="frozen_concatenate")([image_embedding, joint_embedding])

    # Add a classification head
    output = layers.Dense(2, activation="softmax", name="classification")(combined_embedding)

    # Create model
    downstream_model = tf.keras.Model(inputs=[image_input, joint_input], outputs=output, name="DownstreamTaskModel")
    
    return downstream_model


In [12]:
# Sau khi tạo downstream model
downstream_model = create_downstream_task_model(freeze=True)

# Kiểm tra lại trọng số của các lớp cuối cùng trong downstream task
print("Trọng số lớp cuối network_1 trong downstream model:")
print(downstream_model.get_layer("ImageNetwork").layers[-1].get_weights())

print("Trọng số lớp cuối network_2 trong downstream model:")
print(downstream_model.get_layer("JointStateNetwork").layers[-1].get_weights())


for layer in downstream_model.layers:
    print(f"Layer {layer.name} trainable: {layer.trainable}")



Trọng số lớp cuối network_1 trong downstream model:
[array([[ 1.7791046e-03,  1.6006832e-03, -2.6084578e-03, ...,
         2.1543987e-03,  1.8573399e-03,  8.8305678e-05],
       [-4.1144942e-03, -3.5310960e-03,  4.3727746e-03, ...,
         2.3173885e-03,  1.1221957e-03,  4.1901181e-03],
       [-3.5800021e-03, -1.7465944e-03,  2.8693588e-03, ...,
        -1.4517049e-03, -1.2668660e-03,  4.7524245e-03],
       ...,
       [-1.8497983e-03,  1.4876155e-03, -4.2687636e-05, ...,
         3.4739599e-03, -1.8873890e-03, -4.6235183e-04],
       [-2.1921620e-03,  9.4010960e-04, -1.6554489e-03, ...,
         3.0351132e-03,  1.1770888e-03, -6.0964189e-04],
       [ 3.6701662e-03, -3.7156683e-03, -4.5210710e-03, ...,
         7.6347264e-04,  1.8666014e-03,  2.9561697e-03]], dtype=float32), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)]
Trọng số lớp cuối network_2 trong downstream model:
[array

In [13]:
# Compile the model
downstream_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0005),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# Train the model
history = downstream_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=20,
    batch_size=32,
    verbose=1
)

Epoch 1/20




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.5158 - loss: 0.6980 - val_accuracy: 0.4937 - val_loss: 0.7020
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.5030 - loss: 0.7013 - val_accuracy: 0.4937 - val_loss: 0.7019
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.5366 - loss: 0.6934 - val_accuracy: 0.4937 - val_loss: 0.7018
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.5116 - loss: 0.7012 - val_accuracy: 0.5000 - val_loss: 0.7018
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.5437 - loss: 0.6935 - val_accuracy: 0.5000 - val_loss: 0.7017
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.5120 - loss: 0.6973 - val_accuracy: 0.5000 - val_loss: 0.7017
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━

In [14]:
# print
# Predict on the test dataset
predictions = downstream_model.predict(test_ds)
# Extract the true labels from the test dataset
true_labels = np.concatenate([y for x, y in test_ds], axis=0)
# Print predictions and true labels
for i, prediction in enumerate(predictions):
    predicted_label = np.argmax(prediction)  # Get the predicted class index
    print(f"Sample {i} --- Result:{prediction}")
    print(f"Predicted Label = {predicted_label}, True Label = {true_labels[i]}")


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Sample 0 --- Result:[0.60739094 0.39260906]
Predicted Label = 0, True Label = 0
Sample 1 --- Result:[0.4211709 0.5788291]
Predicted Label = 1, True Label = 0
Sample 2 --- Result:[0.38916326 0.6108368 ]
Predicted Label = 1, True Label = 1
Sample 3 --- Result:[0.41653994 0.58346003]
Predicted Label = 1, True Label = 0
Sample 4 --- Result:[0.46937034 0.53062963]
Predicted Label = 1, True Label = 0
Sample 5 --- Result:[0.53009886 0.4699011 ]
Predicted Label = 0, True Label = 0
Sample 6 --- Result:[0.60917145 0.39082852]
Predicted Label = 0, True Label = 0
Sample 7 --- Result:[0.4593875  0.54061246]
Predicted Label = 1, True Label = 1
Sample 8 --- Result:[0.525556 0.474444]
Predicted Label = 0, True Label = 0
Sample 9 --- Result:[0.35477707 0.64522296]
Predicted Label = 1, True Label = 0
Sample 10 --- Result:[0.3671954 0.6328046]
Predicted Label = 1, True Label = 1
Sample 11 --- Result:[0.50526565 0.49473438]
Predicted 