# Importing the required libraries

In [9]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import glob 
import shutil
import keras_tuner
from tensorflow import keras
from keras import backend as K
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Subtract, concatenate, Input, Flatten, Activation, Dense, Dropout, Lambda, Conv2D, BatchNormalization, MaxPooling2D
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping

# Utility functions

In [2]:
def test_accuracy(model):
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_accuracy)
    
def plot_loss(history):
    # Historique des valeurs de précision d'entraînement et de validation
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Historique des numéros d'époque
    epochs = range(1, len(train_loss) + 1)

    # Tracer la courbe de précision d'entraînement
    plt.plot(epochs, train_loss, 'b', label='Train Loss')
    # Tracer la courbe de précision de validation
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title('Training and Validation Losses')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Afficher le graphique
    plt.show()
    
    
def plot_accuracy(history):
    # Historique des valeurs de précision d'entraînement et de validation
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    # Historique des numéros d'époque
    epochs = range(1, len(train_accuracy) + 1)

    # Tracer la courbe de précision d'entraînement
    plt.plot(epochs, train_accuracy, 'b', label='Train Accuracy')
    # Tracer la courbe de précision de validation
    plt.plot(epochs, val_accuracy, 'r', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Afficher le graphique
    plt.show()
    
def predicting_on_dataset(X_pred, model):
    # Select a subset of the test data for visualization
    subset_size = 300
    X_subset = X_pred[:subset_size]

    # Make predictions on the subset of test data
    predictions = model.predict(X_subset)

    # Plot the images and predictions
    fig, axes = plt.subplots(subset_size, 2, figsize=(10, subset_size*2))
    for i in range(subset_size):
        # Plot first image
        axes[i, 0].imshow(X_subset[0][i])
        axes[i, 0].axis('off')

        # Plot second image
        axes[i, 1].imshow(X_subset[1][i])
        axes[i, 1].axis('off')

        # Add predicted score as title
        score = predictions[i]  # Assuming the second element represents the score
        axes[i, 1].set_title(score)

    plt.tight_layout()
    plt.show()

# Preparing the data

In [3]:
data = pd.read_csv("data\question_1\duels_question_1.csv",usecols=[0,1,2], header=None)
data.columns = ["Image 1", "Image 2", "labels"]

#Deleting the no preference data
data = data[data["labels"] != "No preference"]

### Splitting and formatting the data for the comparison model using the duels data

In [4]:
shape = 224

def prepare_dataset_arrays(image_folder, data, shape):

    image1_names = data.iloc[:,0].values
    image2_names = data.iloc[:,1].values
    labels = data.iloc[:,2].values

    image1_array = []
    image2_array = []
    
    for image1_name, image2_name in zip(image1_names, image2_names):
        for filename in os.listdir(image_folder):
            if image1_name in filename:
                image1_path = os.path.join(image_folder, filename)
                image1 = cv2.imread(image1_path)
                image1 = cv2.resize(image1, (shape, shape))
                image1 = image1.astype(np.float32) / 255.0
                image1_array.append(image1)
            elif image2_name in filename:
                image2_path = os.path.join(image_folder, filename)
                image2 = cv2.imread(image2_path)
                image2 = cv2.resize(image2, (shape, shape))
                image2 = image2.astype(np.float32) / 255.0
                image2_array.append(image2)
                
    return image1_array, image2_array, labels

image1_array, image2_array, labels = prepare_dataset_arrays("data\question_1\Sample_web_green", data, shape)


### Creating the prediction dataset

In [5]:
def prepare_prediction_siamese(directory, shape):
    image_pred = []
    for img in glob.glob(directory):
        image1 = cv2.imread(img)
        image1 = cv2.resize(image1, (shape, shape))
        image1 = image1.astype(np.float32) / 255.0
        image_pred.append(image1)
    
    image_pred_1 = tf.convert_to_tensor(np.array(image_pred[:300])) 
    image_pred_2 = tf.convert_to_tensor(np.array(image_pred[300:600])) 

    X_pred = [image_pred_1, image_pred_2]
    
    return X_pred

X_pred = prepare_prediction_siamese("data/question_1/ForPrediction/*/*", shape)

### Creating the Training, Validation and Testing datasets with a split of (60%, 20%, 20%)

In [22]:
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
def prepare_dataset_for_network(image1_array, image2_array, labels):
    # Format the labels of left and right images
    labels_formatted = []

    for label in labels:
        if label == "left":
            labels_formatted.append([1,0])  # 0 represents left image
        elif label == "right":
            labels_formatted.append([0,1])  # 1 represents right image

    labels_formatted = np.array(labels_formatted)

    # Conversion of the lists into numpy arrays
    image1_array = np.array(image1_array)
    image2_array = np.array(image2_array)

    labels_formatted = tf.convert_to_tensor(labels_formatted)

    image1_array = tf.convert_to_tensor(image1_array)
    image2_array = tf.convert_to_tensor(image2_array)
    
    # Split the data into training, validation, and test sets using array slicing
    train_size = int(0.6 * len(image1_array))
    valid_size = int(0.2 * len(image1_array))

    X_train = [image1_array[:train_size], image2_array[:train_size]]
    y_train = labels_formatted[:train_size]

    X_valid = [image1_array[train_size:train_size + valid_size], image2_array[train_size:train_size + valid_size]]
    y_valid = labels_formatted[train_size:train_size + valid_size]

    X_test = [image1_array[train_size + valid_size:], image2_array[train_size + valid_size:]]
    y_test = labels_formatted[train_size + valid_size:]
    
    return (X_train, y_train), (X_valid, y_valid), (X_test, y_test)

(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = prepare_dataset_for_network(image1_array, image2_array, labels)

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

# Building the siamese network

## Building the model for the comparison between the two pictures

In [21]:
def comparison_siamese_model(input_shape):

    base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers[:-4]:
        layer.trainable=False

    # Create inputs for pairs of images
    input_1 = Input(shape=input_shape)
    input_2 = Input(shape=input_shape)

    # Get embeddings of the images using the shared VGG19 model
    output_1 = base_model(input_1)
    output_2 = base_model(input_2)

    concat = concatenate([output_1, output_2])

    # Classification layer to predict similarity
    flatten = Flatten()(concat)
    x = Conv2D(512, (3, 3), activation='relu', padding='same')(concat)
    x = Dropout(0.3)(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = Dropout(0.1)(x)
    x = Flatten()(x)
    output = Dense(2, activation='sigmoid')(x)

    # Create the complete siamese model
    siamese_model = Model(inputs=[input_1, input_2], outputs=output)

    # Compile the model
    siamese_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.000001), metrics=['accuracy'])

    # Print model summary
    siamese_model.summary()
    
    return siamese_model

# Train the siamese network

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

siamese_comparison_model = comparison_siamese_model((224, 224, 3))

history = siamese_comparison_model.fit(X_train, y_train, batch_size=64, epochs=5, validation_data=(X_valid, y_valid), callbacks=[early_stopping])

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_22 (InputLayer)           [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
input_23 (InputLayer)           [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
vgg19 (Functional)              (None, 7, 7, 512)    20024384    input_22[0][0]                   
                                                                 input_23[0][0]                   
__________________________________________________________________________________________________
concatenate_3 (Concatenate)     (None, 7, 7, 1024)   0           vgg19[0][0]                

ValueError: in user code:

    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\engine\training.py:788 train_step
        loss = self.compiled_loss(
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\engine\compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\losses.py:245 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\losses.py:1809 binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\keras\backend.py:5000 binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\ops\nn_impl.py:245 sigmoid_cross_entropy_with_logits_v2
        return sigmoid_cross_entropy_with_logits(
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\ops\nn_impl.py:132 sigmoid_cross_entropy_with_logits
        raise ValueError("logits and labels must have the same shape (%s vs %s)" %

    ValueError: logits and labels must have the same shape ((None, 2) vs (None, 1))


### Testing accuracy check

In [None]:
# Evaluate the model on the test set
test_accuracy(siamese_comparison_model)

### Plotting the accuracy metric for the validation and training datasets

In [None]:
plot_accuracy(history)

### Plotting the loss for the validation and training datasets

In [None]:
plot_loss(history)

### Plotting some of the results from the prediction on the Prediction dataset

In [None]:
#predicting_on_dataset(X_pred, siamese_comparison_model)

# Building the ranking model

In [36]:
def prepare_label_for_ranking(labels):
    # Format the labels of left and right images
    labels_formatted = []

    for label in labels:
        if label == "left":
            labels_formatted.append(0)  # 0 represents left image
        elif label == "right":
            labels_formatted.append(1)  # 1 represents right image

    labels_formatted = np.array(labels_formatted)
    labels_formatted = tf.convert_to_tensor(labels_formatted)
    # Split the data into training, validation, and test sets using array slicing
    train_size = int(0.6 * len(image1_array))
    valid_size = int(0.2 * len(image1_array))
    
    y_train = labels_formatted[:train_size]
    y_valid = labels_formatted[train_size:train_size + valid_size]
    y_test = labels_formatted[train_size + valid_size:]
    
    return y_train, y_valid, y_test

y_train, y_valid, y_test = prepare_label_for_ranking(labels)

### Guillaume's model

In [19]:
def create_ranking_network(img_size):
    """
    Create ranking network which give a score to an image.

    :param img_size: size of input images during training
    :type img_size: tuple(int)
    :return: ranking network model
    :rtype: keras.Model
    """
    # Create feature extractor from VGG19
    feature_extractor = VGG19(weights="imagenet", include_top=False, input_shape=(img_size, img_size, 3))
    # for layer in feature_extractor.layers[:-4]:
    #     layer.trainable = False

    # Add dense layers on top of the feature extractor
    inp = Input(shape=(img_size, img_size, 3), name='input_image')
    base = feature_extractor(inp)
    base = Flatten(name='Flatten')(base)

    # Block 1
    base = Dense(32, activation='relu', name='Dense_1')(base)
    base = BatchNormalization(name='BN1')(base)
    base = Dropout(0.490, name='Drop_1')(base)

    # # Block 2
    # base = Dense(128, activation='relu', name='Dense_2')(base)
    # base = BatchNormalization(name='BN2')(base)
    # base = Dropout(0.368, name='Drop_2')(base)

    # Final dense
    base = Dense(1, name="Dense_Output")(base)
    base_network = Model(inp, base, name='Scoring_model')
    return base_network


def create_meta_network(img_size, weights=None):
    """
    Create meta network which is used to to teach the ranking network.

    :param img_size: dimension of input images during training.
    :type img_size: tuple(int)
    :param weights: path to the weights use for initialization
    :type weights: str
    :return: meta network model
    :rtype: keras.Model
    """

    # Create the two input branches
    input_left = Input(shape=(img_size, img_size, 3), name='left_input')
    input_right = Input(shape=(img_size, img_size, 3), name='right_input')
    base_network = create_ranking_network(img_size)
    left_score = base_network(input_left)
    right_score = base_network(input_right)

    # Subtract scores
    diff = Subtract()([left_score, right_score])

    # Pass difference through sigmoid function.
    prob = Activation("sigmoid", name="Activation_sigmoid")(diff)
    model = Model(inputs=[input_left, input_right], outputs=prob, name="Meta_Model")

    if weights:
        print('Loading weights ...')
        model.load_weights(weights)


    sgd = SGD(lr=1e-6, decay=1e-6, momentum=0.393, nesterov=True)
    model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=['accuracy'])

    return model

meta_network = create_meta_network(224)
meta_network.summary()
meta_network.fit(X_train, y_train, batch_size=16, epochs=5, validation_data=(X_valid, y_valid), callbacks=[early_stopping])

Model: "Meta_Model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
left_input (InputLayer)         [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
right_input (InputLayer)        [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Scoring_model (Functional)      (None, 1)            20827393    left_input[0][0]                 
                                                                 right_input[0][0]                
__________________________________________________________________________________________________
subtract_4 (Subtract)           (None, 1)            0           Scoring_model[0][0]     

Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150


KeyboardInterrupt: 

### Custom Ranking model

In [41]:
def ranking_siamese_model(input_shape):
    
    base_model = VGG19(weights="imagenet", include_top=False, input_shape=input_shape)
    # Get embeddings of the images using the shared VGG19 model
    input_1 = Input(shape=input_shape)
    input_2 = Input(shape=input_shape)

    output_1 = base_model(input_1)
    output_2 = base_model(input_2)

    # Flatten the embeddings
    flatten_1 = Flatten()(output_1)
    flatten_2 = Flatten()(output_2)

    # Concatenate the flattened embeddings
    concat = concatenate([flatten_1, flatten_2])

    # Classification layers to predict similarity
    x = Dense(512, activation='relu')(concat)
    x = Dropout(0.3)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.1)(x)
    output_similarity = Dense(2, activation='softmax', name='similarity')(x)

    # Ranking sub-network
    ranking_net = Dense(32, activation='relu', name="Ranking_Dense1")(concat)
    ranking_net = BatchNormalization(name='Ranking_BN1')(ranking_net)
    ranking_net = Dense(32, activation='relu', name="Ranking_Dense2")(ranking_net)
    ranking_net = BatchNormalization(name='Ranking_BN2')(ranking_net)
    ranking_score_1 = Dense(1, name="Ranking_Final_dense_1")(ranking_net)  # Output a single ranking score
    ranking_score_2 = Dense(1, name="Ranking_Final_dense_2")(ranking_net)  # Output a single ranking score

    # Create the complete model with both similarity and ranking outputs
    siamese_model = Model(inputs=[input_1, input_2], outputs=[output_similarity, ranking_score_1, ranking_score_2])

    # Compile the model with appropriate losses and weights
    siamese_model.compile(loss=['binary_crossentropy', 'mse', 'mse'],
                          loss_weights=[1.0, 0.5, 0.5],  # Adjust the weights as needed
                          optimizer=Adam(learning_rate=0.000001),
                          metrics=['accuracy'])

    # Print model summary
    siamese_model.summary()

    return siamese_model

ranking_model = ranking_siamese_model((224, 224, 3))

y_train_encoded = tf.one_hot(y_train, depth=2)

ranking_model.fit(X_train, y_train_encoded, epochs = 5, batch_size = 4, validation_data = (X_valid, y_valid))

ResourceExhaustedError: failed to allocate memory [Op:Mul]

tf.Tensor([1 1 0 ... 1 1 1], shape=(2737,), dtype=int32)
