In [None]:
# Imports
from pathlib import Path

import numpy as np
import pandas as pd
import random
import tensorflow as tf
#print('Tesorflow version: ' ,tf.__version__)
import matplotlib.pyplot as plt

In [None]:
# Initialization
# Hyperparameters for model training
epochs = 10
batch_size = 10
margin = 1  # margin for the contrastive loss; only consumed if that loss is compiled in

# Reproducibility: pair creation draws from `random` and model training draws
# from TensorFlow's generator, so seed every generator once, up front.
# NumPy is seeded as well to cover any NumPy-based randomness.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Path to the PMI library workbook, resolved relative to the user's home directory
home = Path.home()
PATH_LIB = Path(home, 'repositories','MLDeepSimilarity','MLDeepSimilarity','Data','Fe-PMI.xlsx')

In [None]:
# Load the PMI library (e.g. the Fe matrix) and unify the header:
# 'Material Name' becomes 'Label'; errors='raise' fails loudly if that column is missing.
df = pd.read_excel(PATH_LIB).rename(
    columns={'Material Name': 'Label'},
    errors='raise',
)
print(df.head(3))

In [None]:
def create_lists_values_labels(df):
    """Split a frame into its value matrix and its label list.

    Args:
        df: DataFrame containing a 'Label' column; every other column is
            treated as a value column.

    Returns:
        Tuple ``(values, labels)``: ``values`` is a NumPy array built from all
        non-'Label' columns (one row per frame row), ``labels`` is a plain
        Python list with the content of the 'Label' column.
    """
    label_list = df['Label'].values.tolist()
    # Round-trip through nested lists so NumPy infers the dtype from the cell values.
    value_rows = df.drop(columns=['Label']).values.tolist()
    return np.array(value_rows), label_list

In [None]:
def create_pairs(x, y, min_equals = 600): # counts of full data set is 684 items
    """Build random sample pairs (positive and negative) for Siamese training.

    Each pass walks over every index ``i`` and pairs ``x[i]`` with a uniformly
    drawn partner ``x[k]``. Passes are repeated until at least ``min_equals``
    "equal" pairs (``i == k``) have been produced; with purely random pairing
    and a single pass the number of equal pairs would be very small.

    NOTE(review): a pair counts as equal only when a sample is paired with
    itself. Two different samples that share the same entry in ``y`` are still
    labelled as different - confirm this is intended.

    Args:
        x: Indexable collection of feature vectors, aligned with ``y``.
        y: Labels of the samples; only ``len(y)`` is used here.
        min_equals: Minimum number of equal pairs the result must contain.

    Returns:
        Tuple ``(pairs, labels)`` of float arrays. ``pairs[n]`` holds the two
        vectors of pair ``n``; ``labels[n]`` is 0.0 for an equal pair and 1.0
        otherwise.

    Raises:
        ValueError: If ``y`` is empty while ``min_equals`` is positive (no
            equal pair could ever be produced, the loop would never end).
    """
    n_samples = len(y)
    if n_samples == 0 and min_equals > 0:
        raise ValueError("cannot create pairs from an empty data set")

    indices = range(n_samples)
    pairs = []
    labels = []
    equal_items = 0
    while equal_items < min_equals:
        for i in indices:
            k = random.choice(indices)
            is_equal = i == k
            if is_equal:
                equal_items += 1
            pairs.append([x[i], x[k]])
            labels.append(0 if is_equal else 1)
    return np.array(pairs).astype('float'), np.array(labels).astype('float')

In [None]:
# Build the pair data set and split it into test / train / validation parts.
x,y = create_lists_values_labels(df)
pairs, labels = create_pairs(x,y,600)
count = int(len(labels)/2)   # first half -> test/train, second half -> validation

# Separate the two members of every pair: index 0 feeds the first network
# input, index 1 the second one.
train_i1 = pairs[:, 0]
train_i2 = pairs[:, 1]

# NOTE(review): only pair 0 is used for testing and pairs 1..99 are not used
# at all - confirm that this gap is intentional.
x_test_1 = np.array(train_i1[0:1])
x_test_2 = np.array(train_i2[0:1])      # second member of the pair (was a copy of the first)
x_train_1 = np.array(train_i1[100:count])
x_train_2 = np.array(train_i2[100:count])
x_val_1 = np.array(train_i1[count:])    # first member of the pair (was a copy of the second)
x_val_2 = np.array(train_i2[count:])
labels_test = np.array(labels[0:1])
labels_train = np.array(labels[100:count])
labels_val = np.array(labels[count:])   # start at `count` so the pair at that index is not dropped

print(x_test_2)
print(x_test_1)
print(x_train_1.shape)
print(x_train_2.shape)
print(x_val_1.shape)
print(x_val_2.shape)
print(labels_train.shape)
print(labels_val.shape)

In [None]:
def euclidian_distance(vects):
    """Euclidean distance between two batches of vectors.

    Args:
        vects: Sequence of exactly two tensors ``(x, y)`` of matching shape.
            The squared differences are summed over axis 1.

    Returns:
        Tensor with the per-row distance; axis 1 is kept with size 1
        (``keepdims=True``).
    """
    x,y = vects
    sum_square = tf.math.reduce_sum(tf.math.square(x-y),axis=1,keepdims=True)
    # Clamp from below with epsilon so sqrt never sees an exact zero.
    # (Using `minimum` here would cap every distance at sqrt(epsilon).)
    return tf.math.sqrt(tf.math.maximum(sum_square,tf.keras.backend.epsilon()))

In [None]:
def loss(margin=1):
    """Create a contrastive-loss function bound to ``margin``.

    Args:
        margin: Value the prediction is compared against in the ``y_true``
            weighted term.

    Returns:
        Callable ``(y_true, y_pred)`` that returns the mean of
        ``(1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2``.
    """
    def contrative_loss(y_true,y_pred):
        # Part weighted by (1 - y_true): the squared prediction itself.
        near_term = (1 - y_true) * tf.math.square(y_pred)
        # Part weighted by y_true: squared amount by which the prediction
        # falls short of the margin (zero once the margin is reached).
        shortfall = tf.math.maximum(margin - (y_pred), 0)
        far_term = (y_true) * tf.math.square(shortfall)
        return tf.math.reduce_mean(near_term + far_term)

    return contrative_loss

In [None]:
# create Siamese Network
# NOTE(review): 40 is presumably the number of value columns in the PMI table - confirm.
n_features = 40

# Shared embedding tower. The input shape is passed as an explicit tuple via
# the `shape` keyword; `Input(40,)` hands over the bare int 40, not a tuple.
embedding_input = tf.keras.layers.Input(shape=(n_features,))
#normal_layer = tf.keras.layers.BatchNormalization()(embedding_input)
hidden = tf.keras.layers.Dense(40, activation='relu')(embedding_input)
# NOTE(review): dtype=float requests Python's float type for this layer only - confirm this is intended.
hidden = tf.keras.layers.Dense(40, activation='relu',name='First',dtype=float)(hidden)
embedding_network = tf.keras.Model(embedding_input, hidden)

# Two inputs that are both passed through the same embedding model.
input_a = tf.keras.layers.Input(shape=(n_features,))
input_b = tf.keras.layers.Input(shape=(n_features,))

embedding_a = embedding_network(input_a)
embedding_b = embedding_network(input_b)

# Distance between the two embeddings, mapped to a single sigmoid output.
merge_layer = tf.keras.layers.Lambda(euclidian_distance)([embedding_a,embedding_b])
#normal_layer = tf.keras.layers.BatchNormalization()(merge_layer)
output_layer = tf.keras.layers.Dense(1,activation='sigmoid')(merge_layer)
siamese_network = tf.keras.Model(inputs=[input_a,input_b],outputs=output_layer)


In [None]:
# Alternative objective (disabled): contrastive loss instead of binary cross-entropy.
#siamese_network.compile(loss=loss(margin=margin), optimizer='RMSprop', metrics=["accuracy"])
siamese_network.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
history = siamese_network.fit(
    [x_train_1, x_train_2],
    labels_train,
    validation_data=([x_val_1, x_val_2], labels_val),
    batch_size=batch_size,
    epochs=epochs,
)
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
siamese_network.summary()

In [None]:
def plt_metric(history, metric, title, has_valid=True):
    """Draw the per-epoch curve of ``metric`` and show the figure.

    Arguments:
        history: Mapping from metric name to its per-epoch values, i.e. the
            ``history`` attribute of the object returned by Model.fit.
        metric: Key of the training metric in ``history``.
        title: Text used as the plot title.
        has_valid: If true, the entry stored under ``"val_" + metric`` is
            drawn as a second curve and a train/validation legend is added.

    Returns:
        None.
    """
    plt.plot(history[metric])
    if has_valid:
        validation_key = "val_" + metric
        plt.plot(history[validation_key])
        plt.legend(["train", "validation"], loc="upper left")

    # Annotate the axes and render.
    plt.title(title)
    plt.xlabel("epoch")
    plt.ylabel(metric)
    plt.show()

# Plot the accuracy
plt_metric(history=history.history, metric="accuracy", title="Model accuracy")

# Plot the training loss. The title is kept neutral because the curve shows
# whichever loss the model was compiled with, not necessarily a contrastive loss.
plt_metric(history=history.history, metric="loss", title="Model loss")

In [None]:
# Score the held-out pair(s), then print the raw model outputs for the same inputs.
test_inputs = [x_test_1, x_test_2]

results = siamese_network.evaluate(test_inputs, labels_test)
print("test loss, test acc:", results)

predictions = siamese_network.predict(test_inputs)
print(predictions)