In [None]:
import pandas as pd
import os
from PIL import Image
import numpy as np
import glob
from tqdm.auto import tqdm
import cv2
import re
import random
import seaborn as sn
import matplotlib.pyplot as plt

from PrepSteps.ReSize import FindSize ,SizeScaler
from PrepSteps.OneShotAugment import FindMax , AugmentData
from PrepSteps.PairGen import make_pairs
from PrepSteps.PrepareData import PrepareData
from LoadModels.SiameseNet import SiameseNet


from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, roc_curve, auc, classification_report
 

from tensorflow.keras.layers import Activation,Input,Lambda, Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
import tensorflow as tf

In [None]:
# DIRECTORIES

# Root folder of the raw dataset (edit this before running the notebook).
data_path = r'DIRECTORY OF THE DATASET'

# NEW DIRECTORY
# Parent folder of `data_path`. os.path.dirname understands the platform's
# path separators; the previous manual split on '\\' silently dropped the last
# character of any path that contained no backslash.
new_path = os.path.dirname(data_path)

_____________________________________________

##### Preparation of the dataset

In [None]:
## Minimum acceptable number of images in each category
thrush = 42
## The final size of the images
rescaler = (100,100)


#### All the preparation steps in a sequence
# NOTE(review): the original call placed the positional `thrush` after the
# keyword `image_size=...`, which is a SyntaxError. The arguments are now passed
# positionally in the order they were written; confirm that this matches the
# PrepareData signature.
PrepareData(data_path, rescaler, thrush, left_overs=True)

###### If you do not need all the preparation steps, uncomment the code below

### Siamese Neural Network

###### Load the dataset

In [None]:
def counter(data, location):
    
    '''Creates a dataframe with the location and
    the category + sub-category information.

    data     : iterable of image paths, each containing `location` followed by
               <separator><category><separator><sub-category><separator>...
    location : root folder string used to cut the prefix off every path

    Returns a DataFrame with the columns 'Path', 'Category' and 'Sub-Category'
    (empty when `data` is empty).

    Raises IndexError when a path does not contain `location` or has fewer
    than two folder levels after it.
    '''
    
    paths = list(data)
    categories = []
    sub_categories = []

    ## Split in categories and sub-categories
    for path in paths:
        # Text that follows the first occurrence of `location` in the path
        remainder = path.split(location)[1]
        # Accept both '\\' and '/' so the notebook is not tied to Windows.
        # Element 0 is the empty string in front of the leading separator.
        parts = re.split(r'[\\/]', remainder)[1:3]
        categories.append(parts[0])
        sub_categories.append(parts[1])

    # Build the frame in one go: element-wise writes through
    # df[col].iat[i] are chained assignments and are silently lost when
    # pandas copy-on-write is active.
    return pd.DataFrame({'Path': paths,
                         'Category': categories,
                         'Sub-Category': sub_categories})


def DataLoad(location):
    
    '''Loads every PNG image stored two folder levels below `location`
    (location/<category>/<sub-category>/<file>.png).

    Returns
        images : numpy array holding the images as read by cv2.imread
        labels : numpy array with the integer-encoded sub-category of each image
        info   : DataFrame built by `counter` plus the encoded 'Label' column

    Raises ValueError when no image is found or an image cannot be read.
    '''
    
    # Build the pattern with os.path.join so it works with any path separator,
    # and sort the matches so the sample order is the same on every run
    pattern = os.path.join(location, '*', '*', '*.png')
    data = sorted(glob.glob(pattern))
    if not data:
        raise ValueError('No PNG images found with pattern: ' + pattern)
    
    # create the dataframe with the categories
    info = counter(data, location)
    
    # Encode the Labels of the dataset
    le = preprocessing.LabelEncoder()
    info['Label'] = le.fit_transform(list(info['Sub-Category']))
    ## Labels to array
    labels = np.array(info['Label'])
    
    images = []
    # Append the images to X
    for path in info['Path']:
        image = cv2.imread(path)
        # cv2.imread signals an unreadable file by returning None instead of
        # raising, which would otherwise corrupt the image array
        if image is None:
            raise ValueError('Could not read image: ' + path)
        images.append(image)
    # Change the list to numpy array
    images = np.asarray(images)
    
    return images, labels, info

In [None]:
# NOTE(review): `PATH` is not assigned in any visible cell of this notebook.
# Confirm where it is defined (presumably the folder holding the prepared images).
# Load the images, their encoded labels and the per-image information table.
x, y ,dataset= DataLoad(PATH)
# Hold out 30% of the samples for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

#### Loads the Siamese Neural Network

In [None]:
# Build the Siamese network for 100x100 images with 3 channels
siamese_net = SiameseNet((100,100,3))

# create the pairs: split_size, num_pairs
# The pairs are generated from the training split only. x_test / y_test are
# used for the final evaluation, so building the training pairs from the full
# x / y would leak test images into training.
pairs_train, labels_train, pairs_test, labels_test = make_pairs(x_train, y_train, .3, 20)

siamese_net.summary()

### Run the model

In [None]:
## create folder to save weights for later use
# exist_ok=True makes the call a no-op when the folder is already there
# (the original makedirs call was also missing its closing parenthesis).
os.makedirs('weights', exist_ok=True)

# Optional early stopping: pass
#   callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20)]
# to fit() below.

# Left / right images of every pair, shaped the way the network input expects
train_inputs = [pairs_train[:, 0].reshape(-1, 100, 100, 3),
                pairs_train[:, 1].reshape(-1, 100, 100, 3)]
val_inputs = [pairs_test[:, 0].reshape(-1, 100, 100, 3),
              pairs_test[:, 1].reshape(-1, 100, 100, 3)]

# `siamese` keeps the History object that the plotting cell reads
siamese = siamese_net.fit(train_inputs, labels_train[:],
        batch_size=100,
        epochs=100,
        verbose=1,
        validation_data=(val_inputs, labels_test[:]))

## Save the weights in the folder
siamese_net.save_weights('weights/model_20Shot.h5')

### Plot the accuracy and loss of the model

In [None]:
### Plot of loss and accuracy
history = siamese.history

# Keras stores the accuracy metric as 'acc' or 'accuracy' depending on the
# version and on how the model was compiled, so use whichever key is present.
acc_key = 'acc' if 'acc' in history else 'accuracy'
acc = history[acc_key]
val_acc = history['val_' + acc_key]
loss = history['loss']
val_loss = history['val_loss']

epochs = range(len(acc))

# First figure: accuracy per epoch
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Second figure: loss per epoch
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

_________________________________________________

## Test the model

In [None]:
# generate the pairs for the test, similar to the training data
# (the last two return values are not used)
pairs_test, labels_test , your, no= make_pairs(x_test,y_test,0.5,20)

# The evaluation cells read the ground truth from `targets_test`, which is not
# assigned in any visible cell of this notebook; alias it to the labels
# generated here so that those cells run on a fresh kernel.
targets_test = labels_test

##### If your GPU runs out of memory, uncomment the code below to keep a small sample

In [None]:
### Cast the input data for the testing
# Keep the inputs as float32 NumPy arrays so that predict() can slice them into
# batches itself.
left = np.asarray(pairs_test[:,0], dtype='float32')
right = np.asarray(pairs_test[:,1], dtype='float32')

### predict on the model
# Predict in mini-batches: steps=1 pushed the whole test set through the
# network in a single batch, which can exhaust GPU memory.
pred = siamese_net.predict([left, right], batch_size=100)


def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold of 0.5.

    y_true : array-like of 0/1 labels, any shape (flattened internally)
    y_pred : array-like of model outputs, any shape (flattened internally);
             values above 0.5 count as the positive class

    Returns the fraction of predictions that match the labels.
    '''
    pred = np.asarray(y_pred).ravel() > .5
    # Flatten the labels too: comparing a (n,) prediction with (n, 1) labels
    # would broadcast to an (n, n) matrix and report a wrong accuracy.
    truth = np.asarray(y_true).ravel()
    return np.mean(pred == truth)

# Report the accuracy on the test pairs as a percentage with two decimals
print("The accuracy of the model is",round(compute_accuracy(targets_test,pred)*100,2),"%")

### ROC Curve

In [None]:

# False / true positive rates over the score thresholds and the area under the curve
fpr, tpr, thresholds = roc_curve(y_true=targets_test, y_score=pred)
roc_auc = auc(x=fpr, y=tpr)

roc_fig, roc_ax = plt.subplots()
# Model curve
roc_ax.plot(fpr, tpr, color='darkorange', lw=1, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line
roc_ax.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver operating characteristic')
roc_ax.legend(loc="lower right")
plt.show()

### Plot the confusion matrix 

In [None]:
# Turn the model outputs into boolean decisions with a 0.5 threshold;
# `pred_fixed` is reused by the classification report cell.
pred_fixed = pred.ravel()>.5
# Confusion matrix of the ground truth against the thresholded predictions
# (scikit-learn puts the true classes on the rows, predicted on the columns).
cmf= (confusion_matrix(targets_test,pred_fixed))

# Draw the matrix as an annotated heatmap with integer cell values.
# NOTE(review): sn.set() is called after the figure is created and before the
# heatmap is drawn; keep this order, since it presumably changes plotting
# defaults for the rest of the session. Verify before moving it.
plt.figure(figsize=(10,7))
sn.set(font_scale=1.4) # for label size
sn.heatmap(cmf, annot=True,fmt='d', cmap="YlGnBu", annot_kws={"size": 16}) # font size

plt.show()

### Precision and Recall

In [None]:
# Per-class precision, recall and F1 for the thresholded predictions
print(classification_report(y_true=targets_test, y_pred=pred_fixed))