# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import sklearn.model_selection
import random
from termcolor import colored
from tensorflow.keras.layers import Input,Lambda,Dense,Flatten,MaxPool2D,Dropout,BatchNormalization,Activation,GlobalAveragePooling2D,LeakyReLU,subtract
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam,RMSprop
import tensorflow.keras.backend as K
import tensorflow as tf
from keras.regularizers import l2
from keras.layers import concatenate
import math
from timeit import default_timer as timer
from tensorflow.python.keras.utils.vis_utils import plot_model
import pandas as pd
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.utils import shuffle
from tensorflow.keras.callbacks import TensorBoard
import time

# Data splitting 

In [None]:
# Accumulates [image_array, class_num] samples; filled in place by create_data().
data = []


def create_data():
    """Load every readable image under ``DATADIR/<category>`` into ``data``.

    For each entry of the module-level ``categories`` list, every file in the
    matching sub-directory of ``DATADIR`` is read with ``cv2.imread`` and
    appended to the module-level ``data`` list as ``[image_array, class_num]``,
    where ``class_num`` is the position of the category in ``categories``.

    Files that OpenCV cannot decode are skipped with a printed message.
    ``cv2.imread`` reports a failed read by returning ``None`` rather than
    raising, so the result is checked explicitly; otherwise ``None`` samples
    would be stored and break the later array conversion.

    Returns:
        None. The function works by mutating the module-level ``data`` list.
    """
    for class_num, category in enumerate(categories):
        path = os.path.join(DATADIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path)
            except cv2.error as err:
                # Best effort: one bad file must not abort the whole scan.
                print(f"Skipping {img_path}: {err}")
                continue
            if img_array is None:
                # Not an image, or a corrupt/unsupported file.
                print(f"Skipping unreadable image: {img_path}")
                continue
            data.append([img_array, class_num])
# Time the image-loading pass.
start = timer()
create_data()
dt = timer() - start
print("Data collected in %f s" % dt)

# Shuffle the samples in place, then separate images from class indices.
random.shuffle(data)
features = np.array([sample for sample, _ in data])
labels = np.array([target for _, target in data])

print(colored(f"Before reshaping: {features.shape}", color="green"))
# NOTE(review): reshape only reinterprets the buffer. cv2.imread yields
# (height, width, channels) arrays, so if the images are 218 px tall and
# 182 px wide this scrambles pixels instead of transposing - confirm sizes.
features = features.reshape((-1, 182, 218, 3))
print(colored(f"After reshaping: {features.shape}", color="green"))

# Persist the arrays, then read them back from the .npy files np.save wrote.
np.save("features", features)
np.save("labels", labels)
features = np.load("features.npy")
labels = np.load("labels.npy")

# Hold out 20% of the samples for testing.
data_train, data_test, target_train, target_test = (
    sklearn.model_selection.train_test_split(features, labels, test_size=0.2)
)

# NOTE(review): the file stems say "0.5" although test_size is 0.2, and the
# next cell loads train_data.npy / test_data.npy from another directory -
# confirm which files are the intended ones.
for stem, split in (
    ("train_data_0.5", data_train),
    ("test_data_0.5", data_test),
    ("train_label_0.5", target_train),
    ("test_label_0.5", target_test),
):
    np.save(stem, split)

# Loading data and normalizing 

In [None]:
# Only NumPy file I/O and arithmetic run inside this device scope.
with tf.device('/CPU:0'):
    # Training images divided by 255 (presumably maps 8-bit pixels into
    # [0, 1] - confirm the stored dtype), plus their labels.
    X_train = np.load('//home//adwitiya123//Downloads//train_data.npy') / 255
    Y_train = np.load('//home//adwitiya123//Downloads//train_label.npy')

    # Same treatment for the test split.
    X_test = np.load('//home//adwitiya123//Downloads//test_data.npy') / 255
    Y_test = np.load('//home//adwitiya123//Downloads//test_label.npy')

    # Cache the scaled images for the feature-extraction cell.
    np.save('X_train_normalized', X_train)
    np.save('X_test_normalized', X_test)

# NOTE(review): the validation images are used as stored, without the /255
# applied to train/test, and come from a Windows-style path while the other
# files use a POSIX path - confirm both are intended.
X_val = np.load('D:\\data\\data_first\\val_data.npy')
Y_val = np.load('D:\\data\\data_first\\val_label.npy')

print(X_train.shape)

# VGG-16 model for Feature extraction

In [None]:
with tf.device('/CPU:0'):

    from tensorflow.keras.applications.vgg16 import VGG16

    # ImageNet-pretrained VGG16 without its classifier head, fed 182x218x3 inputs.
    vgg = VGG16(
        input_tensor=Input(shape=(182, 218, 3)),
        include_top=False,
        weights='imagenet',
    )

    # The network is used purely as a fixed feature extractor.
    for frozen_layer in vgg.layers:
        frozen_layer.trainable = False

    # Flatten the output of the final pooling layer into one vector per image.
    # Note: this rebinds the name `features`, which earlier cells used for the
    # raw image array.
    last_layer = vgg.get_layer('block5_pool')
    last_output = last_layer.output
    features = Flatten()(last_output)

    model = Model(
        inputs=vgg.input,
        outputs=features,
        name='VGG16_FeatureExtractor',
    )
    plot_model(model, to_file='feat.png', show_shapes=True, show_layer_names=True)

    # Extract and cache features for the training split.
    X_train = np.load("X_train_normalized.npy")
    train_features = model.predict(X_train)
    np.save('train_features_normalized_first.npy', train_features)

    # Extract and cache features for the test split.
    X_test = np.load("X_test_normalized.npy")
    test_features = model.predict(X_test)
    np.save('test_features_normalized_first.npy', test_features)

    # X_val is whatever the loading cell left in memory; it is not reloaded here.
    val_features = model.predict(X_val)
    np.save('val_features.npy', val_features)

# Standardize

In [None]:
# StandardScaler is not brought in by the notebook's import cell, so import it
# here; without this the cell fails with NameError.
from sklearn.preprocessing import StandardScaler

# Reload the VGG16 features cached by the feature-extraction cell.
test_features = np.load("test_features_normalized_first.npy")
train_features = np.load("train_features_normalized_first.npy")

scaler = StandardScaler()

# Fit the per-feature mean/variance on the training split only, then apply the
# same transform to the test split so no test statistics leak into the scaler.
X_sc_train = scaler.fit_transform(train_features)
X_sc_test = scaler.transform(test_features)

# Finding components for PCA 

In [None]:
# PCA is not brought in by the notebook's import cell, so import it here;
# without this the cell fails with NameError.
from sklearn.decomposition import PCA

# Number of principal components to keep. scikit-learn requires this to be at
# most min(n_samples, n_features) of the matrix PCA is fitted on.
NCOMPONENTS = 4000

pca = PCA(n_components = NCOMPONENTS)
#pca.fit(X_sc_train)

# Fit the projection on the training split only and reuse it for the test split.
features_pca_train = pca.fit_transform(X_sc_train)
features_pca_test = pca.transform(X_sc_test)
#features_pca_val = pca.transform(X_sc_val)


# Optional diagnostic (disabled): cumulative explained variance per component.
# NOTE(review): the second plot hard-codes a 15000-point x axis, which would
# not match the length of explained_variance_ratio_ for NCOMPONENTS = 4000 -
# adjust `xi` before re-enabling.

# plt.plot(np.cumsum(pca.explained_variance_ratio_))
# plt.xlabel('Number of components')
# plt.ylabel('Cumulative explained variance')



# plt.rcParams["figure.figsize"] = (12,6)

# fig, ax = plt.subplots()
# xi = np.arange(1, 15001, step = 1)
# y = np.cumsum(pca.explained_variance_ratio_)

# plt.ylim(0.0,1.1)
# plt.plot(xi, y, marker='o', linestyle='--', color='b')

# plt.xlabel('Number of Components')
# plt.xticks(np.arange(0, 15000, step = 1000)) #change from 0-based array index to 1-based human-readable label
# plt.ylabel('Cumulative variance (%)')
# plt.title('The number of components needed to explain variance')

# plt.axhline(y=0.95, color='r', linestyle='-')
# plt.text(0.5, 0.85, '95% cut-off threshold', color = 'red', fontsize=16)

# ax.grid(axis='x')
# plt.show()


# Cache the reduced features for the pair-creation cell.
np.save('features_pca_train_normalized.npy', features_pca_train)
np.save('features_pca_test_normalized.npy', features_pca_test)

# Creating pairs (1s and 0s)

In [None]:
# PCA-reduced features cached by the PCA cell.
features_pca_train = np.load('features_pca_train_normalized.npy')
features_pca_test = np.load('features_pca_test_normalized.npy')

# Class labels for both splits, cast to float32 right after loading.
Y_train = np.load('//home//adwitiya123//Downloads//train_label.npy').astype('float32')
Y_test = np.load('//home//adwitiya123//Downloads//test_label.npy').astype('float32')

def create_pairs(x, digit_indices, num_classes=None):
    """Build alternating same-class / different-class sample pairs.

    For every class ``d`` and every position ``i`` in ``range(n)`` two pairs
    are emitted, in this order:

    * a positive pair (label 1): samples ``i`` and ``i + 1`` of class ``d``;
    * a negative pair (label 0): sample ``i`` of class ``d`` and sample ``i``
      of a different class picked with ``random.randrange``.

    ``n`` is one less than the size of the smallest class, so every class
    contributes the same number of pairs.

    Args:
        x: Indexable collection of samples; it is indexed with the integers
            stored in ``digit_indices``.
        digit_indices: Sequence whose ``d``-th item lists the indices into
            ``x`` of the samples that belong to class ``d``.
        num_classes: Number of leading entries of ``digit_indices`` to use.
            Defaults to ``len(digit_indices)``.

    Returns:
        Tuple ``(pairs, labels)`` of NumPy arrays. ``pairs`` holds
        ``2 * num_classes * n`` two-sample pairs and ``labels`` the matching
        1/0 flags. Both are empty when the smallest class has fewer than two
        samples.

    Raises:
        ValueError: If fewer than two classes are available, because no
            different-class pair can be formed.
    """
    if num_classes is None:
        num_classes = len(digit_indices)
    if num_classes < 2:
        raise ValueError(
            "create_pairs needs at least two classes to build negative pairs"
        )

    # Limit every class to the size of the smallest one; the -1 leaves room
    # for the (i, i + 1) positive pair.
    n = min(len(digit_indices[d]) for d in range(num_classes)) - 1

    pairs = []
    labels = []
    for d in range(num_classes):
        for i in range(n):
            # Positive pair: two consecutive samples of the same class.
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs.append([x[z1], x[z2]])
            # Negative pair: an offset in [1, num_classes - 1] guarantees
            # that dn differs from d.
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs.append([x[z1], x[z2]])
            labels += [1, 0]
    return np.array(pairs), np.array(labels)

# Per-class sample indices for the training split (labels 0-3), then its pairs.
digit_indices = [np.where(Y_train == i)[0] for i in range(4)]
features_train_pca_pairs, features_train_pca_y = create_pairs(features_pca_train, digit_indices)

# Same for the test split.
digit_indices = [np.where(Y_test == i)[0] for i in range(4)]
features_test_pca_pairs, features_test_pca_y = create_pairs(features_pca_test, digit_indices)

# Persist the pairs under the file names the "Loading dataset for Training"
# cell reads; nothing else in the notebook writes these files, so without this
# step that cell depends on artefacts from an earlier, unrecorded run.
np.save("features_pca_train_pairs_normalized.npy", features_train_pca_pairs)
np.save("features_pca_train_y_norm.npy", features_train_pca_y)
np.save("features_pca_test_pairs_normalized.npy", features_test_pca_pairs)
np.save("features_pca_test_y_norm.npy", features_test_pca_y)

# Loading dataset for Training

In [None]:
# Training pairs and their labels (presumably the output of create_pairs -
# confirm how these files were produced); labels are cast to float32.
pairTrain = np.load("features_pca_train_pairs_normalized.npy")
labelTrain = np.load("features_pca_train_y_norm.npy").astype('float32')

# Matching arrays for the test split.
pairTest = np.load("features_pca_test_pairs_normalized.npy")
labelTest = np.load("features_pca_test_y_norm.npy").astype('float32')

# Dimension check

In [None]:
# Sanity check: print the shapes of the loaded training pairs and their labels.
print(f"pair train:{pairTrain.shape}")
print(f"labeltrain:{labelTrain.shape}")