# In [None]:
import numpy as np

from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing.image import load_img, img_to_array
from keras.layers import *


from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import StandardScaler
from pickle import dump, load

from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score
from sklearn import neighbors

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Input, ReLU
from keras.layers.embeddings import Embedding
from keras.layers import Conv1D, MaxPooling1D, Add, ZeroPadding1D
from keras import backend as K
from keras.optimizers import SGD
from keras import regularizers
from keras.wrappers.scikit_learn import KerasClassifier


from imblearn.over_sampling import RandomOverSampler
from keras.utils import to_categorical, plot_model


from keras.models import load_model
from keras.utils.generic_utils import get_custom_objects
from pandas import read_csv, DataFrame

from sklearn.model_selection import StratifiedKFold
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# NOTE: creating the feature images (see image_extraction) took about 70 minutes.

# Reproducibility setup: every pseudo-random generator used below is seeded
# with the same value. (The seed may differ between steps if desired.)
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable to a fixed value.
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes -- confirm if hash
# determinism of this process is required.
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Seed Python's built-in pseudo-random generator.
import random
random.seed(seed_value)

# 3. Seed numpy's legacy global pseudo-random generator
#    (numpy is already imported as `np` at the top of the file).
np.random.seed(seed_value)

# 4. Seed TensorFlow's global pseudo-random generator.
import tensorflow as tf
tf.random.set_seed(seed_value) # tensorflow 2.x
# TensorFlow 1.x equivalent: tf.set_random_seed(seed_value)



# Activation layer subclass used to wrap the swish function below.
class Swish(Activation):
    """``Activation`` layer that labels itself with ``__name__ == 'swish'``."""

    def __init__(self, activation, **kwargs):
        # Forward everything to Activation, then tag the instance so it
        # carries the name 'swish'.
        super().__init__(activation, **kwargs)
        self.__name__ = 'swish'

def swish(x, beta=2.5):
    """Return ``x * sigmoid(beta * x)`` computed with the Keras backend ``K``.

    Args:
        x: Input tensor.
        beta: Factor applied to ``x`` inside the sigmoid (default 2.5).
    """
    gate = K.sigmoid(beta * x)
    return x * gate
# Add a Swish layer wrapping `swish` under the key 'swish' to the mapping
# returned by get_custom_objects().
get_custom_objects().update({'swish':Swish(swish)})

def set_name(name, width=5):
    """Return ``name`` as a string left-padded with zeros to ``width`` chars.

    Used to build image file names such as ``00042`` from an integer id.

    Args:
        name: Image identifier; converted with ``str()`` before padding.
        width: Minimum length of the result (default 5, the width that was
            previously hard-coded).

    Returns:
        The zero-padded string. Values whose string form already has
        ``width`` or more characters are returned unchanged. An empty
        string is padded to all zeros.
    """
    # str.rjust replaces the former chain of per-length ``if`` statements.
    return str(name).rjust(width, "0")

def triplet_extraction(filename='train_triplets.txt'):
    """Read whitespace-separated integer triplets from a text file.

    Args:
        filename: Path of a text file holding three integers per line.

    Returns:
        Integer numpy array of shape ``(n_triplets, 3)``; the shape is
        ``(0, 3)`` when the file contains no triplets.

    Raises:
        ValueError: If a non-blank line does not hold exactly three integers.
    """
    triplets = []
    with open(filename) as reader:
        for line_number, line in enumerate(reader, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. an extra newline at end of file.
                continue
            if len(fields) != 3:
                raise ValueError(
                    "%s:%d: expected 3 values, got %d"
                    % (filename, line_number, len(fields))
                )
            triplets.append([int(field) for field in fields])

    # reshape keeps the result two-dimensional even when no rows were read.
    return np.array(triplets, dtype=int).reshape(-1, 3)

def switch_half(X, size=0.5, generator=None):
    """Create negative examples by swapping columns 1 and 2 of random rows.

    Every row starts with label 1. Each row is selected with probability
    ``size``; selected rows get their second and third entries swapped and
    their label set to 0.

    Args:
        X: 2-D array with at least three columns. It is modified in place.
        size: Probability that a given row is swapped (default 0.5).
        generator: ``numpy.random.Generator`` used to draw the selection.
            When omitted, the module-level ``rng`` is used, as before.

    Returns:
        Tuple ``(X, Y)``: the same array object that was passed in, and a
        float array with one label (0 or 1) per row.
    """
    print('switching half of data')
    if generator is None:
        # Fall back to the module-level generator for backward compatibility.
        generator = rng

    swap = generator.random(X.shape[0]) < size
    # Boolean-mask indexing returns copies, so both right-hand values are
    # fully read before either column is overwritten.
    X[swap, 1], X[swap, 2] = X[swap, 2], X[swap, 1]

    Y = np.ones((X.shape[0],))
    Y[swap] = 0
    return X, Y


def get_vectors(X, features_path='features_std.pkl'):
    """Replace each image index in ``X`` by its stored feature vector.

    Args:
        X: 2-D integer array; its first three columns are used as row
            indices into the feature table.
        features_path: Pickle file holding the feature table (default
            ``'features_std.pkl'``, the file written by ``image_extraction``).

    Returns:
        Array of shape ``(len(X), 3, feature_dim)``.
    """
    print('getting vectors')
    # NOTE: pickle executes arbitrary code on load; only read files that were
    # produced locally by this script.
    with open(features_path, 'rb') as handle:
        features = np.asarray(load(handle))

    X = np.asarray(X)
    if X.size == 0:
        # Nothing to look up: return an empty array with consistent trailing dims.
        return np.empty((0, 3) + features.shape[1:], dtype=features.dtype)

    # One vectorized lookup instead of a Python loop over rows and columns.
    return features[X[:, [0, 1, 2]]]

def image_extraction():
    """Run every image through VGG16 and store the outputs as pickles.

    Reads ``food/00000.jpg`` ... ``food/09999.jpg``, writes the raw network
    outputs to ``features.pkl`` and the ``StandardScaler``-transformed
    outputs to ``features_std.pkl``.
    """
    # load model
    vgg_model = VGG16()
    # NOTE(review): this pop() is meant to remove the output layer, but the
    # reshape to 1000 values below suggests it has no effect with the Keras
    # version in use, so the stored features appear to be the 1000-way
    # prediction output rather than 4096-d activations. Confirm before
    # changing: the downstream model expects vectors of length 1000.
    vgg_model.layers.pop()
    vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-1].output)

    # begin with feature extraction
    features = []
    for i in range(0, 10000):
        print(i)
        image = load_img('food/%s.jpg' % set_name(i), target_size=(224, 224))

        image = img_to_array(image)
        # add a leading batch axis of size 1
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        image = preprocess_input(image)

        feature = vgg_model.predict(image)
        feature = feature.reshape((1000,))  # originally 4096
        features.append(feature)
    features = np.array(features)

    # Context managers guarantee the pickles are flushed and closed.
    with open('features.pkl', 'wb') as raw_file:
        dump(features, raw_file)

    # Standardize the in-memory array directly; reloading the pickle that was
    # just written would yield the same data.
    features = StandardScaler().fit(features).transform(features)
    with open('features_std.pkl', 'wb') as std_file:
        dump(features, std_file)

# Module-level generator; switch_half() draws from it when creating the
# negative samples.
rng = np.random.default_rng(1)

# image_extraction()  # only needed once; leave commented out when features_std.pkl already exists
X = triplet_extraction()  # training triplets (image indices)
X_test_final = triplet_extraction('test_triplets.txt')  # test triplets (image indices)
X, y = switch_half(X)
X_test_final = get_vectors(X_test_final)  # feature vectors of the test set

n_runs = 30        # number of independently seeded models that are averaged
train_size = 0.8   # probability that a training triplet lands in the fit split

# Running sum of the predicted probabilities on the final test set.
active = np.zeros((X_test_final.shape[0], 1))

# Held-out accuracy of every run; kept outside the loop so that the printed
# mean / standard deviation cover all runs so far rather than one score.
cvscores = []

for i in range(n_runs):
    # Re-seed every generator so that run i is reproducible on its own.
    rng = np.random.default_rng(i)
    random.seed(i)
    np.random.seed(i)
    tf.random.set_seed(i)

    # Keep only the weights of the epoch with the best validation accuracy.
    mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max',
                         verbose=1, save_best_only=True)

    # Random split of the training triplets for this run.
    index = rng.random(X.shape[0]) < train_size
    y_train = y[index]
    y_test = y[~index]
    X_train = get_vectors(X[index])
    X_test = get_vectors(X[~index])

    # Fully connected network on the flattened triplet of feature vectors.
    # (The original file also carried a commented-out Conv1D variant here;
    # the dense network below is the one in use.)
    visible = Input(shape=X_test_final.shape[1:])  # (3, feature_dim)
    flat = Flatten()(visible)
    dense1 = Dense(512)(flat)
    bat1 = BatchNormalization()(dense1)
    drop1 = Dropout(0.35)(bat1)
    dense2 = Dense(512, activation='relu')(drop1)
    dense3 = Dense(256)(dense2)
    bat2 = BatchNormalization()(dense3)
    drop2 = Dropout(0.2)(bat2)
    dense4 = Dense(256, activation='relu')(drop2)
    output = Dense(1, activation='sigmoid')(dense4)

    model = Model(inputs=visible, outputs=output)

    # The model is trained with Adam; an SGD optimizer object that was
    # created here but never passed to compile() has been removed.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): the held-out split is used both to select the checkpoint
    # and to report accuracy, so the printed score is optimistic.
    history = model.fit(X_train, y_train, epochs=40, batch_size=200,
                        validation_data=(X_test, y_test), callbacks=[mc])
    saved_model = load_model('best_model.h5', custom_objects={'Swish': Swish(swish)})
    scores = saved_model.evaluate(X_test, y_test)

    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    cvscores.append(scores[1] * 100)

    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

    model.summary()

    # Accumulate this run's probabilities for the final test triplets.
    active += np.array(saved_model.predict(X_test_final)).reshape(-1, 1)

# Average over all runs.
active = active / n_runs

# Threshold at 0.5; values stay floats (0.0 / 1.0) like the averaged array.
df = DataFrame(np.where(active > 0.5, 1.0, 0.0))
df.to_csv('prediction_task4_16.csv', index=False, header=False)