In [None]:
import numpy as np
import pandas as pd
from model_tools import split, show_train_history, metric_array
from model import train_ennavia_model
from sklearn.utils import shuffle
from descriptor import ennavia_encoding
import tensorflow.keras
from tensorflow.keras.models import load_model

In [None]:
# GPU configuration via environment variables.
# NOTE(review): tensorflow is imported in an earlier cell; this assumes the
# variables are only read when the GPU is first initialised -- confirm, or
# move this cell above the imports.
import os
os.environ.update({
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',  # ask TensorFlow to grow GPU memory on demand
    'CUDA_VISIBLE_DEVICES': '1',          # expose only the CUDA device with index 1
})

In [None]:
# Encode the three training FASTA files with the project encoder, in order:
# real AVPs, GAN-generated AVPs, and the negative (non-AVP) set.
AVP_array, gan_AVP_array, non_AVP_array = (
    ennavia_encoding(fasta_path)
    for fasta_path in (
        '../data/pos_trainval_6db_2641.fasta',
        '../data/GAN_AVP_14354.fasta',
        '../data/combined_16995_negativeset.fasta',
    )
)

In [None]:
# real AVPs + GAN AVPs -> features & labels (0:non_AVP, 1:AVP)
#
# The augmented positive set is bound to its own name instead of overwriting
# AVP_array. Rebinding AVP_array to a concatenation of itself made the cell
# non-idempotent: every re-run appended the GAN sequences one more time and
# silently skewed the class balance. Building the features and labels in the
# same cell keeps the merge and its only consumer together.
aug_AVP_array = np.concatenate((AVP_array, gan_AVP_array), axis=0)

# Negatives come first, then positives; labels follow the same order.
features = np.concatenate((non_AVP_array, aug_AVP_array), axis=0)
labels = np.hstack((np.repeat(0, len(non_AVP_array)), np.repeat(1, len(aug_AVP_array))))

# Every feature row must have exactly one label.
assert len(features) == len(labels)

In [None]:
# Partition features/labels into training and validation subsets using the
# project helper; save=False and random_state=1 are forwarded as given.
train_data, val_data, train_labels, val_labels = split(
    features,
    labels,
    save=False,
    random_state=1,
)

In [None]:
# Shuffle each subset with a fixed seed; data and labels are passed to the
# same shuffle() call so they stay aligned row-for-row.
(train_data, train_labels), (val_data, val_labels) = (
    shuffle(subset_data, subset_labels, random_state=0)
    for subset_data, subset_labels in (
        (train_data, train_labels),
        (val_data, val_labels),
    )
)

In [None]:
# Train on the training split, passing the validation split alongside it.
# NOTE(review): 'ennavia_aug_model' and path='./' presumably control where
# checkpoints are written -- confirm in model.train_ennavia_model.
model, history = train_ennavia_model(
    train_data, train_labels,
    val_data, val_labels,
    'ennavia_aug_model',
    path='./',
)

In [None]:
# Training vs. validation accuracy (presumably plotted per epoch -- see model_tools).
show_train_history(history, 'accuracy', 'val_accuracy')

In [None]:
# Training vs. validation loss (presumably plotted per epoch -- see model_tools).
show_train_history(history, 'loss', 'val_loss')

In [None]:
# Replace the in-memory model with the one stored on disk.
# NOTE(review): this file is presumably the best checkpoint written during
# training; it is resolved relative to the current working directory.
best_weights_file = 'ennavia_aug_model_best_weights.h5'
model = load_model(best_weights_file)

In [None]:
# Evaluate the loaded model on the validation split.
metric_array(
    val_data,
    val_labels,
    model,
)

In [None]:
# Encode the held-out test FASTA files: positives (AVP) and negatives (non-AVP).
test_AVP_array = ennavia_encoding(
    '../data/testing_6db_filter2012out_1_0.fasta'
)
test_non_AVP_array = ennavia_encoding(
    '../data/neg_testing_uniprot_random_combined_293.fasta'
)

In [None]:
# Test features & labels (0: non_AVP, 1: AVP); negatives first, then positives.
n_test_neg = len(test_non_AVP_array)
n_test_pos = len(test_AVP_array)
test_features = np.concatenate((test_non_AVP_array, test_AVP_array), axis=0)
# One 0 per negative followed by one 1 per positive.
test_labels = np.repeat([0, 1], [n_test_neg, n_test_pos])

In [None]:
# Evaluate the loaded model on the independent test set.
metric_array(
    test_features,
    test_labels,
    model,
)