In [None]:
import numpy as np
import pandas as pd
from protein_encoding import PC_6, read_fasta
from model_tools import split, show_train_history, metric_array
from model import train_model
from sklearn.utils import shuffle
import tensorflow
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import LeakyReLU

In [None]:
# Configure GPU-related environment variables for this kernel.
# NOTE(review): tensorflow is already imported in the first cell; these
# settings presumably only take effect if TensorFlow has not initialised its
# GPU devices yet -- confirm, or move this cell above the imports.
import os

os.environ.update({
    # presumably makes TensorFlow allocate GPU memory on demand -- see TF GPU guide
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    # presumably restricts the process to the CUDA device with index 1
    'CUDA_VISIBLE_DEVICES': '1',
})

In [None]:
# Load the training sequences and encode them with PC_6.
# The positive (AVP) and negative (non-AVP) FASTA files are encoded
# separately; each call returns a mapping whose .values() are stacked into
# arrays in the following cell.
# NOTE(review): length=50 is presumably the fixed per-sequence length used by
# the encoder -- confirm against protein_encoding.PC_6.
train_pos_fasta = '../data/pos_trainval_6db_2641.fasta'
train_neg_fasta = '../data/combined_16995_negativeset.fasta'

AVP_data = PC_6(train_pos_fasta, length=50)
non_AVP_data = PC_6(train_neg_fasta, length=50)

In [None]:
# Stack the encoded sequences (the dict values) into numpy arrays.
# Only the values are kept; the dict keys are dropped here.
AVP_array = np.array([encoded for encoded in AVP_data.values()])
non_AVP_array = np.array([encoded for encoded in non_AVP_data.values()])

In [None]:
import random

# Undersample the negative set down to the size of the positive set so both
# classes contribute the same number of rows.
# A dedicated, seeded generator makes the draw reproducible across
# "Restart & Run All"; the previous unseeded random.sample picked a different
# subset on every run, so downstream results could not be reproduced.
UNDERSAMPLE_SEED = 0
undersample_rng = random.Random(UNDERSAMPLE_SEED)

# Sample row indices (without replacement) instead of the rows themselves so
# the result stays a numpy array rather than a Python list of arrays.
# random.sample raises ValueError if there are fewer negatives than positives.
keep_idx = undersample_rng.sample(range(len(non_AVP_array)), len(AVP_array))

# NOTE: this overwrites non_AVP_array, so re-running only this cell draws from
# the already reduced array; re-run the previous cell first to start fresh.
non_AVP_array = np.asarray(non_AVP_array)[keep_idx]

In [None]:
# Build the combined feature array and label vector (0: non_AVP, 1: AVP).
# Negatives are placed first in both, so features[i] lines up with labels[i].
n_neg, n_pos = len(non_AVP_array), len(AVP_array)
features = np.concatenate([non_AVP_array, AVP_array])
labels = np.concatenate([np.zeros(n_neg, dtype=int), np.ones(n_pos, dtype=int)])

In [None]:
# Partition features/labels into training and validation subsets with the
# project helper `split`.
# NOTE(review): random_state=1 presumably fixes the partition and save=False
# presumably skips writing the split to disk -- confirm in model_tools.split.
train_data, val_data, train_labels, val_labels = split(
    features,
    labels,
    save=False,
    random_state=1,
)

In [None]:
# Shuffle the training and validation subsets. Data and labels are passed to
# the same shuffle call so their rows stay aligned; both calls use the same
# fixed seed.
shuffle_seed = 0
train_data, train_labels = shuffle(train_data, train_labels, random_state=shuffle_seed)
val_data, val_labels = shuffle(val_data, val_labels, random_state=shuffle_seed)

In [None]:
# Train the model with the project helper `train_model`, which returns the
# model together with its training history (plotted in the following cells).
# NOTE(review): 'PC_6_model_n' and path='./' presumably determine the name and
# location of the saved weights -- confirm in model.train_model.
model, history = train_model(
    train_data,
    train_labels,
    val_data,
    val_labels,
    'PC_6_model_n',
    path='./',
)

In [None]:
# Plot the 'accuracy' and 'val_accuracy' series recorded in the training history.
show_train_history(history, 'accuracy', 'val_accuracy')

In [None]:
# Plot the 'loss' and 'val_loss' series recorded in the training history.
show_train_history(history, 'loss', 'val_loss')

In [None]:
# Replace the in-memory model with the one stored in the best-weights file.
# NOTE(review): the file is presumably written by train_model for the
# 'PC_6_model_n' run -- confirm the naming convention in model.train_model.
best_weights_path = 'PC_6_model_n_best_weights.h5'

# NOTE(review): `activation` is not referenced by any later cell visible in
# this notebook and is not passed to load_model; it may be a leftover.
activation = tensorflow.keras.layers.LeakyReLU()

model = load_model(best_weights_path)

In [None]:
# Evaluate the reloaded model on the validation subset.
# NOTE(review): metric_array is a project helper; it presumably returns or
# prints evaluation metrics -- confirm in model_tools.
metric_array(val_data, val_labels, model)

In [None]:
# Load the held-out test sequences and encode them with PC_6, using the same
# length=50 setting as the training data, then stack the encoded values into
# numpy arrays (the dict keys are dropped).
test_pos_fasta = '../data/testing_6db_filter2012out_1_0.fasta'
test_neg_fasta = '../data/neg_testing_uniprot_random_combined_293.fasta'

test_AVP_data = PC_6(test_pos_fasta, length=50)
test_non_AVP_data = PC_6(test_neg_fasta, length=50)

test_AVP_array = np.array([encoded for encoded in test_AVP_data.values()])
test_non_AVP_array = np.array([encoded for encoded in test_non_AVP_data.values()])

In [None]:
# Build the test feature array and label vector (0 for the non-AVP rows,
# 1 for the AVP rows). Negatives come first in both so rows and labels align.
n_test_neg, n_test_pos = len(test_non_AVP_array), len(test_AVP_array)
test_features = np.concatenate([test_non_AVP_array, test_AVP_array])
test_labels = np.concatenate([np.zeros(n_test_neg, dtype=int), np.ones(n_test_pos, dtype=int)])

In [None]:
# Evaluate the reloaded model on the held-out test set.
# NOTE(review): metric_array is a project helper; it presumably returns or
# prints evaluation metrics -- confirm in model_tools.
metric_array(test_features, test_labels, model)