# ExperimentEnsembleLGNR
### Takes the dataset where the negatives consist only of enhancers.
### Uses an ensemble trained on the All dataset to evaluate on the enhancer dataset.

## Imports

In [1]:
import os
# Set before TensorFlow is imported below; presumably this silences the
# INFO/WARNING messages from TensorFlow's native logger (verify against TF docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # keep this line above the tensorflow import

# Credit: https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory
import tensorflow as tf
# Enable memory growth on every GPU TensorFlow reports, so that (per the
# linked answer) TensorFlow does not claim the whole GPU memory up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


In [2]:
import tensorflow as tf
import data
from tensorflow import keras
import nets
from loaders import PremadeTripletClassifierSequence
import numpy as np
from Bio import SeqIO
import TripletClassifierEnsemble
import importlib
import time
import matplotlib.pyplot as plt

## Files

In [3]:
# Directory of the dataset being evaluated and directory of the saved ensemble.
data_dir = '../Data/Datasets/LGNR/'
model_dir = '../Data/Datasets/All/Models/'

# data_dir already ends with a separator, so the paths are built with
# os.path.join: it yields a single '/' where f'{data_dir}/...' produced '//'.

# FASTA files with the sequences of each split.
train_sequence = os.path.join(data_dir, 'train_sequences.fa')
valid_sequence = os.path.join(data_dir, 'valid_sequences.fa')
test_sequence = os.path.join(data_dir, 'test_sequences.fa')

# FASTA files with the reverse_* counterpart of each split.
reverse_train_sequence = os.path.join(data_dir, 'reverse_train_sequences.fa')
reverse_valid_sequence = os.path.join(data_dir, 'reverse_valid_sequences.fa')
reverse_test_sequence = os.path.join(data_dir, 'reverse_test_sequences.fa')

# Pre-made "dis" triplet files (.npy) of each split.
train_triplet_dis = os.path.join(data_dir, 'train_triplet_dis.npy')
valid_triplet_dis = os.path.join(data_dir, 'valid_triplet_dis.npy')
test_triplet_dis = os.path.join(data_dir, 'test_triplet_dis.npy')

# Pre-made "sim" triplet files (.npy) of each split.
train_triplet_sim = os.path.join(data_dir, 'train_triplet_sim.npy')
valid_triplet_sim = os.path.join(data_dir, 'valid_triplet_sim.npy')
test_triplet_sim = os.path.join(data_dir, 'test_triplet_sim.npy')

## Parameters

In [4]:
# Number of ensemble members that are loaded and evaluated.
model_count = 29

# The three dimensions handed to the ensemble constructor as (d1, d2, d3).
d1 = 4
d2 = 600
d3 = 3

# Length argument given to the one-hot converters.
max_len = 600

## Loading Data

In [5]:
# Convert the training FASTA file and its reverse_* counterpart, keeping only
# the seq_matrix attribute of each converter.
train, reverse_train = (
    data.FantomToOneHotConverter(fasta_path, 0, max_len).seq_matrix
    for fasta_path in (train_sequence, reverse_train_sequence)
)

In [6]:
# Convert the validation FASTA file and its reverse_* counterpart, keeping only
# the seq_matrix attribute of each converter.
valid, reverse_valid = (
    data.FantomToOneHotConverter(fasta_path, 0, max_len).seq_matrix
    for fasta_path in (valid_sequence, reverse_valid_sequence)
)

In [7]:
# Convert the test FASTA file and its reverse_* counterpart, keeping only
# the seq_matrix attribute of each converter.
test, reverse_test = (
    data.FantomToOneHotConverter(fasta_path, 0, max_len).seq_matrix
    for fasta_path in (test_sequence, reverse_test_sequence)
)

In [8]:
# One batch loader per split, each fed the split's matrix, its sim/dis triplet
# files and the matching reverse_* matrix.
train_seq = PremadeTripletClassifierSequence(
    train,
    train_triplet_sim,
    train_triplet_dis,
    batch_size=1024,
    reverse_x_in=reverse_train,
)
valid_seq = PremadeTripletClassifierSequence(
    valid,
    valid_triplet_sim,
    valid_triplet_dis,
    batch_size=1024,
    reverse_x_in=reverse_valid,
)
test_seq = PremadeTripletClassifierSequence(
    test,
    test_triplet_sim,
    test_triplet_dis,
    batch_size=1024,
    reverse_x_in=reverse_test,
)

In [9]:
def collect_data_from_loader(a_loader):
    """Gather every batch of a loader into one x array and one flat y array.

    Args:
        a_loader: Iterable yielding ``(x_batch, y_batch)`` pairs.

    Returns:
        A tuple ``(x_matrix, y_array)``: the x batches joined with
        ``np.concatenate``, and the y batches joined the same way and then
        flattened to one dimension.
    """
    # Walk the loader once; both arrays are then built from the same batches.
    batches = list(a_loader)
    x_matrix = np.concatenate([x_part for x_part, _ in batches])
    y_array = np.concatenate([y_part for _, y_part in batches]).reshape(-1)
    return x_matrix, y_array

In [10]:
# Pull all batches out of each loader, in train / valid / test order.
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = map(
    collect_data_from_loader,
    (train_seq, valid_seq, test_seq),
)

## Loading Ensemble

In [11]:
# Build the ensemble wrapper over the saved-model directory, then read its
# model info and load the first `model_count` models.
ensemble = TripletClassifierEnsemble.TripletClassifierEnsemble(
    model_dir,
    (d1, d2, d3),
)
ensemble.load_model_info()
ensemble.load_models(model_count)

Loading models 29/29


## Evaluating

In [14]:
# Run the evaluation inside a CPU device scope.
with tf.device('/cpu:0'):
    # Scoring of the training and validation splits is switched off;
    # uncomment the lines below to bring it back.
    # r_train, _ = ensemble.evaluate(x_train, y_train, model_count, is_merit=True, is_loaded=True)
    # print("Done with Training")
    # r_valid, _ = ensemble.evaluate(x_valid, y_valid, model_count, is_merit=True, is_loaded=True)
    # print("Done with Validation")
    r_test, _ = ensemble.evaluate(
        x_test,
        y_test,
        model_count,
        is_merit=True,
        is_loaded=True,
    )
    print("Done with Testing")

Accuracy: 0.6478515625
Specificity: 0.9406470127837203
Recall:  0.356121653236288
Precision:  0.8575899843505478
F1:  0.5032601708145834
Done with Testing
