In [1]:
#!/usr/bin/env python
#This script is used to train the model
import click as ck
import numpy as np
import pandas as pd
import tensorflow as tf
import logging
import math
from collections import deque

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Dense, Embedding, Conv1D, Flatten, Concatenate,
    MaxPooling1D, Dropout, Maximum, Layer,LSTM, Dense,TimeDistributed,experimental
)

# from tensorflow.keras.layers.experimental import (RandomFourierFeatures)
# from tensorflow.keras.layers import (
#     Input, Dense, Embedding, Conv1D, Flatten, Concatenate,
#     MaxPooling1D, Dropout,LSTM, Dense,TimeDistributed,
# )
from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from tensorflow.python.ops import math_ops
from tensorflow.keras import regularizers

from sklearn.metrics import roc_curve, auc, matthews_corrcoef
from aminoacids import MAXLEN, to_onehot
from utils import Ontology, FUNC_DICT, is_exp_code

from kerastuner.tuners import RandomSearch
from kerastuner import HyperModel

logging.basicConfig(level=logging.DEBUG)

print("GPU Available: ", tf.test.is_gpu_available())




C:\Users\Mohamed\Anaconda_3_v_1\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
C:\Users\Mohamed\Anaconda_3_v_1\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
  stacklevel=1)


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPU Available:  True


In [2]:
class HPOLayer(Layer):

    def __init__(self, nb_classes, **kwargs):
        self.nb_classes = nb_classes
        self.hpo_matrix = np.zeros((nb_classes, nb_classes), dtype=np.float32)
        super(HPOLayer, self).__init__(**kwargs)

    def set_hpo_matrix(self, hpo_matrix):
        self.hpo_matrix = hpo_matrix

    def get_config(self):
        config = super(HPOLayer, self).get_config()
        config['nb_classes'] = self.nb_classes
        return config
    
    def build(self, input_shape):
        assert input_shape[1] == self.nb_classes
        self.kernel = K.variable(
            self.hpo_matrix, name='{}_kernel'.format(self.name))
        self.non_trainable_weights.append(self.kernel)
        super(HPOLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        x = tf.keras.backend.repeat(x, self.nb_classes)
        return tf.math.multiply(x, self.kernel)

    def compute_output_shape(self, input_shape):
        return [input_shape[0], self.nb_classes, self.nb_classes] 


In [3]:

import tensorflow as tf; print(tf.__version__)

2.1.0-rc2


In [4]:
# import keras; print(keras.__version__)

In [14]:

def create_model(params, hpo_matrix):
    inp = Input(shape=params['input_shape'], dtype=np.float32)
    # Load flat model
    flat_model = load_model(params['model_file'] + '_flat.h5')
    
    net = flat_model(inp)
    
    hpo_layer = HPOLayer(params['nb_classes'])


    hpo_layer.trainable = False
    
    hpo_layer.set_hpo_matrix(hpo_matrix)
    
    net = hpo_layer(net)
    
    net = (Conv1D(filters=1,kernel_size=1,padding='valid',kernel_initializer= 'glorot_normal'))(net)
    
    net=MaxPooling1D(pool_size=1)(net)
    
    net=LSTM(1, activation="tanh")(net)
    
#     net = MaxPooling1D(pool_size=params['nb_classes'])(net) 
    net= Flatten()(net)
    
#     output = Flatten()(net)
#     output=RandomFourierFeatures( params['nb_classes'],
                                              
#                                               kernel_initializer='laplacian', 
                                              
#                                               scale=None,
                                              
#                                               trainable=False, name='Output_SVM')(net)

    output=Dense(params['nb_classes'], activation='sigmoid', name='dense_out',kernel_regularizer=regularizers.l2(0.0001))(net)
    
#     output=Dense(20, activation='sigmoid', name='dense_out')(net)

    model = Model(inputs=inp, outputs=output)
    
    model.summary()
    
#     model.compile( optimizer=Adam(lr=params['learning_rate']),   
    model.compile(
        optimizer=Adam(lr=params['learning_rate']),
        metrics=['accuracy'],
        loss=params['loss'])    
    logging.info('Compilation finished')

    return model



In [15]:
def compute_roc(labels, preds):
    # Compute ROC curve and ROC area for each class
    fpr, tpr, _ = roc_curve(labels.flatten(), preds.flatten())
    roc_auc = auc(fpr, tpr)

    return roc_auc

def load_data(data_file, terms, fold=1):
    df = pd.read_pickle(data_file)
    # Split train/valid
    n = len(df)
    index = np.arange(n)
    np.random.seed(seed=10)
    np.random.shuffle(index)
    index = list(index)
    train_index = []
    test_index = []

    # All Swissprot proteins
    train_n = int(n * 0.9)
    train_df = df.iloc[index[:train_n]]
    valid_df = df.iloc[index[train_n:]]
    test_df=df.iloc[index[train_n:]]

    print(len(df), len(train_df), len(valid_df), len(test_df))
    return train_df, valid_df, test_df
    

class MyHyperModel(HyperModel):

    def __init__(self, params):
        self.params = params

    def build(self, hp):
        inp = Input(shape=self.params['input_shape'], dtype=np.float32)
        net = inp
        for i in range(self.params['nb_layers']):
            net = Dense(
                units=hp.Int(
                    'units', min_value=250, max_value=2000, step=250),
                name=f'dense_{i}', activation='relu')(net)
            net = Dropout(hp.Choice('rate', values=[0.3, 0.5]))(net)
        output = Dense(
            self.params['nb_classes'], activation='sigmoid',
            name='dense_out')(net)

        model = Model(inputs=inp, outputs=output)
        model.summary()
        model.compile(
            optimizer=Adam(
                hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
            loss=self.params['loss'])
        return model


def get_hpo_matrix(hpo, terms_dict):
    nb_classes = len(terms_dict)
    res = np.zeros((nb_classes, nb_classes), dtype=np.float32)
    for hp_id, i in terms_dict.items():
        subs = hpo.get_term_set(hp_id)
        res[i, i] = 1
        for h_id in subs:
            if h_id in terms_dict:
                res[i, terms_dict[h_id]] = 1
    return res


def create_flat_model(params):
    inp = Input(shape=params['input_shape'], dtype=np.float32)

    net = inp
    for i in range(params['nb_layers']):
        net = Dense(
            units=params['units'], name=f'dense_{i}', activation='relu')(net)
        net = Dropout(rate=params['rate'])(net)
    net = Dense(
        params['nb_classes'], activation='sigmoid',
        name='dense_out')(net)
    output = Flatten()(net)
    model = Model(inputs=inp, outputs=output)
    model.summary()
    model.compile(
        optimizer=Adam(lr=params['learning_rate']),
        loss=params['loss'])
    logging.info('Compilation finished')

    return model




class DFGenerator(Sequence):                                                                                                               
                                                                                                                                         
    def __init__(self, df, gos_dict, terms_dict, batch_size):
        self.start = 0
        self.size = len(df)
        self.df = df
        self.batch_size = batch_size
        self.terms_dict = terms_dict
        self.gos_dict = gos_dict
    def reset(self):
        self.start = 0                                                                                                                                           
    def __len__(self):                                                                                                                   
        return np.ceil(len(self.df) / float(self.batch_size)).astype(np.int32)                                                           
                                                                                                                                      
    def __getitem__(self, idx):                                                                                                          
        batch_index = np.arange(                                                                                                         
            idx * self.batch_size, min(self.size, (idx + 1) * self.batch_size))                                                          
        df = self.df.iloc[batch_index]                                                                                                   
        data_seq = np.zeros((len(df), MAXLEN, 21), dtype=np.float32)
        data_gos = np.zeros((len(df), len(self.gos_dict)), dtype=np.float32)
        labels = np.zeros((len(df), len(self.terms_dict)), dtype=np.int32)
        for i, row in enumerate(df.itertuples()):
            seq = row.sequences
            data_seq[i, :] = to_onehot(seq)
            
            for item in row.deepgo_annotations:
                t_id, score = item.split('|')
                if t_id in self.gos_dict:
                    data_gos[i, self.gos_dict[t_id]] = float(score)

            for t_id in row.iea_annotations:
                if t_id in self.gos_dict:
                    data_gos[i, self.gos_dict[t_id]] = 1

            for t_id in row.go_annotations:
                if t_id in self.gos_dict:
                    data_gos[i, self.gos_dict[t_id]] = 1
                
            for t_id in row.hp_annotations:
                if t_id in self.terms_dict:
                    labels[i, self.terms_dict[t_id]] = 1
        return (data_gos, labels)
    


In [18]:
def main(hp_file, data_file, terms_file, gos_file, model_file,out_file, fold, batch_size, epochs, load, 
         logger_file, threshold,device):
    gos_df = pd.read_pickle(gos_file)
    gos = gos_df['gos'].values.flatten()
    gos_dict = {v: i for i, v in enumerate(gos)}

    # cross validation settings
    # model_file = f'fold{fold}_' + model_file
    # out_file = f'fold{fold}_' + out_file
    params = {
        'nb_classes': 3783,
        'input_shape': (len(gos),),
#         'input_shape': (20,),

        'nb_layers': 1,
        'loss': 'binary_crossentropy',
#         'loss': 'Hinge',        

        'rate': 0.3,
        'learning_rate': 0.001,
        'units': 1500, # 750
        'model_file': model_file
    }
    
    print('Params:', params)
    global hpo
    hpo = Ontology(hp_file, with_rels=True)
    terms_df = pd.read_pickle(terms_file)
    global terms
    terms = terms_df['terms'].values.flatten()
    print('Phenotypes', len(terms))
    global term_set
    term_set = set(terms)
    train_df, valid_df, test_df = load_data(data_file, terms, fold)
    terms_dict = {v: i for i, v in enumerate(terms)}
    hpo_matrix = get_hpo_matrix(hpo, terms_dict)
    nb_classes = len(terms)
    print ('nb_classes=len(terms)_______________________ ',len(terms))
    params['nb_classes'] = nb_classes
    print('len(terms_dict)_______________________ ',len(terms_dict))
    test_steps = int(math.ceil(len(test_df) / batch_size))
    test_generator = DFGenerator(test_df, gos_dict, terms_dict,batch_size)
    valid_steps = int(math.ceil(len(valid_df) / batch_size))
    train_steps = int(math.ceil(len(train_df) / batch_size))
    print ( 'test_steps',test_steps,'valid_steps',valid_steps,'train_steps',train_steps)
    test_steps=5
    valid_steps=5
    train_steps=5
    xy_generator = DFGenerator(train_df, gos_dict, terms_dict,len(train_df))
    x, y = xy_generator[0]
    val_generator = DFGenerator(valid_df, gos_dict, terms_dict,len(valid_df))
    val_x, val_y = val_generator[0]

    train_generator = DFGenerator(train_df, gos_dict, terms_dict,batch_size)
    valid_generator = DFGenerator(valid_df, gos_dict, terms_dict,batch_size)
    
    with tf.device(device):
        if load:
            print('Loading pretrained model')
            model = load_model(model_file, custom_objects={'HPOLayer': HPOLayer})
            flat_model = load_model(model_file + '_flat.h5')
        else:
            
            print('Creating a new model')
            flat_model = MyHyperModel(params)
            # flat_model = create_flat_model(params)

            print("Training data size: %d" % len(train_df))
            print("Validation data size: %d" % len(valid_df))
            checkpointer = ModelCheckpoint( filepath=model_file + '_flat.h5', verbose=1, save_best_only=True)
            
            earlystopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
            logger = CSVLogger(logger_file)


            tuner = RandomSearch( flat_model, objective='val_loss',max_trials=1,directory='data_new',project_name='dl_ml')
            
            tuner.search( x, y, epochs=2, validation_data=(val_x, val_y), )
            
            tuner.results_summary()
            
            logging.info('Loading best model')
            
            flat_model = tuner.get_best_models(num_models=1)[0]
            flat_model.summary()
            
            loss = flat_model.evaluate(val_x, val_y)
            print('Valid loss %f' % loss)
            flat_model.save(model_file + '_flat.h5')

            model = create_model(params, hpo_matrix)


            checkpointer = ModelCheckpoint( filepath=model_file, verbose=1, save_best_only=True)
            model.summary()
            print('Starting training the flat model')
            model.fit(
                train_generator,
                steps_per_epoch=train_steps,
                epochs=epochs,
                validation_data=valid_generator,
                validation_steps=valid_steps,
                max_queue_size=batch_size,
                workers=1,
                callbacks=[logger, checkpointer, earlystopper])

            logging.info('Loading best model')
            model = load_model(model_file, custom_objects={'HPOLayer': HPOLayer})
            flat_model = load_model(model_file + '_flat.h5')
            
        logging.info('Evaluating model')
        loss = flat_model.evaluate(test_generator, steps=test_steps)
#         print('Flat Test loss %f' % loss)
        loss = model.evaluate(test_generator, steps=test_steps)
#         print('Test loss %f' % loss)

        logging.info('Predicting')
        #-----------------------------------------------------
        test_generator.reset()
        #3944
        test_steps = int(math.ceil(len(test_df) / batch_size))
        preds = model.predict(test_generator, steps=test_steps)
        flat_preds = flat_model.predict(test_generator, steps=test_steps)

        all_terms_df = pd.read_pickle(terms_file)
        all_terms = all_terms_df['terms'].values
        all_terms_dict = {v:k for k,v in enumerate(all_terms)}
        all_labels = np.zeros((len(test_df), len(all_terms)), dtype=np.int32)
        for i, row in enumerate(test_df.itertuples()):
            for hp_id in row.hp_annotations:
                if hp_id in all_terms_dict:
                    all_labels[i, all_terms_dict[hp_id]] = 1
        
        all_preds = np.zeros((len(test_df), len(all_terms)), dtype=np.float32)
        all_flat_preds = np.zeros((len(test_df), len(all_terms)), dtype=np.float32)

        test_df['preds'] = list(preds)
#         print(test_df)
        logging.info('Saving predictions')
        test_df.to_pickle(out_file)

        test_df['preds'] = list(flat_preds)
        test_df.to_pickle(out_file + '_flat.pkl')



In [19]:
# main(hp_file, data_file, terms_file, gos_file, model_file,out_file, fold, batch_size, epochs, load, logger_file, threshold,device):

main('data/hp.obo',
     
     'data/My_Implementations/human.pkl',
     
     'data/My_Implementations/terms.pkl',
     
     'data/My_Implementations/gos.pkl',
     
     'data/My_Implementations/Trial_2/model_mohamed.h5',
     
     'data/My_Implementations/Trial_2/predictions.pkl',
     
     1,
     
     1,
     
     1,
     
     False,
     
     'data/My_Implementations/Trial_2/training.csv',
     
     0.5,
     
     'CPU:0')

Params: {'nb_classes': 3783, 'input_shape': (24274,), 'nb_layers': 1, 'loss': 'binary_crossentropy', 'rate': 0.3, 'learning_rate': 0.001, 'units': 1500, 'model_file': 'data/My_Implementations/Trial_2/model_mohamed.h5'}
Phenotypes 8693
3933 3539 394 394
nb_classes=len(terms)_______________________  8693
len(terms_dict)_______________________  8693
test_steps 394 valid_steps 394 train_steps 3539
Creating a new model
Training data size: 3539
Validation data size: 394
INFO:tensorflow:Reloading Oracle from data_new\dl_ml\oracle.json


INFO:tensorflow:Reloading Oracle from data_new\dl_ml\oracle.json


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24274)]           0         
_________________________________________________________________
dense_0 (Dense)              (None, 250)               6068750   
_________________________________________________________________
dropout (Dropout)            (None, 250)               0         
_________________________________________________________________
dense_out (Dense)            (None, 8693)              2181943   
Total params: 8,250,693
Trainable params: 8,250,693
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Reloading Tuner from data_new\dl_ml\tuner0.json


INFO:tensorflow:Reloading Tuner from data_new\dl_ml\tuner0.json


INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


INFO:root:Loading best model


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24274)]           0         
_________________________________________________________________
dense_0 (Dense)              (None, 1000)              24275000  
_________________________________________________________________
dropout (Dropout)            (None, 1000)              0         
_________________________________________________________________
dense_out (Dense)            (None, 8693)              8701693   
Total params: 32,976,693
Trainable params: 32,976,693
Non-trainable params: 0
_________________________________________________________________
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24274)]           0         
__________________________________



Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 24274)]           0         
_________________________________________________________________
model (Model)                (None, 8693)              32976693  
_________________________________________________________________
hpo_layer (HPOLayer)         (None, 8693, 8693)        75568249  
_________________________________________________________________
conv1d (Conv1D)              (None, 8693, 1)           8694      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 8693, 1)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 1)                 12        
_________________________________________________________________
flatten (Flatten)            (None, 1)                 0   

INFO:root:Compilation finished


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 24274)]           0         
_________________________________________________________________
model (Model)                (None, 8693)              32976693  
_________________________________________________________________
hpo_layer (HPOLayer)         (None, 8693, 8693)        75568249  
_________________________________________________________________
conv1d (Conv1D)              (None, 8693, 1)           8694      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 8693, 1)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 1)                 12        
_________________________________________________________________
flatten (Flatten)            (None, 1)                 0   

  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


Train for 5 steps, validate for 5 steps
Epoch 00001: val_loss improved from inf to 0.69101, saving model to data/My_Implementations/Trial_2/model_mohamed.h5


INFO:root:Loading best model




INFO:root:Evaluating model


  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']




INFO:root:Predicting
INFO:root:Saving predictions


# Implementing the HPO predictor without Flat Model

In [20]:
class HPOLayer(Layer):

    def __init__(self, nb_classes, **kwargs):
        self.nb_classes = nb_classes
        self.hpo_matrix = np.zeros((nb_classes, nb_classes), dtype=np.float32)
        super(HPOLayer, self).__init__(**kwargs)

    def set_hpo_matrix(self, hpo_matrix):
        self.hpo_matrix = hpo_matrix

    def get_config(self):
        config = super(HPOLayer, self).get_config()
        config['nb_classes'] = self.nb_classes
        return config
    
    def build(self, input_shape):
        assert input_shape[1] == self.nb_classes
        self.kernel = K.variable(
            self.hpo_matrix, name='{}_kernel'.format(self.name))
        self.non_trainable_weights.append(self.kernel)
        super(HPOLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        x = tf.keras.backend.repeat(x, self.nb_classes)
        return tf.math.multiply(x, self.kernel)

    def compute_output_shape(self, input_shape):
        return [input_shape[0], self.nb_classes, self.nb_classes] 


In [33]:

def create_model(params, hpo_matrix):
    inp = Input(shape=(8693, 8693), dtype=np.float32)
    # Load flat model
#     flat_model = load_model(params['model_file'] + '_flat.h5')
    
#     net = flat_model(inp)
    
#     hpo_layer = HPOLayer(params['nb_classes'])


#     hpo_layer.trainable = False
    
#     hpo_layer.set_hpo_matrix(hpo_matrix)
    
#     net = hpo_layer(inp)
    
#     net = (Conv1D(filters=1,kernel_size=1,padding='valid',kernel_initializer= 'glorot_normal'))(net)
    net = (Conv1D(filters=1,kernel_size=1,padding='valid',kernel_initializer= 'glorot_normal'))(inp)

    net=MaxPooling1D(pool_size=1)(net)
    
    net=LSTM(1, activation="tanh")(net)
    
#     net = MaxPooling1D(pool_size=params['nb_classes'])(net) 
    net= Flatten()(net)
    
#     output = Flatten()(net)
#     output=RandomFourierFeatures( params['nb_classes'],
                                              
#                                               kernel_initializer='laplacian', 
                                              
#                                               scale=None,
                                              
#                                               trainable=False, name='Output_SVM')(net)

    output=Dense(params['nb_classes'], activation='sigmoid', name='dense_out',kernel_regularizer=regularizers.l2(0.0001))(net)
    
#     output=Dense(20, activation='sigmoid', name='dense_out')(net)

    model = Model(inputs=inp, outputs=output)
    
    model.summary()
    
#     model.compile( optimizer=Adam(lr=params['learning_rate']),   
    model.compile(
        optimizer=Adam(lr=params['learning_rate']),
        metrics=['accuracy'],
        loss=params['loss'])    
    logging.info('Compilation finished')

    return model



In [40]:
def main(hp_file, data_file, terms_file, gos_file, model_file,out_file, fold, batch_size, epochs, load, 
         logger_file, threshold,device):
    gos_df = pd.read_pickle(gos_file)
    gos = gos_df['gos'].values.flatten()
    gos_dict = {v: i for i, v in enumerate(gos)}

    # cross validation settings
    # model_file = f'fold{fold}_' + model_file
    # out_file = f'fold{fold}_' + out_file
    params = {
        'nb_classes': 3783,
        'input_shape': (len(gos),),
#         'input_shape': (20,),

        'nb_layers': 1,
        'loss': 'binary_crossentropy',
#         'loss': 'Hinge',        

        'rate': 0.3,
        'learning_rate': 0.001,
        'units': 1500, # 750
        'model_file': model_file
    }
    
    print('Params:', params)
    global hpo
    hpo = Ontology(hp_file, with_rels=True)
    terms_df = pd.read_pickle(terms_file)
    global terms
    terms = terms_df['terms'].values.flatten()
    print('Phenotypes', len(terms))
    global term_set
    term_set = set(terms)
    train_df, valid_df, test_df = load_data(data_file, terms, fold)
    terms_dict = {v: i for i, v in enumerate(terms)}
    hpo_matrix = get_hpo_matrix(hpo, terms_dict)
    nb_classes = len(terms)
    print ('nb_classes=len(terms)_______________________ ',len(terms))
    params['nb_classes'] = nb_classes
    print('len(terms_dict)_______________________ ',len(terms_dict))
    test_steps = int(math.ceil(len(test_df) / batch_size))
    test_generator = DFGenerator(test_df, gos_dict, terms_dict,batch_size)
    valid_steps = int(math.ceil(len(valid_df) / batch_size))
    train_steps = int(math.ceil(len(train_df) / batch_size))
    print ( 'test_steps',test_steps,'valid_steps',valid_steps,'train_steps',train_steps)
    test_steps=5
    valid_steps=5
    train_steps=5
    xy_generator = DFGenerator(train_df, gos_dict, terms_dict,len(train_df))
    x, y = xy_generator[0]
    val_generator = DFGenerator(valid_df, gos_dict, terms_dict,len(valid_df))
    val_x, val_y = val_generator[0]

    train_generator = DFGenerator(train_df, gos_dict, terms_dict,batch_size)
    valid_generator = DFGenerator(valid_df, gos_dict, terms_dict,batch_size)
    
    with tf.device(device):
        if load:
            print('Loading pretrained model')
            model = load_model(model_file)
        else:
            
            print('Creating a new model')


            print("Training data size: %d" % len(train_df))
            print("Validation data size: %d" % len(valid_df))
            logger = CSVLogger(logger_file)
            earlystopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1)





            model = create_model(params, hpo_matrix)


            checkpointer = ModelCheckpoint( filepath=model_file, verbose=1, save_best_only=True)
            model.summary()
            print('Starting training the my Implemented model')
            
            model.fit(train_generator, steps_per_epoch=train_steps, epochs=epochs,
                      
                validation_data=valid_generator, validation_steps=valid_steps, max_queue_size=batch_size,
                      
                workers=1, callbacks=[logger, checkpointer, earlystopper])

            logging.info('Loading best model')
            
#             model = load_model(model_file, custom_objects={'HPOLayer': HPOLayer})
            model = load_model(model_file)
            
        logging.info('Evaluating model')
        loss = model.evaluate(test_generator, steps=test_steps)

        logging.info('Predicting')
        #-----------------------------------------------------
        test_generator.reset()
        #3944
        test_steps = int(math.ceil(len(test_df) / batch_size))
        
        preds = model.predict(test_generator, steps=test_steps)
        
        logging.info('creating the dataFrames for saving the predictions')

        all_terms_df = pd.read_pickle(terms_file)
        all_terms = all_terms_df['terms'].values
        all_terms_dict = {v:k for k,v in enumerate(all_terms)}
        
        all_labels = np.zeros((len(test_df), len(all_terms)), dtype=np.int32)
        
        for i, row in enumerate(test_df.itertuples()):
            for hp_id in row.hp_annotations:
                if hp_id in all_terms_dict:
                    all_labels[i, all_terms_dict[hp_id]] = 1
                    
                    
        logging.info('Placing the  predictions')
        all_preds = np.zeros((len(test_df), len(all_terms)), dtype=np.float32)

        test_df['preds'] = list(preds)
        logging.info('Saving my implemented predictions')
        test_df.to_pickle(out_file)




In [41]:
# main(hp_file, data_file, terms_file, gos_file, model_file,out_file, fold, batch_size, epochs, load, logger_file, threshold,device):

main('data/hp.obo',
     
     'data/My_Implementations/human.pkl',
     
     'data/My_Implementations/terms.pkl',
     
     'data/My_Implementations/gos.pkl',
     
     'data/My_Implementations/Trial_3/model_mohamed.h5',
     
     'data/My_Implementations/Trial_3/predictions.pkl',
     
     1,
     
     1,
     
     1,
     
     False,
     
     'data/My_Implementations/Trial_3/training.csv',
     
     0.5,
     
     'CPU:0')

Params: {'nb_classes': 3783, 'input_shape': (24274,), 'nb_layers': 1, 'loss': 'binary_crossentropy', 'rate': 0.3, 'learning_rate': 0.001, 'units': 1500, 'model_file': 'data/My_Implementations/Trial_3/model_mohamed.h5'}
Phenotypes 8693
3933 3539 394 394
nb_classes=len(terms)_______________________  8693
len(terms_dict)_______________________  8693
test_steps 394 valid_steps 394 train_steps 3539
Creating a new model
Training data size: 3539
Validation data size: 394
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 8693, 8693)]      0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 8693, 1)           8694      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 8693, 1)           0         
____________________________________________________

INFO:root:Compilation finished


Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 8693, 8693)]      0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 8693, 1)           8694      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 8693, 1)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 1)                 12        
_________________________________________________________________
flatten_4 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_out (Dense)            (None, 8693)              17386     
Total params: 26,092
Trainable params: 26,092
Non-trainable params: 0
_______________________________________________________

  ...
    to  
  ['...']


ValueError: in converted code:

    C:\Users\Mohamed Elhaj_Abdou\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\keras\engine\training_v2.py:677 map_fn
        batch_size=None)
    C:\Users\Mohamed Elhaj_Abdou\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\keras\engine\training.py:2410 _standardize_tensors
        exception_prefix='input')
    C:\Users\Mohamed Elhaj_Abdou\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\keras\engine\training_utils.py:573 standardize_input_data
        'with shape ' + str(data_shape))

    ValueError: Error when checking input: expected input_10 to have 3 dimensions, but got array with shape (None, None)


In [None]:
print ( "Done")

In [None]:
3060 Ti 