In [1]:
import pandas as pd

import numpy as np

import pickle

# Load Datasets

### Load Biological Process Terms

In [2]:
# Column names assigned positionally to both splits once they are loaded and filtered.
_BP_COLUMNS = ['uniprot_ac', 'gos', 'labels', 'ngrams', 'proteins', 'sequences', 'orgs', 'embeddings']


def _load_bp_terms(pickle_path):
    """Read one pickled split, keep rows whose 'orgs' equals '9606', and rename its columns.

    NOTE(review): '9606' is presumably the NCBI taxonomy ID for human -- confirm.
    NOTE(review): pd.read_pickle can execute arbitrary code; only load trusted files.
    """
    terms = pd.read_pickle(pickle_path)
    selected = terms[terms['orgs'] == '9606']
    selected.columns = list(_BP_COLUMNS)
    return selected


trainBP_terms = _load_bp_terms('../../../data/DeepGo/train-bp.pkl')

testBP_terms = _load_bp_terms('../../../data/DeepGo/test-bp.pkl')

# Load DeepGO embeddings

In [3]:
# Width of every embedding row; also handed to the classifier constructor further down.
embedding_size = 256

# Zero-filled (number of proteins, embedding_size) buffers, one per split.
# They are populated from the 'embeddings' column in a later cell.
trainProtein_weights = np.zeros(shape=(len(trainBP_terms), embedding_size))

testProtein_weights = np.zeros(shape=(len(testBP_terms), embedding_size))




In [4]:
# Display the column index to confirm the positional rename applied after loading.
trainBP_terms.columns

Index(['uniprot_ac', 'gos', 'labels', 'ngrams', 'proteins', 'sequences',
       'orgs', 'embeddings'],
      dtype='object')

In [5]:
# Copy each protein's embedding vector into the pre-allocated weight matrices, row by row.
# The column is addressed by its name ('embeddings', the 8th column) rather than via
# `iloc[i][7]`: an integer key on a string-labelled row Series relies on a positional
# fallback that recent pandas versions deprecate, and it built a full row Series per step.
for row_idx, embedding in enumerate(trainBP_terms['embeddings']):
    trainProtein_weights[row_idx] = embedding

for row_idx, embedding in enumerate(testBP_terms['embeddings']):
    testProtein_weights[row_idx] = embedding

In [6]:
def _label_matrix(terms):
    """Expand the 'labels' column of `terms` into a 2-D NumPy array (one column per label position).

    NOTE(review): apply(pd.Series) builds one Series per row and can be slow on large frames.
    """
    expanded = terms['labels'].apply(pd.Series)
    return np.array(expanded)


# Features are copies of the embedding buffers; targets are the expanded label vectors.
X_train = np.array(trainProtein_weights)
Ytrain = _label_matrix(trainBP_terms)

X_test = np.array(testProtein_weights)
Ytest = _label_matrix(testBP_terms)


# +++++++++++++++++++++++++++++++++++++++++++++

### Train and Evaluate the model

In [10]:
import sys
sys.path.append('../../../utils/')

from sklearn.model_selection import StratifiedKFold

from  model import feedForward_classifier

import keras

from sklearn.metrics import label_ranking_average_precision_score

from numpy import arange

from sklearn.metrics import f1_score


In [11]:
# Seed for the stochastic parts of the run. It was previously defined but never applied.
random_seed = 13

# Seed NumPy's global RNG so NumPy-driven randomness is repeatable across runs.
# NOTE(review): the Keras/TensorFlow backend may need its own seed for full
# reproducibility -- confirm against the installed versions.
np.random.seed(random_seed)

In [12]:
# Number of output units: the width (second axis) of the training label matrix.
num_classes = np.shape(Ytrain)[1]

In [14]:
# Release any model left over from an earlier execution before building a fresh one.
model = None

# feedForward_classifier comes from the project's local `model` module; it receives
# the number of label columns and the embedding width.
model = feedForward_classifier(
    num_classes,
    embedding_size,
)

In [15]:
# Train silently on the full training split.
history = model.fit(
    X_train,
    Ytrain,
    epochs=100,
    batch_size=32,
    verbose=0,
)

# Raw (un-thresholded) scores for the held-out proteins.
YtestPredicted_raw = model.predict(X_test)

# Threshold-free ranking metric computed directly on the raw scores.
avePrec = label_ranking_average_precision_score(
    Ytest,
    YtestPredicted_raw,
)




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



In [16]:
# Display the label-ranking average precision computed on the test split.
avePrec

0.4044951140966937

In [17]:
# Column-oriented accumulator for the threshold sweep; each key becomes a column of the
# results table. The key 'treshold' (sic) is kept as-is because it ends up as a CSV header.
results = {
    column: []
    for column in ('treshold', 'Average Precision', 'F1 (micro)', 'F1 (macro)', 'Method')
}

In [18]:
# Sweep decision thresholds and record micro/macro F1 of the binarised test predictions.
# NOTE(review): arange is used with a float step; the recorded run yielded 10 thresholds
# (0.05 ... 0.50). Re-running this cell appends another sweep to `results`.
for treshold in arange(0.05, 0.55, 0.05):
    # Binarise straight from the untouched raw scores with a single comparison. The
    # previous two-step in-place masking made the second mask read values the first
    # had already overwritten.
    YtestPredicted = (YtestPredicted_raw >= treshold).astype(YtestPredicted_raw.dtype)

    results['treshold'].append(treshold)
    # Average precision does not depend on the threshold; it is repeated on every row.
    results['Average Precision'].append(avePrec)
    results['F1 (micro)'].append(f1_score(Ytest, YtestPredicted, average='micro'))
    results['F1 (macro)'].append(f1_score(Ytest, YtestPredicted, average='macro'))
    results['Method'].append('DeepGO')


  average, "true nor predicted", 'F-score is', len(true_sum)


In [19]:
# Turn the column-oriented accumulator into a table with one row per threshold.
df_results = pd.DataFrame(data=results)

In [20]:
# Show up to the first 50 rows of the results table (one row per swept threshold).
df_results.head(50)

Unnamed: 0,treshold,Average Precision,F1 (micro),F1 (macro),Method
0,0.05,0.404495,0.298681,0.16524,DeepGO
1,0.1,0.404495,0.343743,0.181898,DeepGO
2,0.15,0.404495,0.365987,0.187628,DeepGO
3,0.2,0.404495,0.378062,0.186446,DeepGO
4,0.25,0.404495,0.384371,0.183633,DeepGO
5,0.3,0.404495,0.386478,0.17928,DeepGO
6,0.35,0.404495,0.385132,0.172852,DeepGO
7,0.4,0.404495,0.381908,0.166832,DeepGO
8,0.45,0.404495,0.375885,0.16,DeepGO
9,0.5,0.404495,0.367468,0.152403,DeepGO


In [21]:
# Persist the results table as CSV, leaving out the integer row index.
df_results.to_csv(
    path_or_buf='../../../data/_Outputs/Final_DeepGO_BP.csv',
    index=False,
)