In [1]:
import pandas as pd

import numpy as np

import pickle

# Load Datasets

### Load Molecular Function Terms

In [2]:
# Column labels applied to both splits after loading (positional rename:
# the pickles are expected to carry exactly eight columns in this order).
_MF_COLUMNS = ['uniprot_ac', 'gos', 'labels', 'ngrams', 'proteins', 'sequences','orgs', 'embeddings']


def _load_human_mf_terms(pickle_path):
    """Load one DeepGO molecular-function split and keep only organism '9606'.

    Parameters
    ----------
    pickle_path : str
        Path of the pickled DataFrame to read.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'orgs' value equals '9606' (presumably the NCBI taxonomy
        id for human - confirm), with columns relabelled to ``_MF_COLUMNS``.
    """
    # SECURITY: unpickling executes arbitrary code; only load trusted files.
    terms = pd.read_pickle(pickle_path)
    # Boolean-mask indexing returns a new frame, so relabelling its columns
    # below does not touch the object that was read from disk.
    terms = terms[terms['orgs']=='9606']
    terms.columns = list(_MF_COLUMNS)
    return terms


trainMF_terms = _load_human_mf_terms('../../../data/DeepGo/train-mf.pkl')

testMF_terms = _load_human_mf_terms('../../../data/DeepGo/test-mf.pkl')

# Load DeepGO embeddings

In [3]:
# Length of each protein embedding vector; also passed to the classifier later on.
embedding_size = 256

# Pre-allocate one zero-filled row per protein; the rows are filled with the
# stored embeddings in a later cell.
trainProtein_weights = np.zeros(shape=(len(trainMF_terms), embedding_size))

testProtein_weights = np.zeros(shape=(len(testMF_terms), embedding_size))




In [4]:
# Display the column labels to confirm the renaming applied when the data was loaded.
trainMF_terms.columns

Index(['uniprot_ac', 'gos', 'labels', 'ngrams', 'proteins', 'sequences',
       'orgs', 'embeddings'],
      dtype='object')

In [5]:
# Copy every protein's embedding vector into its row of the pre-allocated
# matrices. The column is addressed by name ('embeddings' is the eighth
# column) instead of `df.iloc[i][7]`: that form materialises a full row Series
# on every iteration and relies on positional integer lookup through `[]` on a
# string-labelled Series, which pandas has deprecated.
for i, embedding in enumerate(trainMF_terms['embeddings']):
    trainProtein_weights[i] = embedding

for i, embedding in enumerate(testMF_terms['embeddings']):
    testProtein_weights[i] = embedding

In [6]:
# Feature matrices: np.array() makes an independent copy of each embedding matrix.
X_train = np.array(trainProtein_weights)
X_test = np.array(testProtein_weights)

# Label matrices: expand each row's 'labels' entry into one column per element,
# then convert the resulting frame to a plain NumPy array.
Ytrain = np.array(trainMF_terms['labels'].apply(pd.Series))
Ytest = np.array(testMF_terms['labels'].apply(pd.Series))


# +++++++++++++++++++++++++++++++++++++++++++++

### Train and Evaluate the model

In [8]:
import sys
# Extend the module search path so the project-local `model` module imported
# below can be resolved (presumably it lives in ../../../utils/ - confirm).
sys.path.append('../../../utils/')

from sklearn.model_selection import StratifiedKFold

from  model import feedForward_classifier

import keras

from sklearn.metrics import label_ranking_average_precision_score

from numpy import arange

from sklearn.metrics import f1_score


In [9]:
# Fixed seed for the experiment. Seeding NumPy's global generator makes every
# NumPy-driven source of randomness repeatable across kernel restarts.
# NOTE(review): the deep-learning backend keeps its own random state; seed it
# as well if bit-for-bit reproducible training is required.
random_seed=13
np.random.seed(random_seed)

In [10]:
# Number of label columns in the training label matrix; used as the
# classifier's output size.
num_classes = Ytrain.shape[1]

In [11]:
# Release any reference to a previously built model before constructing a new
# one (only relevant when this cell is re-run in a live kernel).
model = None
model = feedForward_classifier(num_classes, embedding_size)





In [12]:
# Train silently (verbose=0) on the training split.
history = model.fit(
    X_train,
    Ytrain,
    epochs=100,
    batch_size=32,
    verbose=0,
)

# Raw scores for the test proteins; these are binarised per threshold later,
# while the ranking metric below is computed on the raw scores directly.
YtestPredicted_raw = model.predict(X_test)
avePrec = label_ranking_average_precision_score(Ytest, YtestPredicted_raw)




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



In [13]:
# Display the label-ranking average precision obtained on the test split.
avePrec

0.43709693535077515

In [14]:
# One independent, initially empty list per output column; the threshold sweep
# appends one value to each list per evaluated threshold.
results = {
    column: []
    for column in ('treshold', 'Average Precision', 'F1 (micro)', 'F1 (macro)', 'Method')
}

In [15]:
# Sweep the decision threshold from 0.05 to 0.50 in steps of 0.05 and record
# micro/macro F1 for each cut-off. The thresholds are rounded to two decimals
# because a float-step arange produces values such as 0.15000000000000002,
# which would otherwise be written verbatim into the results table.
for treshold in np.round(arange(0.05,0.55,0.05), 2):
    # Binarise in a single comparison: scores >= threshold become 1, all others
    # 0 (a NaN score compares false and therefore maps to 0). The raw score
    # array is left untouched and its dtype is preserved.
    YtestPredicted = (YtestPredicted_raw >= treshold).astype(YtestPredicted_raw.dtype)
    results['treshold'].append(treshold)
    # Average precision is threshold-independent; it is repeated on every row
    # so that each row of the table is self-contained.
    results['Average Precision'].append(avePrec)
    results['F1 (micro)'].append(f1_score(Ytest, YtestPredicted, average='micro'))
    results['F1 (macro)'].append(f1_score(Ytest, YtestPredicted, average='macro'))
    results['Method'].append('DeepGO')


  average, "true nor predicted", 'F-score is', len(true_sum)


In [16]:
# Turn the per-threshold lists into a table with one row per threshold.
df_results = pd.DataFrame(data=results)

In [17]:
# Show up to the first 50 rows of the results table.
df_results.head(50)

Unnamed: 0,treshold,Average Precision,F1 (micro),F1 (macro),Method
0,0.05,0.437097,0.297822,0.140333,DeepGO
1,0.1,0.437097,0.33762,0.147981,DeepGO
2,0.15,0.437097,0.354033,0.149858,DeepGO
3,0.2,0.437097,0.36378,0.152834,DeepGO
4,0.25,0.437097,0.373097,0.156545,DeepGO
5,0.3,0.437097,0.374949,0.154368,DeepGO
6,0.35,0.437097,0.374917,0.152959,DeepGO
7,0.4,0.437097,0.373332,0.150668,DeepGO
8,0.45,0.437097,0.369774,0.148008,DeepGO
9,0.5,0.437097,0.36504,0.146805,DeepGO


In [18]:
# Persist the threshold sweep; index=False keeps the row index out of the file.
df_results.to_csv(path_or_buf='../../../data/_Outputs/Final_DeepGO_MF.csv', index=False)