# Training A Multilayer Perceptron Using GPU P100

In [1]:
%%time

import warnings
# Silence every warning for the rest of the session. Both calls install a
# blanket "ignore" filter, so deprecation and numerical warnings raised by
# any library will not be shown.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import h5py

from tqdm import tqdm

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import hamming_loss


import os
# Print the full path of every file found under the Kaggle input directory,
# in the order os.walk visits them.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

/kaggle/input/t5embeds/train_ids.npy
/kaggle/input/t5embeds/test_embeds.npy
/kaggle/input/t5embeds/train_embeds.npy
/kaggle/input/t5embeds/test_ids.npy
/kaggle/input/train-labels-cafa5/top_1500_labels.npy
/kaggle/input/train-labels-cafa5/train_labels.h5
/kaggle/input/train-labels-cafa5/MultiaLayerPerceptron.h5
CPU times: user 3.55 s, sys: 775 ms, total: 4.32 s
Wall time: 9.06 s


In [3]:
%%time

# Report whether TensorFlow can see a GPU; an empty device name means none.
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("No GPU found. Training on CPU.")
else:
    print('GPU device found: {}'.format(gpu_name))

GPU device found: /device:GPU:0
CPU times: user 1.73 ms, sys: 6.17 ms, total: 7.9 ms
Wall time: 7.8 ms


In [4]:
%%time

# Training embeddings stored in the `t5embeds` input dataset.
train_data = np.load('/kaggle/input/t5embeds/train_embeds.npy')

# Read the complete 'labels' dataset into memory; the HDF5 file is closed
# again as soon as the `with` block exits.
with h5py.File('/kaggle/input/train-labels-cafa5/train_labels.h5', mode='r') as labels_file:
    train_labels = labels_file['labels'][:]

In [5]:
%%time

# Load the test-set embeddings; these are what the model is later asked to
# predict on.
test_data = np.load('/kaggle/input/t5embeds/test_embeds.npy')

CPU times: user 17 ms, sys: 659 ms, total: 676 ms
Wall time: 10.6 s


In [None]:
%%time

# Target number of mini-batches per epoch; the batch size is derived from it.
STEPS_PER_EPOCH = 250

# Length of one embedding vector (second axis of the training matrix).
INPUT_SHAPE = [train_data.shape[1]]
# Integer division keeps the arithmetic exact, and the max() guard avoids a
# batch size of 0 (not a valid batch size) when there are fewer than
# STEPS_PER_EPOCH training rows.
BATCH_SIZE = max(1, train_data.shape[0] // STEPS_PER_EPOCH)

# Multilayer perceptron: input normalisation, three hidden ReLU layers and
# one sigmoid output unit per label column (independent per-label
# probabilities). The input shape is declared with an explicit Input object
# rather than the `input_shape=` layer argument.
model = tf.keras.Sequential([
    tf.keras.Input(shape=tuple(INPUT_SHAPE)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dense(units=train_labels.shape[1], activation='sigmoid')
])

# Binary cross-entropy matches the sigmoid outputs; binary accuracy and AUC
# are reported during training.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['binary_accuracy', tf.keras.metrics.AUC()]
)

In [None]:
%%time
# Train for 10 epochs on the full training set, using the batch size
# computed when the model was defined.
model.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    batch_size=BATCH_SIZE,
)

In [None]:
# Write the model to 'MultiLayerPerceptron.h5' in the current working
# directory.
model.save('MultiLayerPerceptron.h5')

In [6]:
%%time

# Rebinds `model` to a checkpoint read from the input dataset. The file name
# here ('MultiaLayerPerceptron.h5') differs from the one written by the
# model.save() cell ('MultiLayerPerceptron.h5'), so this is not the file a
# fresh run of this notebook produces.
# NOTE(review): presumably a model trained in an earlier session — confirm.
model = tf.keras.models.load_model('/kaggle/input/train-labels-cafa5/MultiaLayerPerceptron.h5')

CPU times: user 345 ms, sys: 271 ms, total: 616 ms
Wall time: 1.01 s


In [7]:
%%time

# Run the model on the test embeddings.
# Presumably yields one row per test protein and one column per label —
# TODO confirm against predictions.shape.
predictions =  model.predict(test_data)

CPU times: user 11.6 s, sys: 2.28 s, total: 13.9 s
Wall time: 22 s


In [8]:
# Identifiers used for the 'GO Term ID' column of the submission.
# SECURITY: allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code — only use it on files from a trusted source.
labels = np.load('/kaggle/input/train-labels-cafa5/top_1500_labels.npy',allow_pickle=True)

In [10]:
test_protein_ids = np.load('/kaggle/input/t5embeds/test_ids.npy')

# Repeat each protein ID once per prediction column so the list lines up
# with the row-major flattening of `predictions`.
n_terms = predictions.shape[1]
l = []
for protein_id in test_protein_ids:
    l.extend([protein_id] * n_terms)

In [13]:
# Long-format submission table: one row per (protein, GO term) pair.
# The label vector is tiled once per protein, and the prediction matrix is
# flattened row by row and rounded to three decimals.
n_proteins = predictions.shape[0]
go_term_column = np.tile(labels, n_proteins)
score_column = np.round(predictions.ravel(), 3)

df_submission = pd.DataFrame({
    'Protein ID': l,
    'GO Term ID': go_term_column,
    'Prediction': score_column,
})

In [14]:
# Preview the first rows of the submission table.
# NOTE(review): the saved output for this cell shows six decimals although
# the frame is built with np.round(..., 3) — the output looks stale; re-run
# to confirm.
df_submission.head()

Unnamed: 0,Protein ID,GO Term ID,Prediction
0,Q9CQV8,GO:0005575,0.745265
1,Q9CQV8,GO:0008150,0.706754
2,Q9CQV8,GO:0110165,0.718132
3,Q9CQV8,GO:0003674,0.775625
4,Q9CQV8,GO:0005622,0.591212


In [15]:
# Write the submission as a tab-separated file with no header row and no
# index column. `header` and `index` are documented as booleans, so pass
# False explicitly instead of relying on None being falsy.
df_submission.to_csv('submission.tsv', sep='\t', header=False, index=False)