In [1]:
import random
import numpy as np
import tensorflow as tf

from src.features.encodings import pse_knc

# One seed shared by every random number generator the notebook relies on
# (Python's `random`, NumPy and TensorFlow).
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Passed to FeatureEncoder.pse_knc when the human data is encoded below.
info = pse_knc.get_info('PseKNC')

In [2]:
from pathlib import Path

from keras import Sequential
from keras.layers import *
from keras.callbacks import EarlyStopping
from keras.losses import BinaryCrossentropy
from sklearn.ensemble import GradientBoostingClassifier

from src.features.encoder import FeatureEncoder
from src.data.data_loader import load_dataset, Species
from src.models.reporting.model_report import ModelReport
from src.models.reporting.single_report import generate_report

### Human Training & Testing

In [3]:
def _load_human_split(encoding):
    """Load the human data twice with one encoding.

    Returns a pair ``(train, test)`` where ``train`` is the result of
    ``load_dataset(..., independent=False)`` and ``test`` the result of
    ``load_dataset(..., independent=True)``; the training call is made first.
    """
    train = load_dataset(Species.human, independent=False, encoding=encoding)
    test = load_dataset(Species.human, independent=True, encoding=encoding)
    return train, test


# Binary encoding: keep both the features and the labels.
(binary_train_x, human_train_y), (binary_test_x, human_test_y) = _load_human_split(FeatureEncoder.binary)

# For the remaining encodings only the feature matrices are kept; the labels are discarded.
(pstnpss_train_x, _), (pstnpss_test_x, _) = _load_human_split(
    lambda seqs: FeatureEncoder.pstnpss(seqs, 'hs'))

(pse_knc_train_x, _), (pse_knc_test_x, _) = _load_human_split(
    lambda seqs: FeatureEncoder.pse_knc(seqs, info, 3, 2, 0.1))

In [4]:
# Sanity check: the second dimension must equal the 19-wide `human_pstnpss_input` of the human model.
pstnpss_train_x.shape

(990, 19)

In [155]:
def _dense_branch(inputs, input_dim, hidden_units, dropout_rate, name):
    """Build one per-encoding sub-network and apply it to ``inputs``.

    The branch is Dense(input_dim, relu) -> Dense(hidden_units, relu) -> Dropout(dropout_rate).

    Args:
        inputs: Keras input tensor of width ``input_dim``.
        input_dim: Width of the input and of the first dense layer.
        hidden_units: Width of the second dense layer.
        dropout_rate: Dropout rate applied to the branch output.
        name: Name given to the branch's Sequential model.

    Returns:
        The output tensor of the branch.
    """
    branch = tf.keras.Sequential([
        # An explicit Input replaces the `input_dim=` layer argument, which
        # newer Keras releases discourage (it triggers a UserWarning).
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(input_dim, activation='relu'),
        tf.keras.layers.Dense(hidden_units, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
    ], name=name)
    return branch(inputs)


def build_human_model(binary_dim=84, pse_knc_dim=66, pstnpss_dim=19, dropout_rate=0.8):
    """Build and compile the three-input classifier for the human data.

    Each encoding (binary, PseKNC, PSTNPss) goes through its own dense branch;
    the branch outputs are concatenated, passed through dropout and mapped to a
    single sigmoid unit.

    Args:
        binary_dim: Feature width of the binary encoding.
        pse_knc_dim: Feature width of the PseKNC encoding.
        pstnpss_dim: Feature width of the PSTNPss encoding.
        dropout_rate: Dropout rate used in every branch and after concatenation.

    Returns:
        A compiled ``tf.keras.Model`` named ``human_model`` whose inputs are, in
        order, the binary, PseKNC and PSTNPss feature matrices.
    """
    input1 = tf.keras.Input(shape=(binary_dim,), name='human_binary_input')
    input2 = tf.keras.Input(shape=(pse_knc_dim,), name='human_pse_knc_input')
    input3 = tf.keras.Input(shape=(pstnpss_dim,), name='human_pstnpss_input')

    binary_model = _dense_branch(input1, binary_dim, 42, dropout_rate, 'binary_model')
    pse_knc_model = _dense_branch(input2, pse_knc_dim, 32, dropout_rate, 'pse_knc_model')
    pstnpss_model = _dense_branch(input3, pstnpss_dim, 16, dropout_rate, 'pstnpss_model')

    concatenated = tf.keras.layers.concatenate([
        binary_model,
        pse_knc_model,
        pstnpss_model
    ])

    dropout2 = tf.keras.layers.Dropout(dropout_rate)(concatenated)

    output = tf.keras.layers.Dense(1, activation='sigmoid', name='final_output')(dropout2)

    combined_model = tf.keras.Model(
        inputs=[input1, input2, input3],
        outputs=output,
        name='human_model'
    )

    # The output layer already applies a sigmoid, hence from_logits=False.
    combined_model.compile(
        loss=BinaryCrossentropy(from_logits=False),
        optimizer='adam',
        metrics=['accuracy']
    )

    return combined_model


# Instantiate the three-branch human network.
human_model = build_human_model()

# Stop training once the validation loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

In [179]:
# The `early_stopping` callback is created alongside the model; it is passed here so it
# actually takes effect, matching how the yeast model is trained.
# NOTE(review): the independent test set doubles as validation data, so early stopping
# is driven by the same data used for the final report — consider a held-out validation split.
human_model.fit(
    [binary_train_x, pse_knc_train_x, pstnpss_train_x],
    human_train_y,
    epochs=10,
    callbacks=[early_stopping],
    validation_data=([binary_test_x, pse_knc_test_x, pstnpss_test_x], human_test_y)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2e836c2d0>

### Yeast Training & Testing

In [6]:
def _encode_yeast_pstnpss(sequences):
    """Apply the PSTNPss encoding with the 'sc' species code."""
    return FeatureEncoder.pstnpss(sequences, 'sc')


# The independent (test) set is loaded first, then the training set.
yeast_x_test, yeast_y_test = load_dataset(
    Species.yeast, independent=True, encoding=_encode_yeast_pstnpss)
yeast_x_train, yeast_y_train = load_dataset(
    Species.yeast, independent=False, encoding=_encode_yeast_pstnpss)

In [7]:
# Fully connected binary classifier over the 29-wide yeast feature matrix.
yeast_layers = [
    Dense(29, input_dim=29, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]
yeast_model = Sequential(yeast_layers)

# Stop training once the validation loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(patience=5, monitor='val_loss', mode='min')

# The final layer already applies a sigmoid, hence from_logits=False.
yeast_model.compile(
    optimizer='adam',
    loss=BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [8]:
# Train for at most 40 epochs; `early_stopping` may end the run sooner.
yeast_model.fit(
    yeast_x_train,
    yeast_y_train,
    validation_data=(yeast_x_test, yeast_y_test),
    callbacks=[early_stopping],
    epochs=40,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x29cee9b90>

In [9]:
# The human network takes three inputs, in the order declared in build_human_model
# (binary, PseKNC, PSTNPss). The previously used human_x_test / human_y_test names
# are not defined by any cell, so the report is built from the loaded test data.
# NOTE(review): assumes ModelReport.generate hands the feature list to the Keras model unchanged — confirm.
human_test_inputs = [binary_test_x, pse_knc_test_x, pstnpss_test_x]
human_model_report = ModelReport.generate(human_model, human_test_inputs, human_test_y, is_keras=True)
yeast_model_report = ModelReport.generate(yeast_model, yeast_x_test, yeast_y_test, is_keras=True)



In [10]:
# Combine the neural-network reports of both species into one document under nn/pstnpss_2.
nn_reports = {
    Species.human.value: human_model_report,
    Species.yeast.value: yeast_model_report,
}
generate_report(nn_reports, Path('nn/pstnpss_2'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `nn/pstnpss_2/report.pdf` (32.97 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [11]:
def select_features(features):
    """Return the hard-coded subset of PSTNPss columns, in selection order.

    The selection lists zero-based feature indices (a permutation of 0-18);
    index ``i`` maps to the column label ``'pstnpss_{i + 1}'``.

    Args:
        features: Object supporting indexing by a list of column labels
            (e.g. a pandas DataFrame) that contains those 19 columns.

    Returns:
        ``features`` restricted to the selected columns, in selection order.
    """
    chosen_indices = (10, 12, 8, 9, 17, 7, 6, 15, 14, 11, 2, 13, 5, 16, 18, 4, 1, 3, 0)
    column_labels = [f'pstnpss_{index + 1}' for index in chosen_indices]
    return features[column_labels]

In [12]:
# Gradient-boosting baseline on the human PSTNPss features.
# The human data is loaded as pstnpss_train_x / pstnpss_test_x (features) and
# human_train_y / human_test_y (labels); the human_x_* / human_y_* names used
# previously are not defined by any cell and fail on a fresh kernel.
human_classifier = GradientBoostingClassifier(
    learning_rate=0.247286, loss='log_loss', max_depth=9, n_estimators=1624, subsample=0.681402
)

human_classifier.fit(select_features(pstnpss_train_x), human_train_y)
# Mean accuracy on the independent human set (shown as the cell output).
human_classifier.score(select_features(pstnpss_test_x), human_test_y)

0.78

In [13]:
# Gradient-boosting baseline on the selected yeast PSTNPss features.
yeast_gb_params = dict(
    learning_rate=0.247286,
    loss='log_loss',
    max_depth=9,
    n_estimators=1624,
    subsample=0.681402,
)
yeast_classifier = GradientBoostingClassifier(**yeast_gb_params)

yeast_train_selected = select_features(yeast_x_train)
yeast_classifier.fit(yeast_train_selected, yeast_y_train)

# Mean accuracy on the independent yeast set (shown as the cell output).
yeast_test_selected = select_features(yeast_x_test)
yeast_classifier.score(yeast_test_selected, yeast_y_test)

0.795

In [14]:
# Reports for the gradient-boosting classifiers, evaluated on the independent sets.
# The human test data is held in pstnpss_test_x / human_test_y; human_x_test and
# human_y_test are not defined by any cell.
human_model_report = ModelReport.generate(human_classifier, select_features(pstnpss_test_x), human_test_y)
yeast_model_report = ModelReport.generate(yeast_classifier, select_features(yeast_x_test), yeast_y_test)

In [15]:
# Combine the gradient-boosting reports of both species into one document under ml/pstnpss_2.
ml_reports = {
    Species.human.value: human_model_report,
    Species.yeast.value: yeast_model_report,
}
generate_report(ml_reports, Path('ml/pstnpss_2'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `ml/pstnpss_2/report.pdf` (32.47 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>