In [2]:
import random
import numpy as np
import tensorflow as tf

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
from pathlib import Path
from keras import Sequential
from keras.layers import *
from xgboost import XGBClassifier
from keras.callbacks import EarlyStopping
from keras.losses import BinaryCrossentropy

from src.experiment import *
from src.data import load_psi, Species
from src.features.encodings import pstnpss

In [4]:
human_encoder = pstnpss.Encoder(species=Species.human)
yeast_encoder = pstnpss.Encoder(species=Species.yeast)

In [5]:
human_test = load_psi(Species.human, independent=True)
human_train = load_psi(Species.human, independent=False)

yeast_test = load_psi(Species.yeast, independent=True)
yeast_train = load_psi(Species.yeast, independent=False)

In [6]:
human_x_train = human_encoder.fit_transform(human_train.samples)
human_y_train = human_train.targets
human_x_test = human_encoder.fit_transform(human_test.samples)
human_y_test = human_test.targets

In [7]:
yeast_x_train = yeast_encoder.fit_transform(yeast_train.samples)
yeast_y_train = yeast_train.targets
yeast_x_test = yeast_encoder.fit_transform(yeast_test.samples)
yeast_y_test = yeast_test.targets

In [9]:
human_model = Sequential([
    Conv1D(32, 3, activation='relu', input_shape=(19, 1)),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

human_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

early_stopping = EarlyStopping(
    patience=5,
    mode='min',
    restore_best_weights=True
)

human_model.fit(human_x_train, human_y_train,
                epochs=100, callbacks=[early_stopping],
                validation_data=(human_x_test, human_y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<keras.src.callbacks.History at 0x29fd5f590>

In [12]:
yeast_model = Sequential([
    Dense(29, input_dim=29, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

yeast_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy']
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    restore_best_weights=True,
)

yeast_model.fit(yeast_x_train, yeast_y_train,
                epochs=100, callbacks=[early_stopping],
                validation_data=(yeast_x_test, yeast_y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


<keras.src.callbacks.History at 0x2c9ac1ad0>

In [16]:
def select_features(features):
    selection = [10, 12, 8, 9, 17, 7, 6, 15, 14, 11, 2, 13, 5, 16, 18, 4, 1, 3, 0]
    imp_features = list(map(lambda x: f'pstnpss_{x}', selection))
    return features[imp_features]

In [17]:
human_classifier = XGBClassifier(
    base_score=0.5, booster='gbtree', colsample_bynode=1, max_depth=6, verbosity=1, colsample_bytree=0.637482,
    subsample=0.901284, learning_rate=0.276002, reg_alpha=0, max_delta_step=0, min_child_weight=1, n_jobs=1,
    n_estimators=1082, colsample_bylevel=1, random_state=0, reg_lambda=1, scale_pos_weight=1, gamma=0.103823
)

human_classifier.fit(select_features(human_x_train), human_y_train)
human_classifier.score(select_features(human_x_test), human_y_test)
None

In [18]:
yeast_classifier = XGBClassifier(
    base_score=0.5, booster='gbtree', colsample_bynode=1, max_depth=6, verbosity=1, colsample_bytree=0.637482,
    subsample=0.901284, learning_rate=0.276002, reg_alpha=0, max_delta_step=0, min_child_weight=1, n_jobs=1,
    n_estimators=1082, colsample_bylevel=1, random_state=0, reg_lambda=1, scale_pos_weight=1, gamma=0.103823
)

yeast_classifier.fit(select_features(yeast_x_train), yeast_y_train)
yeast_classifier.score(select_features(yeast_x_test), yeast_y_test)
None

In [20]:
generate_latex_report(
    Report.create_report(human_model, (human_x_test, human_y_test), True),
    'human_nn_test',
    Path('pstnpss_report'),
    True
)

generate_latex_report(
    Report.create_report(yeast_model, (yeast_x_test, yeast_y_test), True),
    'yeast_nn_test',
    Path('pstnpss_report'),
    True
)

generate_latex_report(
    Report.create_report(human_classifier, (select_features(human_x_test), human_y_test)),
    'human_xg_test',
    Path('pstnpss_report'),
    True
)

generate_latex_report(
    Report.create_report(yeast_classifier, (select_features(yeast_x_test), yeast_y_test)),
    'yeast_xg_test',
    Path('pstnpss_report'),
    True
)

note: Running TeX ...
note: Rerunning TeX because "human_nn_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/human_nn_test.pdf` (26.38 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "yeast_nn_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/yeast_nn_test.pdf` (26.55 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "human_xg_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/human_xg_test.pdf` (26.26 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "yeast_xg_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/yeast_xg_test.pdf` (27.05 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediate