In [1]:
import random
import numpy as np
import tensorflow as tf

from src.features.encodings import pse_knc

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [2]:
from pathlib import Path
from keras import Sequential
from keras.layers import *
from xgboost import XGBClassifier
from keras.callbacks import EarlyStopping
from keras.losses import BinaryCrossentropy

from src.features.encoder import FeatureEncoder
from src.data.data_loader import load_dataset, Species
from src.models.reporting.model_report import ModelReport
from src.models.reporting.single_report import generate_report

### Human Training & Testing

In [7]:
human_train = load_dataset(
    Species.human,
    encoding=lambda x: FeatureEncoder.pstnpss(x, 'hs')
)

human_test = load_dataset(
    Species.human,
    independent=True,
    encoding=lambda x: FeatureEncoder.pstnpss(x, 'hs')
)

In [24]:
human_test[0].shape

(200, 19)

In [61]:
def create_weak_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=19))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create a list to store weak models
weak_models = []

# Create and train individual weak models
num_weak_models = 5  # Number of weak models to train
for _ in range(num_weak_models):
    model = create_weak_model()
    # Train the weak model
    model.fit(human_train[0], human_train[1], epochs=10, batch_size=32, verbose=0, callbacks=[EarlyStopping(patience=3)])
    weak_models.append(model)

# Make predictions using the ensemble
ensemble_predictions = []
for model in weak_models:
    predictions = model.predict(human_test[0])
    ensemble_predictions.append(predictions)

# Combine predictions using majority voting
ensemble_predictions = np.concatenate(ensemble_predictions, axis=1)
ensemble_predictions = (ensemble_predictions > 0.5).astype(int)
final_predictions = np.mean(ensemble_predictions, axis=1)

# Calculate accuracy
accuracy = np.mean(final_predictions == human_test[1])
print("Ensemble Model Accuracy:", accuracy)

Ensemble Model Accuracy: 0.755


In [42]:
human_model = Sequential([
    Conv1D(32, 3, activation='relu', input_shape=(19, 1)),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

human_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy']
)

human_model.fit(human_train[0], human_train[1], epochs=100, batch_size=32,
                validation_data=(human_test[0], human_test[1]))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x31e2f2150>

In [60]:

human_model.fit(human_train[0], human_train[1], epochs=40, batch_size=32,
                validation_data=(human_test[0], human_test[1]))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x32097a550>

### Yeast Training & Testing

In [11]:
yeast_x_test, yeast_y_test = load_dataset(Species.yeast, independent=True,
                                          encoding=lambda x: FeatureEncoder.pstnpss(x, 'sc'))
yeast_x_train, yeast_y_train = load_dataset(Species.yeast, independent=False,
                                            encoding=lambda x: FeatureEncoder.pstnpss(x, 'sc'))

In [14]:
yeast_model = Sequential([
    Dense(29, input_dim=29, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')

yeast_model.compile(loss=BinaryCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'])

In [16]:
yeast_model.fit(yeast_x_train, yeast_y_train, epochs=40, callbacks=[early_stopping],
                validation_data=(yeast_x_test, yeast_y_test))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40


<keras.src.callbacks.History at 0x2ce970ad0>

In [17]:
human_model_report = ModelReport.generate(human_model, human_x_test, human_y_test, is_keras=True)
yeast_model_report = ModelReport.generate(yeast_model, yeast_x_test, yeast_y_test, is_keras=True)



In [18]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report},
                Path('nn/pstnpss_1'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `nn/pstnpss_1/report.pdf` (33.19 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [20]:
def select_features(features):
    selection = [10, 12, 8, 9, 17, 7, 6, 15, 14, 11, 2, 13, 5, 16, 18, 4, 1, 3, 0]
    imp_features = list(map(lambda x: f'pstnpss_{x + 1}', selection))
    return features[imp_features]

In [21]:
human_classifier = XGBClassifier(
    base_score=0.5, booster='gbtree', colsample_bynode=1, max_depth=6, verbosity=1, colsample_bytree=0.637482,
    subsample=0.901284, learning_rate=0.276002, reg_alpha=0, max_delta_step=0, min_child_weight=1, n_jobs=1,
    n_estimators=1082, colsample_bylevel=1, random_state=0, reg_lambda=1, scale_pos_weight=1, gamma=0.103823
)

human_classifier.fit(select_features(human_x_train), human_y_train)
human_classifier.score(select_features(human_x_test), human_y_test)

0.8

In [22]:
yeast_classifier = XGBClassifier(
    base_score=0.5, booster='gbtree', colsample_bynode=1, max_depth=6, verbosity=1, colsample_bytree=0.637482,
    subsample=0.901284, learning_rate=0.276002, reg_alpha=0, max_delta_step=0, min_child_weight=1, n_jobs=1,
    n_estimators=1082, colsample_bylevel=1, random_state=0, reg_lambda=1, scale_pos_weight=1, gamma=0.103823
)

yeast_classifier.fit(select_features(yeast_x_train), yeast_y_train)
yeast_classifier.score(select_features(yeast_x_test), yeast_y_test)

0.775

In [23]:
human_model_report = ModelReport.generate(human_classifier, select_features(human_x_test), human_y_test)
yeast_model_report = ModelReport.generate(yeast_classifier, select_features(yeast_x_test), yeast_y_test)

In [24]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report},
                Path('ml/pstnpss_1'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `ml/pstnpss_1/report.pdf` (32.92 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>