In [2]:
import random
import numpy as np
import tensorflow as tf

from src.features.encodings import pse_knc

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

info = pse_knc.get_info('PseKNC')

In [3]:
from pathlib import Path
from keras import Sequential
from keras.layers import *
from keras.losses import BinaryCrossentropy
from keras.callbacks import EarlyStopping
from sklearn.naive_bayes import GaussianNB

from src.features.encoder import FeatureEncoder
from src.data.data_loader import load_dataset, Species
from src.models.reporting.model_report import ModelReport
from src.models.reporting.single_report import generate_report

In [4]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')

### Human Training & Testing

In [6]:
human_x_test, human_y_test = load_dataset(Species.human, independent=True,
                                          encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))
human_x_train, human_y_train = load_dataset(Species.human, independent=False,
                                            encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))

In [7]:
def create_weak_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=19))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create a list to store weak models
weak_models = []

# Create and train individual weak models
num_weak_models = 5  # Number of weak models to train
for _ in range(num_weak_models):
    model = create_weak_model()
    # Train the weak model
    model.fit(human_x_train, human_y_train, epochs=10, batch_size=32, verbose=0, callbacks=[EarlyStopping(patience=3)])
    weak_models.append(model)

# Make predictions using the ensemble
ensemble_predictions = []
for model in weak_models:
    predictions = model.predict(human_x_test)
    ensemble_predictions.append(predictions)

# Combine predictions using majority voting
ensemble_predictions = np.concatenate(ensemble_predictions, axis=1)
ensemble_predictions = (ensemble_predictions > 0.5).astype(int)
final_predictions = np.mean(ensemble_predictions, axis=1)

# Calculate accuracy
accuracy = np.mean(final_predictions == human_y_test)
print("Ensemble Model Accuracy:", accuracy)

ValueError: in user code:

    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/arish/workspace/research/psi_predictor/venv/lib/python3.11/site-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 19), found shape=(None, 66)


In [100]:
human_model = Sequential([
    Dense(66, input_dim=66, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.8),
    Dense(1, activation='sigmoid'),
])

human_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

human_model.fit(human_x_train, human_y_train, epochs=40, callbacks=[early_stopping],
                validation_data=(human_x_test, human_y_test))

### Yeast Training & Testing

In [102]:
yeast_x_test, yeast_y_test = load_dataset(Species.yeast, independent=True,
                                          encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))
yeast_x_train, yeast_y_train = load_dataset(Species.yeast, independent=False,
                                            encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))

In [105]:
yeast_model = Sequential([
    Dense(66, input_dim=66, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.8),
    Dense(1, activation='sigmoid'),
])

yeast_model.compile(loss=BinaryCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'])

yeast_model.fit(yeast_x_train, yeast_y_train, epochs=40, callbacks=[early_stopping],
                validation_data=(yeast_x_test, yeast_y_test))

In [107]:
human_model_report = ModelReport.generate(human_model, human_x_test, human_y_test, is_keras=True)
yeast_model_report = ModelReport.generate(yeast_model, yeast_x_test, yeast_y_test, is_keras=True)



In [108]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report}, Path('nn/pseknc'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `nn/pseknc/report.pdf` (35.46 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [109]:
def select_features(features):
    selection = [4, 21, 20, 0, 5, 65, 64, 1, 17, 16, 60, 15, 25, 54, 3, 11, 46,
                 41, 40, 39, 7, 2, 42, 36, 31, 12, 24, 35, 18, 62, 61, 53, 8, 50,
                 56, 43, 19, 28, 9, 59, 37, 38, 33, 48, 44, 45, 63, 10, 22, 27,
                 49, 57, 55, 14, 51, 47, 52, 26, 23, 29, 30, 13, 6, 58]
    imp_features = list(map(lambda x: f'pse_knc_{x + 1}', selection))
    return features[imp_features]

In [110]:
human_classifier = GaussianNB()

human_classifier.fit(select_features(human_x_train), human_y_train)
human_classifier.score(select_features(human_x_test), human_y_test)

0.585

In [111]:
yeast_classifier = GaussianNB()

yeast_classifier.fit(select_features(yeast_x_train), yeast_y_train)
yeast_classifier.score(select_features(yeast_x_test), yeast_y_test)

0.52

In [113]:
human_model_report = ModelReport.generate(human_classifier, select_features(human_x_test), human_y_test)
yeast_model_report = ModelReport.generate(yeast_classifier, select_features(yeast_x_test), yeast_y_test)

In [114]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report}, Path('ml/pseknc'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `ml/pseknc/report.pdf` (35.29 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>