In [1]:
import random
import numpy as np
import tensorflow as tf

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [2]:
from pathlib import Path
from keras import Sequential
from keras.layers import *
from keras.losses import BinaryCrossentropy
from keras.callbacks import EarlyStopping
from sklearn.naive_bayes import GaussianNB

from src.features.encoder import FeatureEncoder
from src.data.data_loader import load_dataset, Species
from src.models.reporting.model_report import ModelReport
from src.models.reporting.single_report import generate_report

In [5]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')

### Human Training & Testing

In [3]:
human_x_test, human_y_test = load_dataset(Species.human, independent=True,
                                          encoding=lambda x: FeatureEncoder.ps(x, 3))
human_x_train, human_y_train = load_dataset(Species.human, independent=False,
                                            encoding=lambda x: FeatureEncoder.ps(x, 3))

In [11]:
human_model = Sequential([
    Dense(1216, input_dim=1216, activation='relu'),
    Dropout(0.8),
    Dense(500, activation='relu'),
    Dropout(0.8),
    Dense(1, activation='sigmoid'),
])

human_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

human_model.fit(human_x_train, human_y_train, epochs=40, callbacks=[early_stopping],
                validation_data=(human_x_test, human_y_test))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40


<keras.src.callbacks.History at 0x298bf32d0>

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40


<keras.src.callbacks.History at 0x294bc51d0>

### Yeast Training & Testing

In [102]:
yeast_x_test, yeast_y_test = load_dataset(Species.yeast, independent=True,
                                          encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))
yeast_x_train, yeast_y_train = load_dataset(Species.yeast, independent=False,
                                            encoding=lambda x: FeatureEncoder.pse_knc(x, info, 3, 2, 0.1))

In [105]:
yeast_model = Sequential([
    Dense(66, input_dim=66, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.8),
    Dense(1, activation='sigmoid'),
])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')

yeast_model.compile(loss=BinaryCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'])

In [106]:
yeast_model.fit(yeast_x_train, yeast_y_train, epochs=40, callbacks=[early_stopping],
                validation_data=(yeast_x_test, yeast_y_test))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40


<keras.src.callbacks.History at 0x2aa19fcd0>

In [107]:
human_model_report = ModelReport.generate(human_model, human_x_test, human_y_test, is_keras=True)
yeast_model_report = ModelReport.generate(yeast_model, yeast_x_test, yeast_y_test, is_keras=True)



In [108]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report}, Path('nn/pseknc'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `nn/pseknc/report.pdf` (35.46 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [109]:
def select_features(features):
    selection = [4, 21, 20, 0, 5, 65, 64, 1, 17, 16, 60, 15, 25, 54, 3, 11, 46,
                 41, 40, 39, 7, 2, 42, 36, 31, 12, 24, 35, 18, 62, 61, 53, 8, 50,
                 56, 43, 19, 28, 9, 59, 37, 38, 33, 48, 44, 45, 63, 10, 22, 27,
                 49, 57, 55, 14, 51, 47, 52, 26, 23, 29, 30, 13, 6, 58]
    imp_features = list(map(lambda x: f'pse_knc_{x + 1}', selection))
    return features[imp_features]

In [110]:
human_classifier = GaussianNB()

human_classifier.fit(select_features(human_x_train), human_y_train)
human_classifier.score(select_features(human_x_test), human_y_test)

0.585

In [111]:
yeast_classifier = GaussianNB()

yeast_classifier.fit(select_features(yeast_x_train), yeast_y_train)
yeast_classifier.score(select_features(yeast_x_test), yeast_y_test)

0.52

In [113]:
human_model_report = ModelReport.generate(human_classifier, select_features(human_x_test), human_y_test)
yeast_model_report = ModelReport.generate(yeast_classifier, select_features(yeast_x_test), yeast_y_test)

In [114]:
generate_report({Species.human.value: human_model_report, Species.yeast.value: yeast_model_report}, Path('ml/pseknc'))

note: Running TeX ...
note: Rerunning TeX because "report.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `ml/pseknc/report.pdf` (35.29 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>