In [1]:
import random
import numpy as np
import tensorflow as tf

# Single seed shared by every RNG this notebook touches (Python's `random`,
# NumPy's legacy global generator and TensorFlow), kept in one named constant
# so it can be changed in one place.
SEED = 0

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
from keras import Sequential
from keras.layers import *
from xgboost import XGBClassifier
from keras.callbacks import EarlyStopping
from keras.losses import BinaryCrossentropy

from src.experiment import *
from src.data import load_psi, Species
from src.features.encodings import pstnpss
# from src.model.porpoise import pstnpss

In [3]:
# Two separate PSTNPss encoder instances, one per species; each is fitted on
# its own species' training split in the next cell.
human_encoder, yeast_encoder = pstnpss.Encoder(), pstnpss.Encoder()

In [4]:
# For each species: fit the encoder on the non-independent split and encode
# it, then encode the independent split with that already-fitted encoder
# (the independent split never goes through `fit`).
human_train, human_test = (
    human_encoder.fit_transform(load_psi(Species.human, independent=False)),
    human_encoder.transform(load_psi(Species.human, independent=True)),
)

yeast_train, yeast_test = (
    yeast_encoder.fit_transform(load_psi(Species.yeast, independent=False)),
    yeast_encoder.transform(load_psi(Species.yeast, independent=True)),
)

In [6]:
import os

from src.data import save_dataset

# Directory that receives the encoded human datasets. The default is the
# location this notebook was originally run against; set the
# RNA_MODIFICATION_DATA_DIR environment variable to write elsewhere on another
# machine instead of editing the notebook.
DATA_DIR = os.environ.get(
    'RNA_MODIFICATION_DATA_DIR',
    '/Users/arish/Workspace/research/rna_modification/data',
)

# Only the human train/test splits are exported by this cell.
save_dataset(os.path.join(DATA_DIR, 'human_train.csv'), human_train)
save_dataset(os.path.join(DATA_DIR, 'human_test.csv'), human_test)

In [7]:
# 1-D convolutional network for the human data. The input is declared as
# 19 positions with a single channel; two conv/pool stages feed a small dense
# head that ends in one sigmoid unit (binary output).
human_model = Sequential([
    Conv1D(32, 3, activation='relu', input_shape=(19, 1)),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# The last layer already applies a sigmoid, so the loss is given probabilities
# rather than logits (from_logits=False).
human_model.compile(
    loss=BinaryCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once the validation loss has not improved for 5 epochs and roll the
# weights back to the best epoch. `monitor='val_loss'` is Keras' default; it
# is spelled out so this callback matches the one used for the yeast model.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    restore_best_weights=True,
)

# NOTE(review): the independent test split is used as the validation set, so
# early stopping and weight restoration are tuned on the same data the test
# report is later computed on. Consider validating on a split carved out of
# the training data instead.
human_model.fit(human_train.samples, human_train.targets,
                epochs=100, callbacks=[early_stopping],
                validation_data=(human_test.samples, human_test.targets))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.src.callbacks.History at 0x2c7dfe2d0>

In [8]:
# Fully connected network for the yeast data: 29 inputs -> 29 -> 16 -> 1,
# with dropout after each hidden layer and a sigmoid output unit.
yeast_model = Sequential([
    Dense(units=29, activation='relu', input_dim=29),
    Dropout(rate=0.2),
    Dense(units=16, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=1, activation='sigmoid'),
])

# Sigmoid output, so the loss receives probabilities (from_logits=False).
yeast_model.compile(
    optimizer='adam',
    loss=BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# Halt after 5 epochs without a lower validation loss and restore the weights
# of the best epoch. Note this rebinds the `early_stopping` name used by the
# human model's cell.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
)

# NOTE(review): the independent test split serves as the validation set here,
# so early stopping is driven by the same data the test report is built from.
yeast_model.fit(
    x=yeast_train.samples,
    y=yeast_train.targets,
    epochs=100,
    callbacks=[early_stopping],
    validation_data=(yeast_test.samples, yeast_test.targets),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


<keras.src.callbacks.History at 0x2c96d2cd0>

In [19]:
def select_features(features, selection=None):
    """Pick the ``pstnpss_<i>`` columns of ``features`` in a given order.

    Args:
        features: Table indexable by a list of column names (for example a
            pandas DataFrame) whose columns are named ``pstnpss_<index>``.
        selection: Optional iterable of integer feature indices, in the order
            the columns should appear in the result. When omitted, the
            19-index ordering originally hard-coded in this notebook is used
            (presumably a feature-importance ranking — TODO confirm).

    Returns:
        ``features`` indexed by the list of selected column names, i.e. only
        the requested columns, in the requested order.
    """
    if selection is None:
        # Default ordering kept from the original notebook; it is a
        # permutation of the indices 0..18.
        selection = [10, 12, 8, 9, 17, 7, 6, 15, 14, 11, 2, 13, 5, 16, 18, 4, 1, 3, 0]
    columns = [f'pstnpss_{index}' for index in selection]
    return features[columns]

In [21]:
# Gradient-boosted baseline for the human data with default hyper-parameters,
# trained on the reordered PSTNPss columns (`fit` returns the estimator).
human_classifier = XGBClassifier().fit(
    select_features(human_train.samples),
    human_train.targets,
)

# Score on the independent split; left as a bare expression so the notebook
# displays it as the cell's output.
human_classifier.score(
    select_features(human_test.samples),
    human_test.targets,
)

1.0

In [11]:
# Gradient-boosted baseline for the yeast data with default hyper-parameters
# (`fit` returns the estimator).
# NOTE(review): `select_features` keeps only columns pstnpss_0..18, while the
# yeast network above declares 29 inputs — confirm that dropping the remaining
# columns for this classifier is intended.
yeast_classifier = XGBClassifier().fit(
    select_features(yeast_train.samples),
    yeast_train.targets,
)

# Score on the independent split; left as a bare expression so the notebook
# displays it as the cell's output.
yeast_classifier.score(
    select_features(yeast_test.samples),
    yeast_test.targets,
)

0.965

In [12]:
def _write_test_report(name, model, samples, targets, *report_args):
    """Build a report for ``model`` on a test split and render it via LaTeX.

    Args:
        name: Report name passed to ``generate_latex_report``.
        model: Fitted estimator handed to ``Report.create_report``.
        samples: Test inputs.
        targets: Test labels.
        *report_args: Extra positional arguments forwarded unchanged to
            ``Report.create_report`` after the ``(samples, targets)`` pair.
    """
    generate_latex_report(
        Report.create_report(model, (samples, targets), *report_args),
        name,
        Path('pstnpss_report'),
        True
    )


# Neural networks: these pass an extra positional `True` to
# `Report.create_report` (its meaning is defined in src.experiment, not here).
_write_test_report('human_nn_test', human_model,
                   human_test.samples, human_test.targets, True)
_write_test_report('yeast_nn_test', yeast_model,
                   yeast_test.samples, yeast_test.targets, True)

# XGBoost classifiers: evaluated on the same column selection they were
# trained on, with no extra `create_report` argument.
_write_test_report('human_xg_test', human_classifier,
                   select_features(human_test.samples), human_test.targets)
_write_test_report('yeast_xg_test', yeast_classifier,
                   select_features(yeast_test.samples), yeast_test.targets)

note: Running TeX ...
note: Rerunning TeX because "human_nn_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/human_nn_test.pdf` (27.17 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "yeast_nn_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/yeast_nn_test.pdf` (27.58 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "human_xg_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/human_xg_test.pdf` (24.11 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediates to keep them)
note: Running TeX ...
note: Rerunning TeX because "yeast_xg_test.aux" changed ...
note: Running xdvipdfmx ...
note: Writing `pstnpss_report/yeast_xg_test.pdf` (24.56 KiB)
note: Skipped writing 1 intermediate files (use --keep-intermediate