In [1]:
import keras
from keras.models import Sequential
from keras.optimizers import AdamW
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv1D, MaxPool1D, Flatten, Dense, Input, Dropout

from src.features.encodings import pstnpss
from src.dataset import load_dataset, Species, Modification, split_balanced

In [2]:
# Load the dataset selected by species (human) and modification (psi).
# NOTE(review): the meaning of the 'all' selector is defined by load_dataset — confirm.
dataset = load_dataset(
    Species.human,
    Modification.psi,
    'all',
)

In [3]:
# Instantiate the PSTNPss encoder with its default settings; it is fitted later
# together with the targets.
encoder = pstnpss.Encoder()

In [4]:
encoded_samples = encoder.fit_transform(dataset.samples, dataset.targets)

In [5]:
x_train, x_test, y_train, y_test = split_balanced(encoded_samples, dataset.targets, test_size=0.2)
x_train, x_val, y_train, y_val = split_balanced(x_train, y_train, test_size=0.2)

In [6]:
# 1D convolutional binary classifier.
# Input: sequences of 39 steps with a single channel.
# Body: three Conv1D -> MaxPool1D stages with growing filter counts (32, 64, 128)
# and shrinking kernel sizes (7, 5, 3), each pooled by a factor of 2.
# Head: flatten -> 128-unit ReLU layer -> dropout -> single sigmoid output.
model = Sequential(layers=[
    Input(shape=(39, 1)),

    # Convolution / pooling stages
    Conv1D(32, 7, activation='relu'),
    MaxPool1D(2),
    Conv1D(64, 5, activation='relu'),
    MaxPool1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPool1D(2),

    # Classification head
    Flatten(),
    Dense(units=128, activation='relu'),
    # NOTE(review): a dropout rate of 0.8 is unusually aggressive — confirm it is intended.
    Dropout(rate=0.8),
    Dense(units=1, activation='sigmoid'),
])

In [7]:
# Configure training: binary cross-entropy loss (matches the single sigmoid
# output), AdamW optimizer, and accuracy as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=AdamW(learning_rate=0.001),
    metrics=['accuracy'],
)

In [8]:
# Checkpoint callback: writes to 'best_model.keras' only when the monitored
# validation accuracy improves (higher is better).
checkpoint_callback = ModelCheckpoint(
    filepath='best_model.keras',
    mode='max',
    monitor='val_accuracy',
    save_best_only=True,
)

In [16]:
# Train for 10 epochs in shuffled mini-batches of 32, evaluating on the
# validation split after each epoch; the checkpoint callback runs during
# training. The returned history object is kept for later inspection.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint_callback],
    batch_size=32,
    epochs=10,
    shuffle=True,
)

In [17]:
# Score the model on the held-out test split using whatever weights it
# currently holds (no checkpoint is loaded in this cell).
model.evaluate(x=x_test, y=y_test)

In [18]:
# Restore the weights stored in the checkpoint file, then score the model on
# the test split again with those weights.
model.load_weights(filepath='best_model.keras')
model.evaluate(x=x_test, y=y_test)