In [1]:
import keras
from keras.models import Sequential
from keras.optimizers import AdamW
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv1D, MaxPool1D, Flatten, Dense, Input, Dropout

from src.features.encodings import pstnpss
from src.dataset import load_dataset, Species, Modification, split_balanced

In [2]:
# Seed Python's `random`, NumPy and the Keras backend in one call so that weight
# initialisation, dropout masks and fit() shuffling repeat across Restart & Run All.
# NOTE(review): this only makes split_balanced deterministic if it draws from those
# global RNGs -- not visible from this notebook, confirm in src.dataset.
keras.utils.set_random_seed(42)

# Load the human / psi dataset. 'all' is passed as the third argument
# (presumably selecting every available subset -- confirm in src.dataset).
dataset = load_dataset(Species.human, Modification.psi, 'all')

In [3]:
# Instantiate the encoder from the pstnpss module with its default arguments;
# it is fitted on the data in the next cell.
encoder = pstnpss.Encoder()

In [4]:
# Fit the encoder and encode every sample in a single pass.
# NOTE(review): fit_transform receives the targets of the *entire* dataset, and the
# train/val/test split only happens afterwards. If the encoding is derived from the
# labels (it appears to be, since targets are passed in), test-set labels leak into
# the features -- consider fitting on the training split only. TODO confirm against
# the Encoder implementation.
encoded_samples = encoder.fit_transform(dataset.samples, dataset.targets)

In [5]:
# Two-stage split: hold out the test set first, then carve a validation set out of
# what remains. test_size=0.2 is used at both stages; if it is a fraction, that is
# roughly 64/16/20 train/val/test overall -- confirm in src.dataset.
x_trainval, x_test, y_trainval, y_test = split_balanced(
    encoded_samples, dataset.targets, test_size=0.2
)
x_train, x_val, y_train, y_val = split_balanced(
    x_trainval, y_trainval, test_size=0.2
)

In [6]:
# 1D CNN binary classifier over a length-39, single-channel input.
# Shape comments assume the Keras defaults (padding='valid', conv stride 1,
# pool stride equal to pool_size).
model = Sequential()
model.add(Input(shape=(39, 1)))

# Three conv/pool stages: kernels narrow (7 -> 5 -> 3) while filters double (32 -> 64 -> 128).
model.add(Conv1D(filters=32, kernel_size=7, activation='relu'))    # -> (33, 32)
model.add(MaxPool1D(pool_size=2))                                  # -> (16, 32)
model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))    # -> (12, 64)
model.add(MaxPool1D(pool_size=2))                                  # -> (6, 64)
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))   # -> (4, 128)
model.add(MaxPool1D(pool_size=2))                                  # -> (2, 128)

# Classification head: flatten to 256 features, one hidden layer, heavy dropout,
# then a single sigmoid unit producing the positive-class probability.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.8))
model.add(Dense(1, activation='sigmoid'))

In [7]:
# Binary classification setup: AdamW optimiser, cross-entropy on the sigmoid output,
# accuracy reported alongside the loss.
model.compile(
    optimizer=AdamW(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Write the model to 'best_model.keras' only when validation accuracy improves,
# so the file always holds the best-scoring epoch seen so far.
checkpoint_callback = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

In [16]:
# Train for 10 epochs in shuffled mini-batches of 32, validating after every epoch
# and letting the checkpoint callback persist the best epoch.
# Note: fit() resumes from the model's current weights. Re-run the model-building
# and compile cells first if a from-scratch training run is wanted.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    shuffle=True,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint_callback],
)

Epoch 1/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8921 - loss: 0.2751 - val_accuracy: 0.8322 - val_loss: 0.3992
Epoch 2/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8794 - loss: 0.2860 - val_accuracy: 0.8084 - val_loss: 0.4292
Epoch 3/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9066 - loss: 0.2526 - val_accuracy: 0.8107 - val_loss: 0.4611
Epoch 4/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9107 - loss: 0.2465 - val_accuracy: 0.8401 - val_loss: 0.4290
Epoch 5/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9142 - loss: 0.2265 - val_accuracy: 0.8197 - val_loss: 0.4328
Epoch 6/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9098 - loss: 0.2319 - val_accuracy: 0.8333 - val_loss: 0.4420
Epoch 7/10
[1m111/111[0m 

In [17]:
# Score the model as it stands after the last training epoch on the held-out test
# split. With the compile settings above the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test)

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 718us/step - accuracy: 0.7868 - loss: 0.4908


[0.48662567138671875, 0.8166969418525696]

In [18]:
# Replace the in-memory weights with those stored in the checkpoint file, then score
# on the same test split for comparison with the last-epoch result.
# Note: this mutates `model` in place, so any evaluate() call executed after this
# cell reports the checkpoint's metrics rather than the last-epoch ones.
model.load_weights('best_model.keras')
model.evaluate(x=x_test, y=y_test)

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - accuracy: 0.8023 - loss: 0.3929


[0.3730151355266571, 0.8312159776687622]