In [1]:
from repairing_genomic_gaps import (
    build_multivariate_dataset_cae,
    build_synthetic_dataset_cae,
    build_biological_dataset_cae
)
import numpy as np
import compress_json
import socket
from sklearn.impute import KNNImputer
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score
from notipy_me import Notipy

## Tasks to do

- [ ] Training over multivariate
    - [ ] Testing over multivariate
    - [ ] Testing over single gap
    - [ ] Testing over biological
- [ ] Training over single gap
    - [ ] Testing over multivariate
    - [ ] Testing over single gap
    - [ ] Testing over biological

In [2]:
# Work assignment per computing node, keyed by hostname.
# Each value is a pair of dataset builders: the first is the dataset the
# imputer is fitted on, the second is the dataset used for cross-evaluation.
tasks = dict(
    fievel=[build_multivariate_dataset_cae, build_synthetic_dataset_cae],
    souris=[build_synthetic_dataset_cae, build_multivariate_dataset_cae],
)

In [4]:
def report(y_true, y_pred, runtype, window_size:int):
    """Compute classification metrics for the central position of each window.

    Both inputs are viewed as ``(n_windows, window_size, 4)`` so that flat
    ``(n_windows, window_size * 4)`` arrays and already-shaped arrays are
    handled the same way. Only the position at ``window_size // 2`` is scored.

    Parameters
    ----------
    y_true:
        Ground-truth per-class encoding of the windows; the class label of a
        position is taken as the argmax over the last axis.
    y_pred:
        Predicted per-class scores for the same windows.
    runtype:
        Prefix used in the metric names of the returned dictionary.
    window_size: int
        Number of positions in each window.

    Returns
    -------
    dict
        Maps ``"{runtype} auroc"``, ``"{runtype} auprc"`` and
        ``"{runtype} accuracy"`` to the corresponding scores.
    """
    # Reshape BOTH arrays: previously only y_pred was reshaped, so a flat
    # y_true would have been indexed on the wrong axis.
    y_true = np.reshape(y_true, (-1, window_size, 4))
    y_pred = np.reshape(y_pred, (-1, window_size, 4))
    half = window_size//2

    # Slices for the central position, shared by every metric below.
    true_center = y_true[:, half]
    pred_center = y_pred[:, half]
    true_labels = np.argmax(true_center, axis=-1)

    return {
        f"{runtype} auroc": roc_auc_score(
            true_labels, pred_center,
            multi_class="ovo",
            labels=[0,1,2,3]
        ),
        f"{runtype} auprc": average_precision_score(
            true_center, pred_center
        ),
        f"{runtype} accuracy": accuracy_score(
            true_labels,
            np.argmax(pred_center, axis=-1)
        )
    }

In [5]:
# Experiment configuration.
window_size = 1000
batch_size = 5

# Getting the node hostname: it selects which task this machine runs
hostname = socket.gethostname()

with Notipy(task_name=f"KNN imputation on {hostname}"):
    # Retrieving the task for this computing node
    if hostname not in tasks:
        raise KeyError(
            f"No task configured for host {hostname!r}; "
            f"known hosts: {sorted(tasks)}"
        )
    main_dataset, minor_dataset = tasks[hostname]

    # Creating the datasets.
    # The minor split must come from `minor_dataset`: building it with
    # `main_dataset` again would evaluate the imputer on its own dataset twice.
    main_train, main_test = main_dataset(window_size, batch_size=batch_size)
    minor_train, minor_test = minor_dataset(window_size, batch_size=batch_size)
    biological = build_biological_dataset_cae(window_size=window_size)

    # For shuffling the samples
    # NOTE(review): assumes on_epoch_end reshuffles the sequences - confirm.
    for sequence in (main_train, main_test, minor_train, minor_test):
        sequence.on_epoch_end()

    # Generating the samples (first batch of each sequence)
    main_train_x, main_train_y = main_train[0]
    main_test_x, main_test_y = main_test[0]
    minor_train_x, minor_train_y = minor_train[0]
    minor_test_x, minor_test_y = minor_test[0]
    bio_x, bio_y = biological[0]

    # Tuning the imputer on the complete (target) windows of the main dataset.
    # Entries equal to 0.25 are treated as missing - presumably the encoding
    # the dataset builders use for gap positions; TODO confirm.
    imputer = KNNImputer(missing_values=0.25)
    imputer.fit(main_train_y.reshape(-1, window_size*4))

    def _impute(samples):
        """Flatten each window into one row and fill in its missing values."""
        return imputer.transform(samples.reshape(-1, window_size*4))

    ###################################
    # EVALUATING THE MAIN DATASET.    #
    ###################################

    # Predicting the training missing values
    main_train_pred_x = _impute(main_train_x)

    # Predicting the test missing values
    main_test_pred_x = _impute(main_test_x)

    ###################################
    # EVALUATING THE MINOR DATASET.   #
    ###################################

    # Predicting the training missing values
    minor_train_pred_x = _impute(minor_train_x)

    # Predicting the test missing values
    minor_test_pred_x = _impute(minor_test_x)

    ###################################
    # EVALUATING THE BIO DATASET.     #
    ###################################

    # Predicting the biological missing values
    bio_pred_x = _impute(bio_x)

    ###################################
    # SAVING THE OBTAINED RESULTS.    #
    ###################################

    # Every report compares the ground truth with the IMPUTED windows
    # (`*_pred_x`), never with the raw masked inputs.
    compress_json.dump(
        {
            "trained_on":main_dataset.__name__,
            "evaluated_on":minor_dataset.__name__,
            main_dataset.__name__: {
                "train": report(main_train_y, main_train_pred_x, "train", window_size),
                "test": report(main_test_y, main_test_pred_x, "test", window_size),
            },
            minor_dataset.__name__: {
                "train": report(minor_train_y, minor_train_pred_x, "train", window_size),
                "test": report(minor_test_y, minor_test_pred_x, "test", window_size),
            },
            biological.__class__.__name__: {
                "bio": report(bio_y, bio_pred_x, "biological", window_size)
            }
        },
        f"{hostname}.json"
    )

Let's setup your notipy!


KeyboardInterrupt: 