In [1]:
%load_ext autoreload
%autoreload 2

from src.base_classes.omic_data_loader import OmicDataLoader
from src.data_managers.concat import CatOmicDataManager

In [2]:
# Shared parent directory of the per-modality data folders used below.
# Changing this single value repoints every active loader at another split set.
SPLITS_DIR = "mds_data/splits_74_disease"

# One OmicDataLoader per modality; each reads from its own sub-directory.
mrna_loader = OmicDataLoader(data_dir=f"{SPLITS_DIR}/mrna")
mirna_loader = OmicDataLoader(data_dir=f"{SPLITS_DIR}/mirna_genes")
circrna_loader = OmicDataLoader(data_dir=f"{SPLITS_DIR}/circrna")
# The piRNA loader is disabled. Its path points at a different split set
# ("splits_74", not "splits_74_disease"), so it is kept verbatim rather than
# being rewritten in terms of SPLITS_DIR.
# pirna_loader = OmicDataLoader(
#     data_dir="mds_data/splits_74/pirna",
# )
te_loader = OmicDataLoader(data_dir=f"{SPLITS_DIR}/te_counts")

In [3]:
# Modalities that take part in this run, in insertion order.
# Currently disabled entries (left here so they can be switched back on):
#     "circrna": circrna_loader,
#     "pirna": pirna_loader,
omic_data_loaders = {"mrna": mrna_loader, "mirna": mirna_loader, "te": te_loader}

# Data manager shared by the evaluators that take `data_manager=odm`.
odm = CatOmicDataManager(omic_data_loaders, n_splits=5)

# Despite the name, this is the path of a CSV file: the active modality names
# joined with underscores (iterating a dict yields its keys).
save_folder = f"logs/mds_disease/{'_'.join(omic_data_loaders)}.csv"
save_folder

'logs/mds_disease/mrna_mirna_te.csv'

In [4]:
from src.evals.knn import KNNEvaluator

# k-NN baseline evaluated through the shared data manager `odm`.
# "k_lb" / "k_ub" presumably bound the n_neighbors search range -- confirm in
# KNNEvaluator.
knn_params = {"k_lb": 1, "k_ub": 20}
knn_eval = KNNEvaluator(data_manager=odm, n_trials=20, verbose=True, params=knn_params)

_ = knn_eval.evaluate()  # return value intentionally discarded
knn_eval.print_best_results()
# Writing the k-NN row to the results file is currently switched off:
# knn_eval.save_results(results_file=save_folder, row_name="KNN")

  from .autonotebook import tqdm as notebook_tqdm
[I 2024-11-21 00:37:21,081] A new study created in memory with name: no-name-94094ee1-ba90-428c-976e-5b569c19bcc2
[I 2024-11-21 00:37:21,266] Trial 0 finished with value: 0.545253957603188 and parameters: {'n_neighbors': 14}. Best is trial 0 with value: 0.545253957603188.
[I 2024-11-21 00:37:21,408] Trial 1 finished with value: 0.6631836987350632 and parameters: {'n_neighbors': 8}. Best is trial 1 with value: 0.6631836987350632.


New best score: 0.545
Best model performance:
Accuracy: 0.880 ± 0.065
F1 Macro: 0.721 ± 0.176
F1 Weighted: 0.859 ± 0.076
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.8, 'f1_macro': np.float64(0.64), 'f1_weighted': np.float64(0.7840000000000001)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.4642857142857143), 'f1_weighted': np.float64(0.8047619047619048)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]
New best score: 0.663
Best model performance:
Accuracy: 0.906 ± 0.032
F1 Macro: 0.815 ± 0.062
F1 Weighted: 0.898 ± 0.038
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666

[I 2024-11-21 00:37:21,547] Trial 2 finished with value: 0.6631836987350632 and parameters: {'n_neighbors': 8}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:21,685] Trial 3 finished with value: 0.545253957603188 and parameters: {'n_neighbors': 12}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:21,822] Trial 4 finished with value: 0.6024938699160863 and parameters: {'n_neighbors': 17}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:21,958] Trial 5 finished with value: 0.6631836987350632 and parameters: {'n_neighbors': 8}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:22,099] Trial 6 finished with value: 0.545253957603188 and parameters: {'n_neighbors': 13}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:22,235] Trial 7 finished with value: 0.545253957603188 and parameters: {'n_neighbors': 12}. Best is trial 1 with value: 0.6631836987350632.
[I 2024-11-21 00:37:22,373] Trial 8 fin

Best model performance:
Accuracy: 0.906 ± 0.032
F1 Macro: 0.815 ± 0.062
F1 Weighted: 0.898 ± 0.038


In [5]:
from src.evals.svm import SVMEvaluator

# SVM baseline run in "linear" mode and stored under the "Linear SVM" row.
# "no_rfe" presumably disables recursive feature elimination -- confirm in
# SVMEvaluator; the RFE ranges are kept as comments for reference.
svm_params = {
    "C_lb": 0.01,
    "C_ub": 10,
    "no_rfe": True,
    # "rfe_step_range": (0.05, 0.2),
    # "rfe_n_features_range": (100, 200),
}
svm_eval = SVMEvaluator(
    data_manager=odm, n_trials=50, verbose=True, params=svm_params, mode="linear"
)

_ = svm_eval.evaluate()  # return value intentionally discarded
svm_eval.save_results(results_file=save_folder, row_name="Linear SVM")

[I 2024-11-21 00:37:23,944] A new study created in memory with name: no-name-fc278aab-919a-4906-b8b0-a7f9d091292e
[I 2024-11-21 00:37:24,100] Trial 0 finished with value: 0.6812403790638734 and parameters: {'C': 0.016675213956196765, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.6812403790638734.


New best score: 0.681
Best model performance:
Accuracy: 0.906 ± 0.053
F1 Macro: 0.835 ± 0.098
F1 Weighted: 0.901 ± 0.059
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-11-21 00:37:24,240] Trial 1 finished with value: 0.6812403790638734 and parameters: {'C': 0.02849950426647128, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.6812403790638734.
[I 2024-11-21 00:37:24,380] Trial 2 finished with value: 0.7039337563024542 and parameters: {'C': 0.16072762000291213, 'class_weight': None}. Best is trial 2 with value: 0.7039337563024542.
[I 2024-11-21 00:37:24,523] Trial 3 finished with value: 0.6682316917536709 and parameters: {'C': 0.8456778159715628, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.7039337563024542.


New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-11-21 00:37:24,645] Trial 4 finished with value: 0.7039337563024542 and parameters: {'C': 0.01642746925590834, 'class_weight': None}. Best is trial 2 with value: 0.7039337563024542.
[I 2024-11-21 00:37:24,767] Trial 5 finished with value: 0.6192439604955923 and parameters: {'C': 0.011471521424410965, 'class_weight': None}. Best is trial 2 with value: 0.7039337563024542.
[I 2024-11-21 00:37:24,914] Trial 6 finished with value: 0.6682316917536709 and parameters: {'C': 2.0583204175797025, 'class_weight': None}. Best is trial 2 with value: 0.7039337563024542.
[I 2024-11-21 00:37:25,041] Trial 7 finished with value: 0.7039337563024542 and parameters: {'C': 0.03561454122625204, 'class_weight': None}. Best is trial 2 with value: 0.7039337563024542.
[I 2024-11-21 00:37:25,189] Trial 8 finished with value: 0.7172503758053912 and parameters: {'C': 0.16498184393238047, 'class_weight': 'balanced'}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:25,338] Trial 9 finished

New best score: 0.717
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.853 ± 0.097
F1 Weighted: 0.914 ± 0.057
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.9386666666666666)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-11-21 00:37:25,489] Trial 10 finished with value: 0.6682316917536709 and parameters: {'C': 6.8692960511070815, 'class_weight': 'balanced'}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:25,628] Trial 11 finished with value: 0.7039337563024542 and parameters: {'C': 0.12368378886541774, 'class_weight': None}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:25,774] Trial 12 finished with value: 0.7172503758053912 and parameters: {'C': 0.14675039305810655, 'class_weight': 'balanced'}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:25,935] Trial 13 finished with value: 0.7172503758053912 and parameters: {'C': 0.06619879441586395, 'class_weight': 'balanced'}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:26,080] Trial 14 finished with value: 0.7172503758053912 and parameters: {'C': 0.33863485362653023, 'class_weight': 'balanced'}. Best is trial 8 with value: 0.7172503758053912.
[I 2024-11-21 00:37:26

New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-11-21 00:37:28,416] Trial 30 finished with value: 0.7172503758053912 and parameters: {'C': 0.12500130112484123, 'class_weight': 'balanced'}. Best is trial 28 with value: 0.7581635155117249.
[I 2024-11-21 00:37:28,554] Trial 31 finished with value: 0.7581635155117249 and parameters: {'C': 0.06105273633037403, 'class_weight': 'balanced'}. Best is trial 28 with value: 0.7581635155117249.
[I 2024-11-21 00:37:28,693] Trial 32 finished with value: 0.6812403790638734 and parameters: {'C': 0.027728082850775628, 'class_weight': 'balanced'}. Best is trial 28 with value: 0.7581635155117249.
[I 2024-11-21 00:37:28,839] Trial 33 finished with value: 0.6812403790638734 and parameters: {'C': 0.05589008364042942, 'class_weight': 'balanced'}. Best is trial 28 with value: 0.7581635155117249.
[I 2024-11-21 00:37:29,000] Trial 34 finished with value: 0.7172503758053912 and parameters: {'C': 0.2011691749306423, 'class_weight': 'balanced'}. Best is trial 28 with value: 0.7581635155117249.
[I 2024-11

Results saved to logs/mds_disease/mrna_mirna_te.csv


In [6]:
from src.evals.xgboost import XGBoostEvaluator

# XGBoost baseline; no `params` are passed, so the evaluator's own defaults apply.
xgb_eval = XGBoostEvaluator(data_manager=odm, n_trials=60, verbose=True)

_ = xgb_eval.evaluate()  # return value intentionally discarded
# Reporting and persisting are currently switched off for this model:
# xgb_eval.print_best_results()
# xgb_eval.print_best_parameters()
# xgb_eval.save_results(results_file=save_folder, row_name="XGBoost")

[I 2024-11-21 00:37:31,173] A new study created in memory with name: no-name-f5967f9c-326a-4aee-bb2e-645187e52939
[I 2024-11-21 00:37:31,455] Trial 0 finished with value: 0.5844534524426352 and parameters: {'booster': 'gbtree', 'lambda': 0.777042272438913, 'alpha': 0.007012163504194617, 'max_depth': 5, 'eta': 3.366922110430363e-05, 'gamma': 8.229368747919227e-07, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.5844534524426352.


New best score: 0.584
Best model performance:
Accuracy: 0.879 ± 0.065
F1 Macro: 0.761 ± 0.170
F1 Weighted: 0.874 ± 0.070
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8, 'f1_macro': np.float64(0.7204968944099379), 'f1_weighted': np.float64(0.8099378881987577)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8, 'f1_macro': np.float64(0.4444444444444444), 'f1_weighted': np.float64(0.7703703703703704)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-11-21 00:37:31,700] Trial 1 finished with value: 0.43246561009234025 and parameters: {'booster': 'gbtree', 'lambda': 6.907136835959009e-06, 'alpha': 1.0143730016435205e-08, 'max_depth': 2, 'eta': 0.002099548308135269, 'gamma': 0.29869310861813714, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.5844534524426352.
[I 2024-11-21 00:37:31,956] Trial 2 finished with value: 0.5325885489025592 and parameters: {'booster': 'dart', 'lambda': 0.0004247114677277087, 'alpha': 9.511835775344175e-07, 'max_depth': 4, 'eta': 2.5091288695504218e-08, 'gamma': 0.007636488371795125, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 0.6728264260126283, 'skip_drop': 0.04105023768061634}. Best is trial 0 with value: 0.5844534524426352.
[I 2024-11-21 00:37:32,153] Trial 3 finished with value: 0.7581635155117249 and parameters: {'booster': 'gblinear', 'lambda': 0.016289389067311696, 'alpha': 1.379929016068266e-06}. Best is trial 3 with value: 0.75

New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-11-21 00:37:32,454] Trial 4 finished with value: 0.5844534524426352 and parameters: {'booster': 'gbtree', 'lambda': 6.427644198938857e-07, 'alpha': 0.3135826508026116, 'max_depth': 3, 'eta': 1.7716798332590427e-06, 'gamma': 0.0007902184802045573, 'grow_policy': 'depthwise'}. Best is trial 3 with value: 0.7581635155117249.
[I 2024-11-21 00:37:32,736] Trial 5 finished with value: 0.48162878238196427 and parameters: {'booster': 'dart', 'lambda': 0.1313406697070976, 'alpha': 1.638006863412106e-05, 'max_depth': 7, 'eta': 0.025799967421881054, 'gamma': 2.1564641889698135e-08, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 2.945806173024392e-06, 'skip_drop': 1.1842930673809874e-06}. Best is trial 3 with value: 0.7581635155117249.
[I 2024-11-21 00:37:32,974] Trial 6 finished with value: 0.3829003597127381 and parameters: {'booster': 'dart', 'lambda': 0.0005164639182912427, 'alpha': 0.15150398599441617, 'max_depth': 2, 'eta': 6.8818058179

New best score: 0.777
Best model performance:
Accuracy: 0.932 ± 0.060
F1 Macro: 0.892 ± 0.093
F1 Weighted: 0.933 ± 0.060
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-11-21 00:37:34,300] Trial 12 finished with value: 0.7351676603683842 and parameters: {'booster': 'gblinear', 'lambda': 1.176720274615829e-08, 'alpha': 4.04284334741871e-06}. Best is trial 10 with value: 0.7766534866310313.
[I 2024-11-21 00:37:34,538] Trial 13 finished with value: 0.6013127244639374 and parameters: {'booster': 'gblinear', 'lambda': 1.9535657640285294e-08, 'alpha': 0.0009778525679663705}. Best is trial 10 with value: 0.7766534866310313.
[I 2024-11-21 00:37:34,705] Trial 14 finished with value: 0.7766534866310313 and parameters: {'booster': 'gblinear', 'lambda': 4.2804431138058544e-07, 'alpha': 3.1156566564358095e-07}. Best is trial 10 with value: 0.7766534866310313.
[I 2024-11-21 00:37:34,949] Trial 15 finished with value: 0.6119104221152568 and parameters: {'booster': 'gblinear', 'lambda': 1.2334231778132507e-05, 'alpha': 3.04085084646386e-05}. Best is trial 10 with value: 0.7766534866310313.
[I 2024-11-21 00:37:35,133] Trial 16 finished with value: 0.6439051712

New best score: 0.814
Best model performance:
Accuracy: 0.946 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.946 ± 0.050
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-11-21 00:37:43,473] Trial 56 finished with value: 0.5325885489025592 and parameters: {'booster': 'dart', 'lambda': 5.26701895563825e-06, 'alpha': 5.120405999096948e-06, 'max_depth': 3, 'eta': 2.7433820281214906e-07, 'gamma': 0.004473086396723472, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 9.235469129389463e-07, 'skip_drop': 0.00022812065498343997}. Best is trial 54 with value: 0.8139129397754031.
[I 2024-11-21 00:37:43,666] Trial 57 finished with value: 0.7171808453237203 and parameters: {'booster': 'gblinear', 'lambda': 9.4994711111478e-06, 'alpha': 1.934607037399446e-05}. Best is trial 54 with value: 0.8139129397754031.
[I 2024-11-21 00:37:43,859] Trial 58 finished with value: 0.27794757275052406 and parameters: {'booster': 'gblinear', 'lambda': 0.00011825017071948709, 'alpha': 0.7820437516582137}. Best is trial 54 with value: 0.8139129397754031.
[I 2024-11-21 00:37:44,021] Trial 59 finished with value: 0.643905171245766 and

In [7]:
from src.evals.mlp import MLPEvaluator

# MLP baseline. The two "*_range" entries are presumably searched per trial
# while the remaining values stay fixed -- confirm in MLPEvaluator.
mlp_params = {
    "lr_range": [1e-4, 1e-2],
    "l2_lambda": 5e-4,
    "dropout_range": [0.1, 0.6],
    "hidden_channels": 64,
    "proj_dim": 64,
    "batch_size": 32,
    "max_epochs": 50,
}
mlp_eval = MLPEvaluator(data_manager=odm, n_trials=10, verbose=True, params=mlp_params)

_ = mlp_eval.evaluate()  # return value intentionally discarded
mlp_eval.save_results(results_file=save_folder, row_name="MLP")

[I 2024-11-21 00:37:46,083] A new study created in memory with name: no-name-ddd7999b-136f-4033-9f00-4e92a3b21339
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the 

New best score: 0.670
Best model performance:
Accuracy: 0.907 ± 0.053
F1 Macro: 0.822 ± 0.096
F1 Weighted: 0.899 ± 0.058
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-pa

New best score: 0.715
Best model performance:
Accuracy: 0.919 ± 0.026
F1 Macro: 0.849 ± 0.038
F1 Weighted: 0.916 ± 0.025
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-pa

In [8]:
# Re-print the best MLP result summary, followed by the hyperparameters of
# that best trial, without the training log noise of the previous cell.
mlp_eval.print_best_results()
mlp_eval.print_best_parameters()

Best model performance:
Accuracy: 0.919 ± 0.026
F1 Macro: 0.849 ± 0.038
F1 Weighted: 0.916 ± 0.025
Best hyperparameters:
{'lr': 0.006410579595352722, 'dropout': 0.12366664616715325}


In [4]:
from src.evals.mogonet import MOGONETEvaluator
from src.data_managers.sample_graph import SampleGraphDataManager

# Graph-based data manager built from the same modality loaders as the baselines.
mogonet_data_manager = SampleGraphDataManager(
    omic_data_loaders=omic_data_loaders,
    n_splits=5,
    params={
        "graph_style": "threshold",
        "self_connections": True,
        "avg_degree": 5,
    },
)

mogonet_params = {
    # NOTE(review): "circrna" is listed although it is commented out of
    # `omic_data_loaders` -- presumably the extra key is ignored; confirm.
    "encoder_hidden_channels": {
        "mrna": 64,
        "mirna": 64,
        "circrna": 64,
        "te": 64,
    },
    "encoder_type": "gat",
    "dropout": 0.2,
    # vcdn seems to work better on mds disease
    "integrator_type": "vcdn",
    "integration_in_dim": 16,
    "vcdn_hidden_channels": 16,
    "epochs": 250,
    # Larger than "epochs" -- presumably to silence periodic logging; confirm.
    "log_interval": 251,
}

mogonet_eval = MOGONETEvaluator(
    data_manager=mogonet_data_manager, n_trials=1, params=mogonet_params
)

# Display the per-modality input widths next to the modality names.
mogonet_eval.in_channels, mogonet_eval.omic_names
# Training, reporting and persisting are currently switched off:
# mogonet_eval.evaluate()
# mogonet_eval.print_best_results()
# mogonet_eval.save_results(results_file=save_folder, row_name="MOGONET")

  from .autonotebook import tqdm as notebook_tqdm


([200, 200, 200], ['mrna', 'mirna', 'te'])

In [6]:
# Display the feature-importance mapping produced by the MOGONET evaluator.
# NOTE(review): `evaluate()` is commented out in the cell that builds
# `mogonet_eval`; confirm whether feature_importance() trains a model itself.
mogonet_eval.feature_importance()

{'mrna': {'ENSG00000181826': 0, 'ENSG00000278588': 0, 'ENSG00000120594': 0, 'ENSG00000121797': 0, 'ENSG00000140398': 0, 'ENSG00000168062': 0, 'ENSG00000174307': 0, 'ENSG00000184897': 0, 'ENSG00000105497': 0, 'ENSG00000113552': 0, 'ENSG00000188536': 0, 'ENSG00000181004': 0, 'ENSG00000143590': 0, 'ENSG00000006534': 0, 'ENSG00000184792': 0, 'ENSG00000114737': 0, 'ENSG00000130518': 0, 'ENSG00000133561': 0, 'ENSG00000179820': 0, 'ENSG00000196329': 0, 'ENSG00000164626': 0, 'ENSG00000206172': 0, 'ENSG00000196866': 0, 'ENSG00000160013': 0, 'ENSG00000164938': 0, 'ENSG00000260729': 0, 'ENSG00000204161': 0, 'ENSG00000062282': 0, 'ENSG00000087903': 0, 'ENSG00000176641': 0, 'ENSG00000136603': 0, 'ENSG00000113369': 0, 'ENSG00000148935': 0, 'ENSG00000153071': 0, 'ENSG00000174130': 0, 'ENSG00000131016': 0, 'ENSG00000144959': 0, 'ENSG00000203883': 0, 'ENSG00000116574': 0, 'ENSG00000078804': 0, 'ENSG00000120217': 0, 'ENSG00000172667': 0, 'ENSG00000161544': 0, 'ENSG00000121966': 0, 'ENSG00000197646': 0, 

In [24]:
# Fetch split 0 from the MOGONET data manager, then display the manager's
# feature names.
# NOTE(review): `data` is not used again in this cell -- presumably get_split()
# is called for its effect on `feature_names`; confirm.
data = mogonet_eval.data_manager.get_split(0)
mogonet_eval.data_manager.feature_names
# data['mrna']

['HERV-Fc1',
 'LTR10C',
 'L1M3C_5',
 'LTR27E',
 'L2',
 'L1P4b_5end',
 'LTR18A',
 'MER87',
 'MER57E3',
 'THER2',
 'MLT1K',
 'LTR70',
 'LTR1F1',
 'MER54B',
 'LTR28',
 'LTR21A',
 'MIR3',
 'LTR2',
 'MER51E',
 'LTR27B',
 'L1M3D_5',
 'HERVE_a',
 'LTR34',
 'MER57F',
 'LTR38C',
 'LTR3',
 'L1MA9_5',
 'L1P4c_5end',
 'HARLEQUIN',
 'LTR1C1',
 'LTR36',
 'LTR60B',
 'L1ME3C_3end',
 'LOR1b_LTR',
 'HERVS71',
 'LTR1C',
 'LTR47A2',
 'MER66A',
 'LTR16A1',
 'LTR24',
 'HERVL66I',
 'L1ME4',
 'MER57E1',
 'MER68B',
 'MER70A',
 'LTR26E',
 'LTR57',
 'MER61B',
 'AluYf5',
 'MLT1G3',
 'MER9B',
 'L1ME5_3end',
 'MER66D',
 'LTR58',
 'LTR25',
 'LTR75_1',
 'MER65C',
 'LTR1B1',
 'LTR71A',
 'MER83C',
 'HERV-K14CI',
 'LTR2B',
 'MER52A',
 'LTR15',
 'MER101',
 'MER34A',
 'SVA_D',
 'HERV1_LTRb',
 'LTR25-int',
 'LTR72',
 'MER66B',
 'FRAM',
 'HERVE',
 'LTR53',
 'MLT2D',
 'LTR2C',
 'AluY',
 'LTR64',
 'LTR1D1',
 'LTR38A1',
 'L2B',
 'MER74A',
 'LTR9B',
 'LTR24B',
 'MER57C1',
 'L1ME2',
 'LTR9A1',
 'LTR14C',
 'LTR62',
 'LTR44',
 'LT

# linear
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
# vcdn
- integration dim = 2
Accuracy: 0.825 ± 0.030
F1 Macro: 0.452 ± 0.009
F1 Weighted: 0.746 ± 0.043
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.973 ± 0.033
F1 Macro: 0.957 ± 0.053
F1 Weighted: 0.973 ± 0.033
- integration dim = 16
Accuracy: 0.973 ± 0.053
F1 Macro: 0.958 ± 0.083
F1 Weighted: 0.973 ± 0.053
# attention - faster than vcdn
- integration dim = 2
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
- integration dim = 16
Accuracy: 0.960 ± 0.053
F1 Macro: 0.934 ± 0.085
F1 Weighted: 0.959 ± 0.054

In [10]:
from src.evals.birgat import BiRGATEvaluator
from src.data_managers.bipartite_graph import BipartiteGraphDataManager

# Toggles the "three_layers" model option and the row name used when saving.
three_layers = True

# Bipartite-graph data manager built from the same modality loaders.
# NOTE(review): a "circrna" threshold is listed although circrna is commented
# out of `omic_data_loaders` -- presumably the extra key is ignored; confirm.
birgat_data_manager = BipartiteGraphDataManager(
    omic_data_loaders=omic_data_loaders,
    n_splits=5,
    params={
        "diff_exp_thresholds": {
            "mrna": 1.5,
            "mirna": 1.5,
            "circrna": 1.7,
            "te": 1.7,
        },
    },
)

birgat_params = {
    "epochs": 350,
    "log_interval": 50,
    "hidden_channels": [200, 64, 64, 16, 16],
    "heads": 4,
    "dropout": 0.2,
    "attention_dropout": 0.0,
    "integrator_type": "vcdn",
    "three_layers": three_layers,
}

birgat_eval = BiRGATEvaluator(
    data_manager=birgat_data_manager, params=birgat_params, n_trials=1
)
birgat_eval.evaluate()
birgat_eval.print_best_results()

# Store the result under a row name that records the layer count.
if three_layers:
    birgat_row_name = "BiRGAT 3L"
else:
    birgat_row_name = "BiRGAT 2L"
birgat_eval.save_results(results_file=save_folder, row_name=birgat_row_name)

isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)


[I 2024-11-21 00:38:30,432] A new study created in memory with name: no-name-53d82c88-b37a-43ac-a498-90cf04fcfd67


isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)

Epoch: 050:
Train Loss: 0.4404, Train Acc: 0.8305, Train F1 Macro: 0.4537, Train F1 Weighted: 0.7536
Val Acc: 0.8000, Val F1 Macro: 0.4444, Val F1 Weighted: 0.7111, Val Geometric Mean: 0.6323
Test Acc: 0.8000, Test F1 Macro: 0.4444, Test F1 Weighted: 0.7111
##################################################

Epoch: 100:
Train Loss: 0.1544, Train Acc: 0.9153, Train F1 Macro: 0.8282, Train F1 Weighted: 0.9090
Val Acc: 0.8667, Val F1 Macro: 0.7115, Val F1 Weighted: 0.8385, Val Geometric Mean: 0.8026
Test Acc: 0.8667, Test F1 Macro: 0.7115, Test F1 Weighted: 0.8385
##################################################

Epoch: 150:
Train Loss: 0.1415, Train Acc: 0.8983, Train F1 Macro: 0.8520, Train F1 Weighted: 0.9

[I 2024-11-21 00:40:32,404] Trial 0 finished with value: 0.9491196586666667 and parameters: {}. Best is trial 0 with value: 0.9491196586666667.



Epoch: 350:
Train Loss: 0.0700, Train Acc: 0.9833, Train F1 Macro: 0.9731, Train F1 Weighted: 0.9836
Val Acc: 0.8571, Val F1 Macro: 0.7879, Val F1 Weighted: 0.8745, Val Geometric Mean: 0.8390
Test Acc: 0.8571, Test F1 Macro: 0.7879, Test F1 Weighted: 0.8745
##################################################
New best score: 0.949
Best model performance:
Accuracy: 0.987 ± 0.027
F1 Macro: 0.976 ± 0.048
F1 Weighted: 0.986 ± 0.029
[{'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]
Best model performance:
Accuracy: 0.987 ± 0.027
F1 Macro: 0.976 ± 0.048
F1 Weighted: 0.986 ± 0.029


In [11]:
# Alternative save call with a generic "BiRGAT" row name, kept disabled.
# birgat_eval.save_results(results_file=save_folder, row_name="BiRGAT")
# Echo the results-file path that the save_results calls in this notebook use.
save_folder

'logs/mds_disease/mrna_mirna_te.csv'

```python
omic_data_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    # # "pirna": pirna_loader,
    # "te": te_loader,
}

birgat_eval = BiRGATEvaluator(
    data_manager=BipartiteGraphDataManager(
        omic_data_loaders=omic_data_loaders,
        n_splits=5,
        params={
            "diff_exp_thresholds" : {
                "mrna": 1.5,
                "mirna": 1.5,
                "circrna": 1.7,
                # "te": 1.8,
            },
        },
    ),
    params={
        "epochs": 250,
        "log_interval": 50,
        "hidden_channels": [200, 64, 64, 16, 16],
        "heads": 4,
        "dropout": 0.2,
        "attention_dropout": 0.0,
        "integrator_type": "vcdn",
        "three_layers": False,
    },
    n_trials=1,
)
```

Accuracy: 0.960 ± 0.053
F1 Macro: 0.940 ± 0.082
F1 Weighted: 0.961 ± 0.053

# mrna, mirna, circrna, 2L
Accuracy: 0.960 ± 0.033
F1 Macro: 0.915 ± 0.073
F1 Weighted: 0.956 ± 0.036
# mrna, mirna, circrna, 2L no interactions
Accuracy: 0.960 ± 0.053
F1 Macro: 0.918 ± 0.113
F1 Weighted: 0.953 ± 0.064
---
Accuracy: 0.946 ± 0.027
F1 Macro: 0.904 ± 0.048
F1 Weighted: 0.944 ± 0.028
# mrna, mirna, circrna 3L, interactions, degree ~20 in diff exp graphs, larger degree shows degraded performance
# making the avg degree too high shows large jumps on the validation set during training
Accuracy: 0.945 ± 0.053
F1 Macro: 0.910 ± 0.081
F1 Weighted: 0.946 ± 0.048
# mrna, mirna, circrna 2L, interactions, 64 cap
Accuracy: 0.960 ± 0.053
F1 Macro: 0.940 ± 0.082
F1 Weighted: 0.961 ± 0.053
# mrna, mirna, circrna 3L, interactions, 64 cap
Accuracy: 0.891 ± 0.054
F1 Macro: 0.801 ± 0.088
F1 Weighted: 0.888 ± 0.056
# mrna, mirna, circrna 3L
Accuracy: 0.920 ± 0.050
F1 Macro: 0.829 ± 0.112
F1 Weighted: 0.907 ± 0.062
# mrna, mirna, 2L
Accuracy: 0.960 ± 0.033
F1 Macro: 0.915 ± 0.073
F1 Weighted: 0.956 ± 0.036
# mrna, mirna, 3L
Accuracy: 0.947 ± 0.050
F1 Macro: 0.897 ± 0.089
F1 Weighted: 0.944 ± 0.051