In [1]:
%load_ext autoreload
%autoreload 2

from src.base_classes.omic_data_loader import OmicDataLoader
from src.data_managers.concat import CatOmicDataManager

In [2]:
# One OmicDataLoader per omic modality; each points at that modality's
# sub-directory under mds_data/splits_74_disease.
mrna_loader = OmicDataLoader(data_dir="mds_data/splits_74_disease/mrna")
mirna_loader = OmicDataLoader(data_dir="mds_data/splits_74_disease/mirna_genes")
circrna_loader = OmicDataLoader(data_dir="mds_data/splits_74_disease/circrna")
# piRNA is currently disabled.
# NOTE(review): its path uses "splits_74" rather than "splits_74_disease" like
# the other loaders; confirm which directory is intended before re-enabling.
# pirna_loader = OmicDataLoader(data_dir="mds_data/splits_74/pirna")
te_loader = OmicDataLoader(data_dir="mds_data/splits_74_disease/te_counts")

In [25]:
# Omics that take part in this run. The insertion order of this dict also
# determines the name of the results CSV built below.
omic_data_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    # "pirna": pirna_loader,  # disabled
    "te": te_loader,
}

# Data manager shared by the KNN / SVM / XGBoost / MLP evaluators, 5 splits.
odm = CatOmicDataManager(omic_data_loaders, n_splits=5)

# Iterating a dict yields its keys, so joining the dict joins the omic names.
omics_tag = "_".join(omic_data_loaders)
# NOTE: despite its name, `save_folder` is the path of a CSV *file*; it is
# passed as `results_file=` to every evaluator's save_results().
save_folder = f"logs/mds_disease/{omics_tag}.csv"
save_folder

'logs/mds_disease/mrna_mirna_circrna_te.csv'

In [26]:
from src.evals.knn import KNNEvaluator

# Settings handed to the KNN evaluator.
# NOTE(review): presumably the lower/upper bound of the n_neighbors search
# (the recorded trials sample values between 2 and 19); confirm in KNNEvaluator.
knn_params = {"k_lb": 1, "k_ub": 20}

knn_eval = KNNEvaluator(data_manager=odm, n_trials=20, verbose=True, params=knn_params)

# The return value of evaluate() is not needed in this cell.
_ = knn_eval.evaluate()

# Summarise the best trial, then store it as the "KNN" row of the results CSV.
knn_eval.print_best_results()
knn_eval.save_results(results_file=save_folder, row_name="KNN")

[I 2024-12-07 22:14:26,481] A new study created in memory with name: no-name-07aba30c-ad27-4944-8764-f5095cecd085
[I 2024-12-07 22:14:26,695] Trial 0 finished with value: 0.612600042864821 and parameters: {'n_neighbors': 7}. Best is trial 0 with value: 0.612600042864821.
[I 2024-12-07 22:14:26,893] Trial 1 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 15}. Best is trial 1 with value: 0.6192439604955923.


New best score: 0.613
Best model performance:
Accuracy: 0.906 ± 0.032
F1 Macro: 0.763 ± 0.163
F1 Weighted: 0.887 ± 0.054
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.4642857142857143), 'f1_weighted': np.float64(0.8047619047619048)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]
New best score: 0.619
Best model performance:
Accuracy: 0.907 ± 0.053
F1 Macro: 0.769 ± 0.180
F1 Weighted: 0.888 ± 0.069
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)

[I 2024-12-07 22:14:27,085] Trial 2 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 15}. Best is trial 1 with value: 0.6192439604955923.
[I 2024-12-07 22:14:27,275] Trial 3 finished with value: 0.7039337563024542 and parameters: {'n_neighbors': 11}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:27,463] Trial 4 finished with value: 0.6862235118921005 and parameters: {'n_neighbors': 18}. Best is trial 3 with value: 0.7039337563024542.


New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:27,675] Trial 5 finished with value: 0.6014691748907844 and parameters: {'n_neighbors': 8}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:27,866] Trial 6 finished with value: 0.5472774151647614 and parameters: {'n_neighbors': 19}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:28,065] Trial 7 finished with value: 0.6146283953157812 and parameters: {'n_neighbors': 17}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:28,253] Trial 8 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 15}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:28,442] Trial 9 finished with value: 0.40046307690049987 and parameters: {'n_neighbors': 2}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:28,633] Trial 10 finished with value: 0.7039337563024542 and parameters: {'n_neighbors': 11}. Best is trial 3 with value: 0.7039337563024542.
[I 2024-12-07 22:14:28,838] Trial 

New best score: 0.739
Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.857 ± 0.094
F1 Weighted: 0.924 ± 0.051
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:30,177] Trial 18 finished with value: 0.6014691748907844 and parameters: {'n_neighbors': 8}. Best is trial 16 with value: 0.7389770960193865.
[I 2024-12-07 22:14:30,374] Trial 19 finished with value: 0.6623938241144065 and parameters: {'n_neighbors': 5}. Best is trial 16 with value: 0.7389770960193865.


Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.857 ± 0.094
F1 Weighted: 0.924 ± 0.051


In [27]:
from src.evals.svm import SVMEvaluator

# Linear SVM baseline on the concatenated omics (`odm`), tuned over 50 trials.
svm_eval = SVMEvaluator(
    data_manager=odm,
    n_trials=50,
    verbose=True,
    params={
        # NOTE(review): presumably the bounds of the search range for the SVM
        # regularisation parameter C (recorded trials sample C within
        # [0.01, 10]); confirm in SVMEvaluator.
        "C_lb": 0.01,
        "C_ub": 10,
        # NOTE(review): presumably turns recursive feature elimination off;
        # the rfe_* ranges below are kept commented out accordingly.
        "no_rfe": True,
        # "rfe_step_range": (0.05, 0.2),
        # "rfe_n_features_range": (100, 200),
    },
    mode="linear",
)

# The return value of evaluate() is not needed in this cell.
_ = svm_eval.evaluate()

# Print the final best-trial summary, as the KNN / XGBoost / BiRGAT cells do,
# so the numbers written to the CSV are also visible at the end of the log.
svm_eval.print_best_results()
svm_eval.save_results(results_file=save_folder, row_name="Linear SVM")

[I 2024-12-07 22:14:30,410] A new study created in memory with name: no-name-af95b62c-a13e-4f24-99a7-931b8ecbb5e1
[I 2024-12-07 22:14:30,599] Trial 0 finished with value: 0.7039337563024542 and parameters: {'C': 0.03696927231237428, 'class_weight': None}. Best is trial 0 with value: 0.7039337563024542.
[I 2024-12-07 22:14:30,792] Trial 1 finished with value: 0.7039337563024542 and parameters: {'C': 0.04499582723460574, 'class_weight': None}. Best is trial 0 with value: 0.7039337563024542.


New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:30,996] Trial 2 finished with value: 0.7573892492973633 and parameters: {'C': 0.7327880174152276, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:31,176] Trial 3 finished with value: 0.7039337563024542 and parameters: {'C': 0.014790879250326041, 'class_weight': None}. Best is trial 2 with value: 0.7573892492973633.


New best score: 0.757
Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.873 ± 0.072
F1 Weighted: 0.929 ± 0.042
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:31,380] Trial 4 finished with value: 0.7573892492973633 and parameters: {'C': 1.926035170020619, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:31,578] Trial 5 finished with value: 0.7155098095140549 and parameters: {'C': 0.039141525228436225, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:31,805] Trial 6 finished with value: 0.7039337563024542 and parameters: {'C': 0.15689888855597295, 'class_weight': None}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:32,022] Trial 7 finished with value: 0.7573892492973633 and parameters: {'C': 5.3393000292769255, 'class_weight': None}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:32,235] Trial 8 finished with value: 0.7039337563024542 and parameters: {'C': 0.01811482714876531, 'class_weight': None}. Best is trial 2 with value: 0.7573892492973633.
[I 2024-12-07 22:14:32,455] Trial 9 fini

New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:34,469] Trial 18 finished with value: 0.7581635155117249 and parameters: {'C': 0.129264399116046, 'class_weight': 'balanced'}. Best is trial 17 with value: 0.7581635155117249.
[I 2024-12-07 22:14:34,694] Trial 19 finished with value: 0.7581635155117249 and parameters: {'C': 0.1240264039282578, 'class_weight': 'balanced'}. Best is trial 17 with value: 0.7581635155117249.
[I 2024-12-07 22:14:34,901] Trial 20 finished with value: 0.7581635155117249 and parameters: {'C': 0.11846975066980431, 'class_weight': 'balanced'}. Best is trial 17 with value: 0.7581635155117249.
[I 2024-12-07 22:14:35,115] Trial 21 finished with value: 0.7581635155117249 and parameters: {'C': 0.11807135060217297, 'class_weight': 'balanced'}. Best is trial 17 with value: 0.7581635155117249.
[I 2024-12-07 22:14:35,327] Trial 22 finished with value: 0.7581635155117249 and parameters: {'C': 0.1874051254692188, 'class_weight': 'balanced'}. Best is trial 17 with value: 0.7581635155117249.
[I 2024-12-07 

New best score: 0.814
Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:14:36,151] Trial 26 finished with value: 0.8139820349629631 and parameters: {'C': 0.27421304152757614, 'class_weight': 'balanced'}. Best is trial 25 with value: 0.8139820349629631.
[I 2024-12-07 22:14:36,361] Trial 27 finished with value: 0.7573892492973633 and parameters: {'C': 0.455727155435319, 'class_weight': 'balanced'}. Best is trial 25 with value: 0.8139820349629631.
[I 2024-12-07 22:14:36,582] Trial 28 finished with value: 0.8139820349629631 and parameters: {'C': 0.23508305060865917, 'class_weight': 'balanced'}. Best is trial 25 with value: 0.8139820349629631.
[I 2024-12-07 22:14:36,791] Trial 29 finished with value: 0.7039337563024542 and parameters: {'C': 0.2534063931231969, 'class_weight': None}. Best is trial 25 with value: 0.8139820349629631.
[I 2024-12-07 22:14:36,997] Trial 30 finished with value: 0.7573892492973633 and parameters: {'C': 0.6081219533341583, 'class_weight': 'balanced'}. Best is trial 25 with value: 0.8139820349629631.
[I 2024-12-07 22:14:

In [24]:
from src.evals.xgboost import XGBoostEvaluator

# XGBoost baseline on the concatenated omics (`odm`), tuned over 60 trials
# with the evaluator's default search space (no `params` override is passed).
xgb_eval = XGBoostEvaluator(data_manager=odm, n_trials=60, verbose=True)

# The return value of evaluate() is not needed in this cell.
_ = xgb_eval.evaluate()

# Report the best trial (metrics, then hyperparameters) and store it as the
# "XGBoost" row of the results CSV.
xgb_eval.print_best_results()
xgb_eval.print_best_parameters()
xgb_eval.save_results(results_file=save_folder, row_name="XGBoost")

[I 2024-12-07 22:13:45,397] A new study created in memory with name: no-name-57031d7d-b726-4aa0-a584-08fd401f5182
[I 2024-12-07 22:13:45,626] Trial 0 finished with value: 0.5675043225090064 and parameters: {'booster': 'gbtree', 'lambda': 0.2258161216799686, 'alpha': 0.0009606628003389111, 'max_depth': 3, 'eta': 1.1219467333837462e-06, 'gamma': 5.705570530267046e-08, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.5675043225090064.


New best score: 0.568
Best model performance:
Accuracy: 0.866 ± 0.084
F1 Macro: 0.756 ± 0.178
F1 Weighted: 0.867 ± 0.081
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8, 'f1_macro': np.float64(0.7204968944099379), 'f1_weighted': np.float64(0.8099378881987577)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.7333333333333333, 'f1_macro': np.float64(0.4230769230769231), 'f1_weighted': np.float64(0.7333333333333333)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-12-07 22:13:45,870] Trial 1 finished with value: 0.48162878238196427 and parameters: {'booster': 'dart', 'lambda': 7.069550131503006e-05, 'alpha': 3.561886371432537e-06, 'max_depth': 3, 'eta': 0.038781166188858004, 'gamma': 4.438850227128154e-05, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 6.72383985801384e-08, 'skip_drop': 0.005743015697836312}. Best is trial 0 with value: 0.5675043225090064.
[I 2024-12-07 22:13:46,103] Trial 2 finished with value: 0.5591177623513526 and parameters: {'booster': 'gbtree', 'lambda': 0.14009061160979316, 'alpha': 1.8880862903511775e-08, 'max_depth': 7, 'eta': 0.0008860530213730884, 'gamma': 0.0003791725838373204, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.5675043225090064.
[I 2024-12-07 22:13:46,321] Trial 3 finished with value: 0.5325885489025592 and parameters: {'booster': 'gbtree', 'lambda': 0.0008502292033967831, 'alpha': 1.0205059298042949e-08, 'max_depth': 3, 'eta': 9.694197

New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


[I 2024-12-07 22:13:47,474] Trial 9 finished with value: 0.6288118975723532 and parameters: {'booster': 'gblinear', 'lambda': 0.04231946442032059, 'alpha': 0.0024687025297133783}. Best is trial 7 with value: 0.7581635155117249.
[I 2024-12-07 22:13:47,624] Trial 10 finished with value: 0.27794757275052406 and parameters: {'booster': 'gblinear', 'lambda': 0.002788118632007606, 'alpha': 0.8192488278637083}. Best is trial 7 with value: 0.7581635155117249.
[I 2024-12-07 22:13:47,776] Trial 11 finished with value: 0.7581635155117249 and parameters: {'booster': 'gblinear', 'lambda': 0.01071967663457468, 'alpha': 9.138660565942961e-07}. Best is trial 7 with value: 0.7581635155117249.
[I 2024-12-07 22:13:47,928] Trial 12 finished with value: 0.7172503758053912 and parameters: {'booster': 'gblinear', 'lambda': 0.006144272560781403, 'alpha': 8.980818483779622e-07}. Best is trial 7 with value: 0.7581635155117249.
[I 2024-12-07 22:13:48,084] Trial 13 finished with value: 0.7351676603683842 and para

New best score: 0.771
Best model performance:
Accuracy: 0.932 ± 0.042
F1 Macro: 0.886 ± 0.066
F1 Weighted: 0.933 ± 0.042
[{'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.9386666666666666)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


[I 2024-12-07 22:13:50,623] Trial 28 finished with value: 0.7351676603683842 and parameters: {'booster': 'gblinear', 'lambda': 3.350536607793748e-06, 'alpha': 3.2035763706703653e-06}. Best is trial 26 with value: 0.7711411011566484.
[I 2024-12-07 22:13:50,887] Trial 29 finished with value: 0.5036718571125454 and parameters: {'booster': 'dart', 'lambda': 1.5758060649862608e-06, 'alpha': 0.0004912369589464265, 'max_depth': 5, 'eta': 1.2106786536757498e-05, 'gamma': 4.14912407900022e-06, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 5.7811939375725656e-05, 'skip_drop': 8.297862776170574e-05}. Best is trial 26 with value: 0.7711411011566484.
[I 2024-12-07 22:13:51,140] Trial 30 finished with value: 0.0013903478254046716 and parameters: {'booster': 'gbtree', 'lambda': 3.0365585290179303e-05, 'alpha': 4.111508629716555e-05, 'max_depth': 6, 'eta': 1.076350074564469e-08, 'gamma': 0.008554588664711845, 'grow_policy': 'lossguide'}. Best is trial 26

Best model performance:
Accuracy: 0.932 ± 0.042
F1 Macro: 0.886 ± 0.066
F1 Weighted: 0.933 ± 0.042
Best hyperparameters:
{'booster': 'gblinear', 'lambda': 1.2262735180246439e-05, 'alpha': 8.475640911381701e-06}


In [28]:
from src.evals.mlp import MLPEvaluator

# Settings for the MLP baseline.
# NOTE(review): the `*_range` entries look like search ranges (the recorded
# best hyperparameters are `lr` and `dropout`), the rest fixed values; confirm
# against MLPEvaluator.
mlp_params = {
    "lr_range": [1e-4, 1e-2],
    "l2_lambda": 5e-4,
    "dropout_range": [0.1, 0.6],
    "hidden_channels": 64,
    "proj_dim": 64,
    "batch_size": 32,
    "max_epochs": 50,
}

mlp_eval = MLPEvaluator(data_manager=odm, n_trials=10, verbose=True, params=mlp_params)

# The return value of evaluate() is not needed in this cell.
_ = mlp_eval.evaluate()

# Store the best trial as the "MLP" row of the results CSV.
mlp_eval.save_results(results_file=save_folder, row_name="MLP")

[I 2024-12-07 22:14:40,960] A new study created in memory with name: no-name-24dbe65a-b4d8-4225-bf99-1e730ca6bee0
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using:

New best score: 0.492
Best model performance:
Accuracy: 0.878 ± 0.028
F1 Macro: 0.662 ± 0.171
F1 Weighted: 0.846 ± 0.049
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.4642857142857143), 'f1_weighted': np.float64(0.8047619047619048)}, {'acc': 0.8571428571428571, 'f1_macro': np.float64(0.46153846153846156), 'f1_weighted': np.float64(0.7912087912087912)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-pa

New best score: 0.663
Best model performance:
Accuracy: 0.906 ± 0.032
F1 Macro: 0.815 ± 0.062
F1 Weighted: 0.898 ± 0.038
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.8148148148148148), 'f1_weighted': np.float64(0.9234567901234567)}, {'acc': 0.9285714285714286, 'f1_macro': np.float64(0.8782608695652174), 'f1_weighted': np.float64(0.9341614906832298)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-pa

New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
[{'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7115384615384616), 'f1_weighted': np.float64(0.8384615384615385)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 0.8666666666666667, 'f1_macro': np.float64(0.7916666666666666), 'f1_weighted': np.float64(0.8666666666666667)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaThesis2/.venv/lib/python3.12/site-pa

In [8]:
# Report the best MLP trial: first the aggregated metrics (accuracy, F1 macro,
# F1 weighted), then the hyperparameters that produced them.
# Requires `mlp_eval` from the MLP evaluation cell to have been evaluated.
mlp_eval.print_best_results()
mlp_eval.print_best_parameters()

Best model performance:
Accuracy: 0.919 ± 0.026
F1 Macro: 0.849 ± 0.038
F1 Weighted: 0.916 ± 0.025
Best hyperparameters:
{'lr': 0.006410579595352722, 'dropout': 0.12366664616715325}


In [4]:
from src.evals.mogonet import MOGONETEvaluator
from src.data_managers.sample_graph import SampleGraphDataManager

# MOGONET evaluator built on the same omic loaders as the other models, but
# wrapped in a SampleGraphDataManager instead of the concatenating manager.
mogonet_eval = MOGONETEvaluator(
    data_manager=SampleGraphDataManager(
        omic_data_loaders=omic_data_loaders,
        n_splits=5,
        params={
            "graph_style": "threshold",
            "self_connections": True,
            "avg_degree": 5,
        },
    ),
    n_trials=1,
    params={
        # One encoder width per *configured* omic. Deriving the keys from
        # `omic_data_loaders` keeps this mapping in sync when omics are
        # enabled or disabled there, instead of repeating the names by hand.
        "encoder_hidden_channels": {omic: 64 for omic in omic_data_loaders},
        "encoder_type": "gat",
        "dropout": 0.2,
        "integrator_type": "vcdn",  # vcdn seems to work better on mds disease
        "integration_in_dim": 16,
        "vcdn_hidden_channels": 16,
        "epochs": 250,
        # NOTE(review): larger than `epochs`, presumably so that no per-epoch
        # log is ever printed; confirm in MOGONETEvaluator.
        "log_interval": 251,
    },
)

# Sanity check: per-omic input dimensions and the omic names the evaluator
# actually picked up.
mogonet_eval.in_channels, mogonet_eval.omic_names
# mogonet_eval.evaluate()
# mogonet_eval.print_best_results()
# mogonet_eval.save_results(results_file=save_folder, row_name="MOGONET")

  from .autonotebook import tqdm as notebook_tqdm


([200, 200, 200], ['mrna', 'mirna', 'te'])

In [6]:
# Display the feature-importance result of the MOGONET evaluator.
# The recorded output is a nested mapping: omic name -> {feature id -> value}.
# NOTE(review): the full mapping is rendered, which makes the cell output very
# large; consider displaying only a slice of it.
mogonet_eval.feature_importance()

{'mrna': {'ENSG00000181826': 0, 'ENSG00000278588': 0, 'ENSG00000120594': 0, 'ENSG00000121797': 0, 'ENSG00000140398': 0, 'ENSG00000168062': 0, 'ENSG00000174307': 0, 'ENSG00000184897': 0, 'ENSG00000105497': 0, 'ENSG00000113552': 0, 'ENSG00000188536': 0, 'ENSG00000181004': 0, 'ENSG00000143590': 0, 'ENSG00000006534': 0, 'ENSG00000184792': 0, 'ENSG00000114737': 0, 'ENSG00000130518': 0, 'ENSG00000133561': 0, 'ENSG00000179820': 0, 'ENSG00000196329': 0, 'ENSG00000164626': 0, 'ENSG00000206172': 0, 'ENSG00000196866': 0, 'ENSG00000160013': 0, 'ENSG00000164938': 0, 'ENSG00000260729': 0, 'ENSG00000204161': 0, 'ENSG00000062282': 0, 'ENSG00000087903': 0, 'ENSG00000176641': 0, 'ENSG00000136603': 0, 'ENSG00000113369': 0, 'ENSG00000148935': 0, 'ENSG00000153071': 0, 'ENSG00000174130': 0, 'ENSG00000131016': 0, 'ENSG00000144959': 0, 'ENSG00000203883': 0, 'ENSG00000116574': 0, 'ENSG00000078804': 0, 'ENSG00000120217': 0, 'ENSG00000172667': 0, 'ENSG00000161544': 0, 'ENSG00000121966': 0, 'ENSG00000197646': 0, 

In [24]:
# Fetch split 0 from the MOGONET data manager, then display the manager's
# `feature_names` attribute.
# NOTE(review): `data` is not used by any later visible cell, and the recorded
# output lists only transposable-element names; presumably `feature_names`
# reflects a single omic after get_split() - confirm in SampleGraphDataManager.
data = mogonet_eval.data_manager.get_split(0)
mogonet_eval.data_manager.feature_names
# data['mrna']

['HERV-Fc1',
 'LTR10C',
 'L1M3C_5',
 'LTR27E',
 'L2',
 'L1P4b_5end',
 'LTR18A',
 'MER87',
 'MER57E3',
 'THER2',
 'MLT1K',
 'LTR70',
 'LTR1F1',
 'MER54B',
 'LTR28',
 'LTR21A',
 'MIR3',
 'LTR2',
 'MER51E',
 'LTR27B',
 'L1M3D_5',
 'HERVE_a',
 'LTR34',
 'MER57F',
 'LTR38C',
 'LTR3',
 'L1MA9_5',
 'L1P4c_5end',
 'HARLEQUIN',
 'LTR1C1',
 'LTR36',
 'LTR60B',
 'L1ME3C_3end',
 'LOR1b_LTR',
 'HERVS71',
 'LTR1C',
 'LTR47A2',
 'MER66A',
 'LTR16A1',
 'LTR24',
 'HERVL66I',
 'L1ME4',
 'MER57E1',
 'MER68B',
 'MER70A',
 'LTR26E',
 'LTR57',
 'MER61B',
 'AluYf5',
 'MLT1G3',
 'MER9B',
 'L1ME5_3end',
 'MER66D',
 'LTR58',
 'LTR25',
 'LTR75_1',
 'MER65C',
 'LTR1B1',
 'LTR71A',
 'MER83C',
 'HERV-K14CI',
 'LTR2B',
 'MER52A',
 'LTR15',
 'MER101',
 'MER34A',
 'SVA_D',
 'HERV1_LTRb',
 'LTR25-int',
 'LTR72',
 'MER66B',
 'FRAM',
 'HERVE',
 'LTR53',
 'MLT2D',
 'LTR2C',
 'AluY',
 'LTR64',
 'LTR1D1',
 'LTR38A1',
 'L2B',
 'MER74A',
 'LTR9B',
 'LTR24B',
 'MER57C1',
 'L1ME2',
 'LTR9A1',
 'LTR14C',
 'LTR62',
 'LTR44',
 'LT

# linear
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
# vcdn
- integration dim = 2
Accuracy: 0.825 ± 0.030
F1 Macro: 0.452 ± 0.009
F1 Weighted: 0.746 ± 0.043
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.973 ± 0.033
F1 Macro: 0.957 ± 0.053
F1 Weighted: 0.973 ± 0.033
- integration dim = 16
Accuracy: 0.973 ± 0.053
F1 Macro: 0.958 ± 0.083
F1 Weighted: 0.973 ± 0.053
# attention - faster than vcdn
- integration dim = 2
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
- integration dim = 16
Accuracy: 0.960 ± 0.053
F1 Macro: 0.934 ± 0.085
F1 Weighted: 0.959 ± 0.054

In [10]:
from src.evals.birgat import BiRGATEvaluator
from src.data_managers.bipartite_graph import BipartiteGraphDataManager

# Toggle between the 3-layer and 2-layer BiRGAT variant; this also selects the
# row name under which the result is stored.
three_layers = True
if three_layers:
    birgat_row_name = "BiRGAT 3L"
else:
    birgat_row_name = "BiRGAT 2L"

# Settings for the bipartite graph data manager: one differential-expression
# threshold per omic.
birgat_graph_params = {
    "diff_exp_thresholds" : {
        "mrna": 1.5,
        "mirna": 1.5,
        "circrna": 1.7,
        "te": 1.7,
    },
}

# Model / training settings passed to the BiRGAT evaluator.
birgat_model_params = {
    "epochs": 350,
    "log_interval": 50,
    "hidden_channels": [200, 64, 64, 16, 16],
    "heads": 4,
    "dropout": 0.2,
    "attention_dropout": 0.0,
    "integrator_type": "vcdn",
    "three_layers": three_layers,
}

birgat_eval = BiRGATEvaluator(
    data_manager=BipartiteGraphDataManager(
        omic_data_loaders=omic_data_loaders,
        n_splits=5,
        params=birgat_graph_params,
    ),
    params=birgat_model_params,
    n_trials=1,
)

# Run the single trial, print its summary and store it in the results CSV.
birgat_eval.evaluate()
birgat_eval.print_best_results()
birgat_eval.save_results(results_file=save_folder, row_name=birgat_row_name)

isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)


[I 2024-11-21 00:38:30,432] A new study created in memory with name: no-name-53d82c88-b37a-43ac-a498-90cf04fcfd67


isolated sample nodes, isolated gene nodes, mean degree: 
tensor(1) tensor(0) tensor(25.6892)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.1486)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(15.5676)

Epoch: 050:
Train Loss: 0.4404, Train Acc: 0.8305, Train F1 Macro: 0.4537, Train F1 Weighted: 0.7536
Val Acc: 0.8000, Val F1 Macro: 0.4444, Val F1 Weighted: 0.7111, Val Geometric Mean: 0.6323
Test Acc: 0.8000, Test F1 Macro: 0.4444, Test F1 Weighted: 0.7111
##################################################

Epoch: 100:
Train Loss: 0.1544, Train Acc: 0.9153, Train F1 Macro: 0.8282, Train F1 Weighted: 0.9090
Val Acc: 0.8667, Val F1 Macro: 0.7115, Val F1 Weighted: 0.8385, Val Geometric Mean: 0.8026
Test Acc: 0.8667, Test F1 Macro: 0.7115, Test F1 Weighted: 0.8385
##################################################

Epoch: 150:
Train Loss: 0.1415, Train Acc: 0.8983, Train F1 Macro: 0.8520, Train F1 Weighted: 0.9

[I 2024-11-21 00:40:32,404] Trial 0 finished with value: 0.9491196586666667 and parameters: {}. Best is trial 0 with value: 0.9491196586666667.



Epoch: 350:
Train Loss: 0.0700, Train Acc: 0.9833, Train F1 Macro: 0.9731, Train F1 Weighted: 0.9836
Val Acc: 0.8571, Val F1 Macro: 0.7879, Val F1 Weighted: 0.8745, Val Geometric Mean: 0.8390
Test Acc: 0.8571, Test F1 Macro: 0.7879, Test F1 Weighted: 0.8745
##################################################
New best score: 0.949
Best model performance:
Accuracy: 0.987 ± 0.027
F1 Macro: 0.976 ± 0.048
F1 Weighted: 0.986 ± 0.029
[{'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 0.9333333333333333, 'f1_macro': np.float64(0.88), 'f1_weighted': np.float64(0.928)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]
Best model performance:
Accuracy: 0.987 ± 0.027
F1 Macro: 0.976 ± 0.048
F1 Weighted: 0.986 ± 0.029


In [11]:
# Manual (currently disabled) save of the BiRGAT result under a generic row name.
# birgat_eval.save_results(results_file=save_folder, row_name="BiRGAT")
# Show the CSV path that evaluator results are written to.
# NOTE(review): the recorded output of this cell ('...mrna_mirna_te.csv') does
# not match the four omics configured in the `omic_data_loaders` cell, so it
# was produced by an earlier kernel state; re-run the notebook top to bottom.
save_folder

'logs/mds_disease/mrna_mirna_te.csv'

```python
# Configuration snapshot kept for reference: BiRGAT with the TE omic disabled,
# the 2-layer variant ("three_layers": False) and 250 epochs.
# NOTE(review): presumably the setup behind the metrics listed right after
# this snippet - confirm.
omic_data_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    # # "pirna": pirna_loader,
    # "te": te_loader,
}

birgat_eval = BiRGATEvaluator(
    data_manager=BipartiteGraphDataManager(
        omic_data_loaders=omic_data_loaders,
        n_splits=5,
        params={
            # One differential-expression threshold per enabled omic.
            "diff_exp_thresholds" : {
                "mrna": 1.5,
                "mirna": 1.5,
                "circrna": 1.7,
                # "te": 1.8,
            },
        },
    ),
    params={
        "epochs": 250,
        "log_interval": 50,
        "hidden_channels": [200, 64, 64, 16, 16],
        "heads": 4,
        "dropout": 0.2,
        "attention_dropout": 0.0,
        "integrator_type": "vcdn",
        "three_layers": False,
    },
    n_trials=1,
)
```

Accuracy: 0.960 ± 0.053
F1 Macro: 0.940 ± 0.082
F1 Weighted: 0.961 ± 0.053

# mrna, mirna, circrna, 2L
Accuracy: 0.960 ± 0.033
F1 Macro: 0.915 ± 0.073
F1 Weighted: 0.956 ± 0.036
# mrna, mirna, circrna, 2L no interactions
Accuracy: 0.960 ± 0.053
F1 Macro: 0.918 ± 0.113
F1 Weighted: 0.953 ± 0.064
---
Accuracy: 0.946 ± 0.027
F1 Macro: 0.904 ± 0.048
F1 Weighted: 0.944 ± 0.028
# mrna, mirna, circrna 3L, interactions, degree ~20 in diff exp graphs, larger degree shows degraded performance
# making the avg degree too high shows large jumps on the validation set during training
Accuracy: 0.945 ± 0.053
F1 Macro: 0.910 ± 0.081
F1 Weighted: 0.946 ± 0.048
# mrna, mirna, circrna 2L, interactions, 64 cap
Accuracy: 0.960 ± 0.053
F1 Macro: 0.940 ± 0.082
F1 Weighted: 0.961 ± 0.053
# mrna, mirna, circrna 3L, interactions, 64 cap
Accuracy: 0.891 ± 0.054
F1 Macro: 0.801 ± 0.088
F1 Weighted: 0.888 ± 0.056
# mrna, mirna, circrna 3L
Accuracy: 0.920 ± 0.050
F1 Macro: 0.829 ± 0.112
F1 Weighted: 0.907 ± 0.062
# mrna, mirna, 2L
Accuracy: 0.960 ± 0.033
F1 Macro: 0.915 ± 0.073
F1 Weighted: 0.956 ± 0.036
# mrna, mirna, 3L
Accuracy: 0.947 ± 0.050
F1 Macro: 0.897 ± 0.089
F1 Weighted: 0.944 ± 0.051