In [47]:
%load_ext autoreload
%autoreload 2

from src.base_classes.omic_data_loader import OmicDataLoader
from src.data_managers.concat import CatOmicDataManager

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [48]:
# One loader per omic modality; each one reads its own sub-directory of the
# "splits_74_risk" data set.
mrna_loader = OmicDataLoader(data_dir="mds_data/splits_74_risk/mrna")
mirna_loader = OmicDataLoader(data_dir="mds_data/splits_74_risk/mirna_genes")
circrna_loader = OmicDataLoader(data_dir="mds_data/splits_74_risk/circrna")
te_loader = OmicDataLoader(data_dir="mds_data/splits_74_risk/te_counts")

# piRNA is currently disabled.
# NOTE(review): the path below points at "splits_74", not "splits_74_risk"
# like the active loaders -- confirm the directory before re-enabling.
# pirna_loader = OmicDataLoader(data_dir="mds_data/splits_74/pirna")

In [49]:
# Modalities used in this run; commented-out entries are excluded.
omic_data_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    # "pirna": pirna_loader,
    # "te": te_loader,
}
odm = CatOmicDataManager(omic_data_loaders, n_splits=5)

# Despite its name, `save_folder` holds the path of a single CSV file. The file
# name is the enabled modality keys joined with "_", in dict order.
modality_tag = "_".join(omic_data_loaders)
save_folder = f"logs/mds_risk/{modality_tag}.csv"
save_folder

'logs/mds_risk/mrna_mirna_circrna.csv'

In [50]:
from src.evals.knn import KNNEvaluator

# Bounds handed to the evaluator. The logged trials sample `n_neighbors`, so
# these presumably bound k -- TODO confirm in KNNEvaluator.
knn_params = {"k_lb": 1, "k_ub": 20}

knn_eval = KNNEvaluator(
    data_manager=odm, n_trials=20, verbose=True, params=knn_params
)
# The return value of evaluate() is intentionally discarded.
_ = knn_eval.evaluate()
# Write this run to the results CSV with row_name "KNN".
knn_eval.save_results(results_file=save_folder, row_name="KNN")

[I 2024-11-21 00:21:58,103] A new study created in memory with name: no-name-7e40e229-d887-4ae3-86ed-36c75469c619
[I 2024-11-21 00:21:58,247] Trial 0 finished with value: 0.34204924042989987 and parameters: {'n_neighbors': 19}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:58,371] Trial 1 finished with value: 0.19388513670751556 and parameters: {'n_neighbors': 1}. Best is trial 0 with value: 0.34204924042989987.


New best score: 0.342
Best model performance:
Accuracy: 0.704 ± 0.128
F1 Macro: 0.697 ± 0.130
F1 Weighted: 0.697 ± 0.134
[{'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6303030303030303)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.5454545454545454, 'f1_macro': np.float64(0.5299145299145299), 'f1_weighted': np.float64(0.5221445221445221)}, {'acc': 0.9, 'f1_macro': np.float64(0.8901098901098901), 'f1_weighted': np.float64(0.8967032967032967)}, {'acc': 0.8, 'f1_macro': np.float64(0.8), 'f1_weighted': np.float64(0.8)}]


[I 2024-11-21 00:21:58,494] Trial 2 finished with value: 0.3049770148266652 and parameters: {'n_neighbors': 11}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:58,620] Trial 3 finished with value: 0.31018182119467835 and parameters: {'n_neighbors': 17}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:58,760] Trial 4 finished with value: 0.25476976237074866 and parameters: {'n_neighbors': 13}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:58,881] Trial 5 finished with value: 0.29005021306735596 and parameters: {'n_neighbors': 15}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:59,002] Trial 6 finished with value: 0.25476976237074866 and parameters: {'n_neighbors': 13}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:59,128] Trial 7 finished with value: 0.29005021306735596 and parameters: {'n_neighbors': 15}. Best is trial 0 with value: 0.34204924042989987.
[I 2024-11-21 00:21:59,

Results saved to logs/mds_risk/mrna_mirna_circrna.csv


In [51]:
from src.evals.svm import SVMEvaluator

# Evaluator settings. The logged trials sample `C` and `class_weight`; the
# C_lb/C_ub pair presumably bounds C -- TODO confirm in SVMEvaluator.
svm_params = {
    "C_lb": 0.01,
    "C_ub": 10,
    "no_rfe": True,
    # RFE settings, currently disabled together with "no_rfe": True above.
    # "rfe_step_range": (0.05, 0.2),
    # "rfe_n_features_range": (100, 200),
}

svm_eval = SVMEvaluator(
    data_manager=odm, n_trials=50, verbose=True, params=svm_params, mode="linear"
)
# The return value of evaluate() is intentionally discarded.
_ = svm_eval.evaluate()
# Write this run to the results CSV with row_name "Linear SVM".
svm_eval.save_results(results_file=save_folder, row_name="Linear SVM")

[I 2024-11-21 00:22:00,683] A new study created in memory with name: no-name-b7ab9e38-cd1d-4a1b-a616-cf073cc6f28e
[I 2024-11-21 00:22:00,811] Trial 0 finished with value: 0.33450122790480763 and parameters: {'C': 0.01945349170115929, 'class_weight': None}. Best is trial 0 with value: 0.33450122790480763.


New best score: 0.335
Best model performance:
Accuracy: 0.702 ± 0.117
F1 Macro: 0.690 ± 0.119
F1 Weighted: 0.691 ± 0.123
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.5454545454545454, 'f1_macro': np.float64(0.5299145299145299), 'f1_weighted': np.float64(0.5221445221445221)}, {'acc': 0.9, 'f1_macro': np.float64(0.8901098901098901), 'f1_weighted': np.float64(0.8967032967032967)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6703296703296703)}]


[I 2024-11-21 00:22:00,921] Trial 1 finished with value: 0.33450122790480763 and parameters: {'C': 0.026158729824112388, 'class_weight': None}. Best is trial 0 with value: 0.33450122790480763.
[I 2024-11-21 00:22:01,034] Trial 2 finished with value: 0.33450122790480763 and parameters: {'C': 0.01678306741875054, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.33450122790480763.
[I 2024-11-21 00:22:01,163] Trial 3 finished with value: 0.2520688516904583 and parameters: {'C': 4.063671185117358, 'class_weight': None}. Best is trial 0 with value: 0.33450122790480763.
[I 2024-11-21 00:22:01,296] Trial 4 finished with value: 0.2520688516904583 and parameters: {'C': 1.5977743899288857, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.33450122790480763.
[I 2024-11-21 00:22:01,420] Trial 5 finished with value: 0.22866732496688394 and parameters: {'C': 0.15189199699495332, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.33450122790480763.
[I 2024-11-21 00:22:01,549

New best score: 0.340
Best model performance:
Accuracy: 0.704 ± 0.128
F1 Macro: 0.696 ± 0.128
F1 Weighted: 0.695 ± 0.132
[{'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6303030303030303)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.5454545454545454, 'f1_macro': np.float64(0.5299145299145299), 'f1_weighted': np.float64(0.5221445221445221)}, {'acc': 0.9, 'f1_macro': np.float64(0.8901098901098901), 'f1_weighted': np.float64(0.8967032967032967)}, {'acc': 0.8, 'f1_macro': np.float64(0.7916666666666667), 'f1_weighted': np.float64(0.7916666666666667)}]


[I 2024-11-21 00:22:02,170] Trial 11 finished with value: 0.2777096180173103 and parameters: {'C': 0.03170500460409983, 'class_weight': None}. Best is trial 9 with value: 0.340415895850237.
[I 2024-11-21 00:22:02,278] Trial 12 finished with value: 0.33450122790480763 and parameters: {'C': 0.010828198106658107, 'class_weight': None}. Best is trial 9 with value: 0.340415895850237.
[I 2024-11-21 00:22:02,399] Trial 13 finished with value: 0.33638068415134637 and parameters: {'C': 0.06426839282912879, 'class_weight': None}. Best is trial 9 with value: 0.340415895850237.
[I 2024-11-21 00:22:02,528] Trial 14 finished with value: 0.2520688516904583 and parameters: {'C': 0.38182704984874594, 'class_weight': None}. Best is trial 9 with value: 0.340415895850237.
[I 2024-11-21 00:22:02,648] Trial 15 finished with value: 0.282303085828288 and parameters: {'C': 0.05808661948912825, 'class_weight': 'balanced'}. Best is trial 9 with value: 0.340415895850237.
[I 2024-11-21 00:22:02,773] Trial 16 finis

In [52]:
from src.evals.xgboost import XGBoostEvaluator

# No `params` are passed here, unlike the KNN/SVM/MLP cells.
xgb_eval = XGBoostEvaluator(data_manager=odm, n_trials=60, verbose=True)

# The return value of evaluate() is intentionally discarded.
_ = xgb_eval.evaluate()
# Print the summary of the best trial, then write it to the results CSV.
xgb_eval.print_best_results()
xgb_eval.save_results(results_file=save_folder, row_name="XGBoost")

[I 2024-11-21 00:22:06,818] A new study created in memory with name: no-name-47b88404-50f5-4548-9b0a-70e28eef2d2f
[I 2024-11-21 00:22:07,026] Trial 0 finished with value: 0.2149085674931129 and parameters: {'booster': 'gbtree', 'lambda': 8.172875198129406e-06, 'alpha': 0.7230665708592489, 'max_depth': 7, 'eta': 2.87048416293221e-05, 'gamma': 4.0730159441985344e-07, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.2149085674931129.
[I 2024-11-21 00:22:07,197] Trial 1 finished with value: 0.07498688884758245 and parameters: {'booster': 'gblinear', 'lambda': 0.07783228967457771, 'alpha': 0.8453666208490124}. Best is trial 0 with value: 0.2149085674931129.


New best score: 0.215
Best model performance:
Accuracy: 0.620 ± 0.073
F1 Macro: 0.582 ± 0.133
F1 Weighted: 0.595 ± 0.108
[{'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6303030303030303)}, {'acc': 0.5, 'f1_macro': np.float64(0.3333333333333333), 'f1_weighted': np.float64(0.4)}, {'acc': 0.6, 'f1_macro': np.float64(0.5833333333333333), 'f1_weighted': np.float64(0.5833333333333333)}]


[I 2024-11-21 00:22:07,460] Trial 2 finished with value: 0.2149085674931129 and parameters: {'booster': 'dart', 'lambda': 0.00043585617404054455, 'alpha': 0.0008872109723103269, 'max_depth': 7, 'eta': 6.53338875723601e-07, 'gamma': 8.694938435372058e-06, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 5.87859555689667e-08, 'skip_drop': 0.013546539462348014}. Best is trial 0 with value: 0.2149085674931129.
[I 2024-11-21 00:22:07,595] Trial 3 finished with value: 0.38524225167290765 and parameters: {'booster': 'gblinear', 'lambda': 8.738544374904245e-05, 'alpha': 0.02683811674420172}. Best is trial 3 with value: 0.38524225167290765.
[I 2024-11-21 00:22:07,729] Trial 4 finished with value: 0.3735629973133255 and parameters: {'booster': 'gblinear', 'lambda': 0.00575093052129023, 'alpha': 0.07409413658924324}. Best is trial 3 with value: 0.38524225167290765.


New best score: 0.385
Best model performance:
Accuracy: 0.735 ± 0.044
F1 Macro: 0.722 ± 0.054
F1 Weighted: 0.726 ± 0.050
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8151515151515153)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6901098901098901)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6703296703296703)}]


[I 2024-11-21 00:22:07,926] Trial 5 finished with value: 0.3515032983971951 and parameters: {'booster': 'gblinear', 'lambda': 0.02285844265284716, 'alpha': 0.0014514746742740501}. Best is trial 3 with value: 0.38524225167290765.
[I 2024-11-21 00:22:08,103] Trial 6 finished with value: 0.3305099711477334 and parameters: {'booster': 'gblinear', 'lambda': 0.22160869972866146, 'alpha': 2.4009291906845667e-05}. Best is trial 3 with value: 0.38524225167290765.
[I 2024-11-21 00:22:08,336] Trial 7 finished with value: 0.21579444444444446 and parameters: {'booster': 'gbtree', 'lambda': 1.3281230837856426e-06, 'alpha': 1.4432248477778299e-08, 'max_depth': 3, 'eta': 0.13570205881812453, 'gamma': 1.779558141715992e-07, 'grow_policy': 'depthwise'}. Best is trial 3 with value: 0.38524225167290765.
[I 2024-11-21 00:22:08,548] Trial 8 finished with value: 0.2149085674931129 and parameters: {'booster': 'gbtree', 'lambda': 3.662802334223968e-08, 'alpha': 1.4421894972027642e-08, 'max_depth': 9, 'eta': 5.

New best score: 0.415
Best model performance:
Accuracy: 0.753 ± 0.054
F1 Macro: 0.740 ± 0.066
F1 Weighted: 0.744 ± 0.062
[{'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8181818181818182)}, {'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8151515151515153)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6901098901098901)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6703296703296703)}]


[I 2024-11-21 00:22:09,693] Trial 14 finished with value: 0.1001661456337518 and parameters: {'booster': 'dart', 'lambda': 1.4405104927187283e-05, 'alpha': 0.0019003423904554832, 'max_depth': 1, 'eta': 1.9771563941942304e-08, 'gamma': 0.0014831231125841304, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 0.7512019471770593, 'skip_drop': 1.8505151444302425e-08}. Best is trial 12 with value: 0.4146797854017747.
[I 2024-11-21 00:22:09,850] Trial 15 finished with value: 0.3734019041622159 and parameters: {'booster': 'gblinear', 'lambda': 2.402857428956379e-07, 'alpha': 0.12555588764290232}. Best is trial 12 with value: 0.4146797854017747.
[I 2024-11-21 00:22:09,996] Trial 16 finished with value: 0.304836957913881 and parameters: {'booster': 'gblinear', 'lambda': 0.0011444108242948513, 'alpha': 2.903695575916224e-06}. Best is trial 12 with value: 0.4146797854017747.
[I 2024-11-21 00:22:10,203] Trial 17 finished with value: 0.38529159843335303 a

New best score: 0.415
Best model performance:
Accuracy: 0.755 ± 0.093
F1 Macro: 0.740 ± 0.095
F1 Weighted: 0.744 ± 0.097
[{'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6303030303030303)}, {'acc': 0.9090909090909091, 'f1_macro': np.float64(0.9090909090909091), 'f1_weighted': np.float64(0.9090909090909091)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.8, 'f1_macro': np.float64(0.7619047619047619), 'f1_weighted': np.float64(0.7809523809523808)}, {'acc': 0.7, 'f1_macro': np.float64(0.6703296703296704), 'f1_weighted': np.float64(0.6703296703296703)}]
New best score: 0.458
Best model performance:
Accuracy: 0.775 ± 0.110
F1 Macro: 0.769 ± 0.110
F1 Weighted: 0.769 ± 0.113
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.717948717948718), 'f1_weighted': np.float64(0.7132867132867133)}, {'acc': 0.9090909090909091, 'f1_macro': np.float64(0.90909090909090

[I 2024-11-21 00:22:11,299] Trial 22 finished with value: 0.45848949058461286 and parameters: {'booster': 'gblinear', 'lambda': 1.431635732487221e-08, 'alpha': 4.050786097843031e-06}. Best is trial 22 with value: 0.45848949058461286.
[I 2024-11-21 00:22:11,516] Trial 23 finished with value: 0.42037358084130233 and parameters: {'booster': 'gblinear', 'lambda': 1.2756470106316192e-08, 'alpha': 3.929754725425964e-07}. Best is trial 22 with value: 0.45848949058461286.
[I 2024-11-21 00:22:11,673] Trial 24 finished with value: 0.36984636486390743 and parameters: {'booster': 'gblinear', 'lambda': 1.0022074321863063e-08, 'alpha': 3.062810332032032e-07}. Best is trial 22 with value: 0.45848949058461286.
[I 2024-11-21 00:22:11,860] Trial 25 finished with value: 0.453983092332606 and parameters: {'booster': 'gblinear', 'lambda': 1.2744454399998134e-07, 'alpha': 2.552902247046118e-07}. Best is trial 22 with value: 0.45848949058461286.
[I 2024-11-21 00:22:12,039] Trial 26 finished with value: 0.393

New best score: 0.490
Best model performance:
Accuracy: 0.795 ± 0.104
F1 Macro: 0.784 ± 0.106
F1 Weighted: 0.787 ± 0.107
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.717948717948718), 'f1_weighted': np.float64(0.7132867132867133)}, {'acc': 0.9090909090909091, 'f1_macro': np.float64(0.9090909090909091), 'f1_weighted': np.float64(0.9090909090909091)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6303030303030303)}, {'acc': 0.8, 'f1_macro': np.float64(0.7619047619047619), 'f1_weighted': np.float64(0.7809523809523808)}, {'acc': 0.9, 'f1_macro': np.float64(0.898989898989899), 'f1_weighted': np.float64(0.898989898989899)}]
Best model performance:
Accuracy: 0.795 ± 0.104
F1 Macro: 0.784 ± 0.106
F1 Weighted: 0.787 ± 0.107


In [53]:
from src.evals.mlp import MLPEvaluator

# Evaluator settings. The reported best hyperparameters contain only `lr` and
# `dropout`, so the two "*_range" entries are presumably the tuned ones and the
# remaining values fixed -- TODO confirm in MLPEvaluator.
mlp_params = {
    "lr_range": [1e-4, 1e-2],
    "l2_lambda": 5e-4,
    "dropout_range": [0.1, 0.6],
    "hidden_channels": 64,
    "proj_dim": 64,
    "batch_size": 32,
    "max_epochs": 50,
}

mlp_eval = MLPEvaluator(
    data_manager=odm, n_trials=10, verbose=True, params=mlp_params
)
# The return value of evaluate() is intentionally discarded.
_ = mlp_eval.evaluate()
# Write this run to the results CSV with row_name "MLP".
mlp_eval.save_results(results_file=save_folder, row_name="MLP")

[I 2024-11-21 00:22:18,653] A new study created in memory with name: no-name-1ceb2680-e83c-440b-8cf3-0a0547387d49
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using:

New best score: 0.370
Best model performance:
Accuracy: 0.720 ± 0.073
F1 Macro: 0.715 ± 0.071
F1 Weighted: 0.718 ± 0.072
[{'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.8, 'f1_macro': np.float64(0.7916666666666667), 'f1_weighted': np.float64(0.8)}, {'acc': 0.8, 'f1_macro': np.float64(0.7916666666666667), 'f1_weighted': np.float64(0.7916666666666667)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-pa

New best score: 0.426
Best model performance:
Accuracy: 0.756 ± 0.068
F1 Macro: 0.748 ± 0.065
F1 Weighted: 0.753 ± 0.066
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8181818181818182)}, {'acc': 0.8, 'f1_macro': np.float64(0.7619047619047619), 'f1_weighted': np.float64(0.7809523809523808)}, {'acc': 0.8, 'f1_macro': np.float64(0.8), 'f1_weighted': np.float64(0.8)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-pa

New best score: 0.433
Best model performance:
Accuracy: 0.760 ± 0.132
F1 Macro: 0.755 ± 0.135
F1 Weighted: 0.754 ± 0.139
[{'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.7272727272727273), 'f1_weighted': np.float64(0.7272727272727273)}, {'acc': 0.5454545454545454, 'f1_macro': np.float64(0.5299145299145299), 'f1_weighted': np.float64(0.5221445221445221)}, {'acc': 0.9, 'f1_macro': np.float64(0.8901098901098901), 'f1_weighted': np.float64(0.8967032967032967)}, {'acc': 0.9, 'f1_macro': np.float64(0.898989898989899), 'f1_weighted': np.float64(0.898989898989899)}]


`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-pa

In [54]:
# Report the best MLP trial: its metrics first, then the hyperparameters
# that produced it.
mlp_eval.print_best_results()
mlp_eval.print_best_parameters()

Best model performance:
Accuracy: 0.760 ± 0.132
F1 Macro: 0.755 ± 0.135
F1 Weighted: 0.754 ± 0.139
Best hyperparameters:
{'lr': 0.00015545789103861535, 'dropout': 0.5038754072096114}


In [55]:
from src.evals.mogonet import MOGONETEvaluator
from src.data_managers.sample_graph import SampleGraphDataManager

# Graph data manager built over the same loaders as the models above.
mogonet_data_manager = SampleGraphDataManager(
    omic_data_loaders=omic_data_loaders,
    n_splits=5,
    params={
        "graph_style": "threshold",
        "self_connections": True,
        "avg_degree": 5,
    },
)

mogonet_params = {
    # NOTE(review): "te" is listed here although the "te" loader is commented
    # out of `omic_data_loaders` -- presumably ignored; confirm.
    "encoder_hidden_channels": {
        "mrna": 64,
        "mirna": 64,
        "circrna": 64,
        "te": 64,
    },
    "encoder_type": "gat",
    "dropout": 0.2,
    "integrator_type": "vcdn",  # vcdn seems to work better on mds disease
    "integration_in_dim": 16,
    "vcdn_hidden_channels": 16,
    "epochs": 250,
    # Larger than "epochs"; presumably this keeps per-epoch logging off
    # (no epoch logs appear in the captured output) -- TODO confirm.
    "log_interval": 251,
}

# n_trials=1: the logged trial reports an empty parameter set.
mogonet_eval = MOGONETEvaluator(
    data_manager=mogonet_data_manager,
    n_trials=1,
    params=mogonet_params,
)
mogonet_eval.evaluate()
mogonet_eval.print_best_results()
mogonet_eval.save_results(results_file=save_folder, row_name="MOGONET")

[I 2024-11-21 00:22:33,665] A new study created in memory with name: no-name-4bc71976-03a4-46cf-b54d-717fd6feda0e


Using: vcdn integrator
Using: vcdn integrator
Using: vcdn integrator
Using: vcdn integrator
Using: vcdn integrator


[I 2024-11-21 00:22:57,682] Trial 0 finished with value: 0.5685106677310369 and parameters: {}. Best is trial 0 with value: 0.5685106677310369.


New best score: 0.569
Best model performance:
Accuracy: 0.831 ± 0.035
F1 Macro: 0.826 ± 0.033
F1 Weighted: 0.828 ± 0.036
[{'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8181818181818182)}, {'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8151515151515153)}, {'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8181818181818182)}, {'acc': 0.9, 'f1_macro': np.float64(0.8901098901098901), 'f1_weighted': np.float64(0.8967032967032967)}, {'acc': 0.8, 'f1_macro': np.float64(0.7916666666666667), 'f1_weighted': np.float64(0.7916666666666667)}]
Best model performance:
Accuracy: 0.831 ± 0.035
F1 Macro: 0.826 ± 0.033
F1 Weighted: 0.828 ± 0.036


# linear
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
# vcdn
- integration dim = 2
Accuracy: 0.825 ± 0.030
F1 Macro: 0.452 ± 0.009
F1 Weighted: 0.746 ± 0.043
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.973 ± 0.033
F1 Macro: 0.957 ± 0.053
F1 Weighted: 0.973 ± 0.033
- integration dim = 16
Accuracy: 0.973 ± 0.053
F1 Macro: 0.958 ± 0.083
F1 Weighted: 0.973 ± 0.053
# attention - faster than vcdn
- integration dim = 2
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067
- integration dim = 8
Accuracy: 0.973 ± 0.033
F1 Macro: 0.952 ± 0.059
F1 Weighted: 0.971 ± 0.035
- integration dim = 12
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
- integration dim = 16
Accuracy: 0.960 ± 0.053
F1 Macro: 0.934 ± 0.085
F1 Weighted: 0.959 ± 0.054

In [56]:
from src.evals.birgat import BiRGATEvaluator
from src.data_managers.bipartite_graph import BipartiteGraphDataManager

# Per-modality thresholds handed to the bipartite graph data manager.
# NOTE(review): "te" is listed although the "te" loader is commented out of
# `omic_data_loaders` -- presumably ignored; confirm.
birgat_diff_exp_thresholds = {
    "mrna": 1.5,
    "mirna": 1.5,
    "circrna": 1.7,
    "te": 1.7,
}
birgat_data_manager = BipartiteGraphDataManager(
    omic_data_loaders=omic_data_loaders,
    n_splits=5,
    params={"diff_exp_thresholds": birgat_diff_exp_thresholds},
)

birgat_params = {
    "epochs": 350,
    # The captured output shows a metrics report at epochs 50, 100, 150, ...
    "log_interval": 50,
    "hidden_channels": [200, 64, 64, 16, 16],
    "heads": 4,
    "dropout": 0.2,
    "attention_dropout": 0.0,
    "integrator_type": "vcdn",
    "three_layers": True,
}

# n_trials=1: the logged trial reports an empty parameter set.
birgat_eval = BiRGATEvaluator(
    data_manager=birgat_data_manager,
    params=birgat_params,
    n_trials=1,
)
birgat_eval.evaluate()
birgat_eval.print_best_results()
birgat_eval.save_results(results_file=save_folder, row_name="BiRGAT")

isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(25.0943)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.4528)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(8) tensor(15.1887)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(25.0943)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.4528)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(8) tensor(15.1887)


[I 2024-11-21 00:22:58,018] A new study created in memory with name: no-name-8ad8310d-d31b-4759-977d-a51ba78f603a


isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(25.0943)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(0) tensor(21.4528)
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(0) tensor(8) tensor(15.1887)

Epoch: 050:
Train Loss: 0.4761, Train Acc: 0.7619, Train F1 Macro: 0.7597, Train F1 Weighted: 0.7619
Val Acc: 0.7273, Val F1 Macro: 0.7273, Val F1 Weighted: 0.7273, Val Geometric Mean: 0.7273
Test Acc: 0.7273, Test F1 Macro: 0.7273, Test F1 Weighted: 0.7273
##################################################

Epoch: 100:
Train Loss: 0.3980, Train Acc: 0.8810, Train F1 Macro: 0.8809, Train F1 Weighted: 0.8806
Val Acc: 0.6364, Val F1 Macro: 0.6071, Val F1 Weighted: 0.5974, Val Geometric Mean: 0.6134
Test Acc: 0.6364, Test F1 Macro: 0.6071, Test F1 Weighted: 0.5974
##################################################

Epoch: 150:
Train Loss: 0.2218, Train Acc: 0.9048, Train F1 Macro: 0.9048, Train F1 Weighted: 0.9

[I 2024-11-21 00:24:54,048] Trial 0 finished with value: 0.5016456616949483 and parameters: {}. Best is trial 0 with value: 0.5016456616949483.



Epoch: 350:
Train Loss: 0.1253, Train Acc: 0.9767, Train F1 Macro: 0.9765, Train F1 Weighted: 0.9768
Val Acc: 0.8000, Val F1 Macro: 0.7917, Val F1 Weighted: 0.7917, Val Geometric Mean: 0.7944
Test Acc: 0.8000, Test F1 Macro: 0.7917, Test F1 Weighted: 0.7917
##################################################
New best score: 0.502
Best model performance:
Accuracy: 0.796 ± 0.120
F1 Macro: 0.792 ± 0.122
F1 Weighted: 0.795 ± 0.121
[{'acc': 0.8181818181818182, 'f1_macro': np.float64(0.8166666666666667), 'f1_weighted': np.float64(0.8181818181818182)}, {'acc': 0.7272727272727273, 'f1_macro': np.float64(0.717948717948718), 'f1_weighted': np.float64(0.7226107226107227)}, {'acc': 0.6363636363636364, 'f1_macro': np.float64(0.6333333333333333), 'f1_weighted': np.float64(0.6363636363636364)}, {'acc': 0.8, 'f1_macro': np.float64(0.7916666666666667), 'f1_weighted': np.float64(0.8)}, {'acc': 1.0, 'f1_macro': np.float64(1.0), 'f1_weighted': np.float64(1.0)}]
Best model performance:
Accuracy: 0.796 ± 0.