In [1]:
%load_ext autoreload
%autoreload 2

from src.base_classes.omic_data_loader import OmicDataLoader
from src.data_managers.concat import CatOmicDataManager

In [3]:
# One OmicDataLoader per omic modality; each is pointed at its own
# sub-directory of mds_data/splits_74.
mrna_loader = OmicDataLoader(data_dir="mds_data/splits_74/mrna")
mirna_loader = OmicDataLoader(data_dir="mds_data/splits_74/mirna_genes")
circrna_loader = OmicDataLoader(data_dir="mds_data/splits_74/circrna")
te_loader = OmicDataLoader(data_dir="mds_data/splits_74/te_counts")

In [4]:
# Sanity check: class counts of the train and test frames for each of the
# five mRNA folds.
for fold_idx in range(5):
    train_df, test_df = mrna_loader.get_fold(fold_idx)

    print("fold: ", fold_idx)
    # Print each table under its own label; a single print() of both tables
    # ran them together on one line with nothing to tell train from test.
    print("train:", train_df["class"].value_counts(), sep="\n")
    print("test:", test_df["class"].value_counts(), sep="\n")

fold:  0
shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 10    │
│ 1     ┆ 49    │
└───────┴───────┘ shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 3     │
│ 1     ┆ 12    │
└───────┴───────┘
fold:  1
shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 10    │
│ 1     ┆ 49    │
└───────┴───────┘ shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 3     │
│ 1     ┆ 12    │
└───────┴───────┘
fold:  2
shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 10    │
│ 1     ┆ 49    │
└───────┴───────┘ shape: (2, 2)
┌───────┬───────┐
│ class ┆ count │
│ ---   ┆ ---   │
│ i64   ┆ u32   │
╞═══════╪═══════╡
│ 0     ┆ 3     │
│ 1     ┆ 12    │
└───────┴───────┘
fold:  3
shape: (2, 2)
┌─

In [57]:
# Omic name -> loader for the modalities handed to the data manager.
# Entry order is the same as in the original literal.
omic_data_loaders = dict(
    [
        ("mrna", mrna_loader),
        ("mirna", mirna_loader),
        ("circrna", circrna_loader),
        # ("pirna", pirna_loader),  # disabled; no pirna_loader is created in the cells above
        ("te", te_loader),
    ]
)
odm = CatOmicDataManager(omic_data_loaders, n_splits=5)

In [27]:
# Show the feature dimension and class count reported by the data manager.
(odm.feature_dim, odm.n_classes)

(200, 2)

In [73]:
from src.evals.knn import KNNEvaluator

# KNN evaluation on `odm`: 20 trials, with k bounded by k_lb / k_ub.
knn_params = {"k_lb": 1, "k_ub": 20}
knn_eval = KNNEvaluator(data_manager=odm, n_trials=20, verbose=True, params=knn_params)
_ = knn_eval.evaluate()  # return value is not used here; the summary is printed next
knn_eval.print_best_results()

[I 2024-11-13 21:53:48,070] A new study created in memory with name: no-name-98f39504-4a21-41b5-a710-2c0993ddd36a
[I 2024-11-13 21:53:48,292] Trial 0 finished with value: 0.6862235118921005 and parameters: {'n_neighbors': 18}. Best is trial 0 with value: 0.6862235118921005.


New best score: 0.686
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.824 ± 0.109
F1 Weighted: 0.906 ± 0.061


[I 2024-11-13 21:53:48,500] Trial 1 finished with value: 0.5472774151647614 and parameters: {'n_neighbors': 19}. Best is trial 0 with value: 0.6862235118921005.
[I 2024-11-13 21:53:48,700] Trial 2 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 15}. Best is trial 0 with value: 0.6862235118921005.
[I 2024-11-13 21:53:48,898] Trial 3 finished with value: 0.40046307690049987 and parameters: {'n_neighbors': 2}. Best is trial 0 with value: 0.6862235118921005.
[I 2024-11-13 21:53:49,104] Trial 4 finished with value: 0.5171422862851435 and parameters: {'n_neighbors': 1}. Best is trial 0 with value: 0.6862235118921005.
[I 2024-11-13 21:53:49,303] Trial 5 finished with value: 0.5472774151647614 and parameters: {'n_neighbors': 20}. Best is trial 0 with value: 0.6862235118921005.
[I 2024-11-13 21:53:49,514] Trial 6 finished with value: 0.7039337563024542 and parameters: {'n_neighbors': 11}. Best is trial 6 with value: 0.7039337563024542.


New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056


[I 2024-11-13 21:53:49,720] Trial 7 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 13}. Best is trial 6 with value: 0.7039337563024542.
[I 2024-11-13 21:53:49,936] Trial 8 finished with value: 0.7039337563024542 and parameters: {'n_neighbors': 11}. Best is trial 6 with value: 0.7039337563024542.
[I 2024-11-13 21:53:50,134] Trial 9 finished with value: 0.5171422862851435 and parameters: {'n_neighbors': 1}. Best is trial 6 with value: 0.7039337563024542.
[I 2024-11-13 21:53:50,334] Trial 10 finished with value: 0.612600042864821 and parameters: {'n_neighbors': 7}. Best is trial 6 with value: 0.7039337563024542.
[I 2024-11-13 21:53:50,536] Trial 11 finished with value: 0.7389770960193865 and parameters: {'n_neighbors': 9}. Best is trial 11 with value: 0.7389770960193865.


New best score: 0.739
Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.857 ± 0.094
F1 Weighted: 0.924 ± 0.051


[I 2024-11-13 21:53:50,738] Trial 12 finished with value: 0.6472821557663117 and parameters: {'n_neighbors': 6}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:50,953] Trial 13 finished with value: 0.6014691748907844 and parameters: {'n_neighbors': 8}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:51,154] Trial 14 finished with value: 0.6631836987350632 and parameters: {'n_neighbors': 10}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:51,354] Trial 15 finished with value: 0.6623938241144065 and parameters: {'n_neighbors': 5}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:51,557] Trial 16 finished with value: 0.6192439604955923 and parameters: {'n_neighbors': 15}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:51,759] Trial 17 finished with value: 0.6631836987350632 and parameters: {'n_neighbors': 10}. Best is trial 11 with value: 0.7389770960193865.
[I 2024-11-13 21:53:51,96

Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.857 ± 0.094
F1 Weighted: 0.924 ± 0.051


In [74]:
from src.evals.svm import SVMEvaluator

# Linear-mode SVM evaluation on `odm`: 30 trials over the C and RFE ranges below.
svm_params = {
    "C_lb": 0.01,
    "C_ub": 10,
    "rfe_step_range": (0.05, 0.2),
    "rfe_n_features_range": (100, 200),
}
svm_eval = SVMEvaluator(
    data_manager=odm, n_trials=30, verbose=True, params=svm_params, mode="linear"
)
_ = svm_eval.evaluate()  # return value is not used here; the summary is printed next
svm_eval.print_best_results()

[I 2024-11-13 21:53:55,110] A new study created in memory with name: no-name-6ac59e52-4f6a-4e4a-9e43-0d6330f068ef
[I 2024-11-13 21:53:55,448] Trial 0 finished with value: 0.7155098095140549 and parameters: {'C': 0.01772957818805434, 'class_weight': 'balanced', 'rfe_step': 0.09919097900989131, 'rfe_n_features': 185}. Best is trial 0 with value: 0.7155098095140549.


New best score: 0.716
Best model performance:
Accuracy: 0.919 ± 0.050
F1 Macro: 0.852 ± 0.097
F1 Weighted: 0.913 ± 0.056


[I 2024-11-13 21:53:55,830] Trial 1 finished with value: 0.7039337563024542 and parameters: {'C': 0.03134410486467334, 'class_weight': None, 'rfe_step': 0.05380628664121365, 'rfe_n_features': 186}. Best is trial 0 with value: 0.7155098095140549.
[I 2024-11-13 21:53:56,153] Trial 2 finished with value: 0.6631836987350632 and parameters: {'C': 0.09836892513300621, 'class_weight': 'balanced', 'rfe_step': 0.16061841838350865, 'rfe_n_features': 186}. Best is trial 0 with value: 0.7155098095140549.
[I 2024-11-13 21:53:56,566] Trial 3 finished with value: 0.7039337563024542 and parameters: {'C': 0.06340808334748284, 'class_weight': None, 'rfe_step': 0.0658841898678164, 'rfe_n_features': 152}. Best is trial 0 with value: 0.7155098095140549.
[I 2024-11-13 21:53:56,976] Trial 4 finished with value: 0.6631836987350632 and parameters: {'C': 0.14411431771340535, 'class_weight': None, 'rfe_step': 0.07037657350406427, 'rfe_n_features': 186}. Best is trial 0 with value: 0.7155098095140549.
[I 2024-11-

New best score: 0.721
Best model performance:
Accuracy: 0.918 ± 0.052
F1 Macro: 0.857 ± 0.099
F1 Weighted: 0.916 ± 0.055


[I 2024-11-13 21:53:59,613] Trial 12 finished with value: 0.6804361055090542 and parameters: {'C': 0.010299627290047192, 'class_weight': 'balanced', 'rfe_step': 0.09720676384643837, 'rfe_n_features': 131}. Best is trial 11 with value: 0.720542876247867.
[I 2024-11-13 21:54:00,191] Trial 13 finished with value: 0.7500998536858599 and parameters: {'C': 0.4624453241199983, 'class_weight': 'balanced', 'rfe_step': 0.09488325164479136, 'rfe_n_features': 134}. Best is trial 13 with value: 0.7500998536858599.


New best score: 0.750
Best model performance:
Accuracy: 0.932 ± 0.002
F1 Macro: 0.867 ± 0.026
F1 Weighted: 0.928 ± 0.003


[I 2024-11-13 21:54:00,628] Trial 14 finished with value: 0.7500998536858599 and parameters: {'C': 0.5354323664708412, 'class_weight': 'balanced', 'rfe_step': 0.093207078605448, 'rfe_n_features': 133}. Best is trial 13 with value: 0.7500998536858599.
[I 2024-11-13 21:54:01,080] Trial 15 finished with value: 0.7039337563024542 and parameters: {'C': 0.5096211154336758, 'class_weight': 'balanced', 'rfe_step': 0.08843596965022621, 'rfe_n_features': 135}. Best is trial 13 with value: 0.7500998536858599.
[I 2024-11-13 21:54:01,568] Trial 16 finished with value: 0.7039337563024542 and parameters: {'C': 9.025990574812704, 'class_weight': 'balanced', 'rfe_step': 0.08522161323802442, 'rfe_n_features': 137}. Best is trial 13 with value: 0.7500998536858599.
[I 2024-11-13 21:54:01,935] Trial 17 finished with value: 0.6631836987350632 and parameters: {'C': 0.30804291221960983, 'class_weight': 'balanced', 'rfe_step': 0.1389023581081106, 'rfe_n_features': 121}. Best is trial 13 with value: 0.750099853

New best score: 0.794
Best model performance:
Accuracy: 0.947 ± 0.027
F1 Macro: 0.891 ± 0.060
F1 Weighted: 0.941 ± 0.029


[I 2024-11-13 21:54:06,264] Trial 27 finished with value: 0.7039337563024542 and parameters: {'C': 8.001215382243764, 'class_weight': 'balanced', 'rfe_step': 0.07982999421017259, 'rfe_n_features': 119}. Best is trial 26 with value: 0.7940961203523854.
[I 2024-11-13 21:54:06,837] Trial 28 finished with value: 0.7573892492973633 and parameters: {'C': 4.28120279756211, 'class_weight': 'balanced', 'rfe_step': 0.06281939731108788, 'rfe_n_features': 143}. Best is trial 26 with value: 0.7940961203523854.
[I 2024-11-13 21:54:07,346] Trial 29 finished with value: 0.7147297129153442 and parameters: {'C': 4.730736025598158, 'class_weight': 'balanced', 'rfe_step': 0.07611507962460702, 'rfe_n_features': 143}. Best is trial 26 with value: 0.7940961203523854.


Best model performance:
Accuracy: 0.947 ± 0.027
F1 Macro: 0.891 ± 0.060
F1 Weighted: 0.941 ± 0.029


Best model performance:
Accuracy: 0.938 ± 0.058
F1 Macro: 0.684 ± 0.258
F1 Weighted: 0.924 ± 0.064

In [62]:
from src.evals.xgboost import XGBoostEvaluator

# XGBoost evaluation on `odm`: 60 trials, no explicit `params` passed.
xgb_eval = XGBoostEvaluator(data_manager=odm, n_trials=60, verbose=True)
# Bind the result so the cell does not echo it as its last expression.
_ = xgb_eval.evaluate()

[I 2024-11-13 21:51:22,141] A new study created in memory with name: no-name-a6c84dbb-96fb-4e06-89e3-5cf9800d3432
[I 2024-11-13 21:51:22,362] Trial 0 finished with value: 0.7039337563024542 and parameters: {'lambda': 0.3584886463563621, 'alpha': 2.1203835053596267e-08}. Best is trial 0 with value: 0.7039337563024542.
[I 2024-11-13 21:51:22,553] Trial 1 finished with value: 0.7581635155117249 and parameters: {'lambda': 0.004089650990495877, 'alpha': 1.7858414787418788e-08}. Best is trial 1 with value: 0.7581635155117249.


New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056
New best score: 0.758
Best model performance:
Accuracy: 0.933 ± 0.060
F1 Macro: 0.877 ± 0.114
F1 Weighted: 0.927 ± 0.067


[I 2024-11-13 21:51:22,749] Trial 2 finished with value: 0.7581635155117249 and parameters: {'lambda': 0.017511112403939873, 'alpha': 1.028261317174957e-06}. Best is trial 1 with value: 0.7581635155117249.
[I 2024-11-13 21:51:22,942] Trial 3 finished with value: 0.6022120594382817 and parameters: {'lambda': 2.486683965835714e-06, 'alpha': 2.540926338636746e-05}. Best is trial 1 with value: 0.7581635155117249.
[I 2024-11-13 21:51:23,267] Trial 4 finished with value: 0.27794757275052406 and parameters: {'lambda': 0.010966976886641595, 'alpha': 0.9470306339118821}. Best is trial 1 with value: 0.7581635155117249.
[I 2024-11-13 21:51:23,467] Trial 5 finished with value: 0.775476069168414 and parameters: {'lambda': 4.0819797745300316e-05, 'alpha': 5.528383457492382e-07}. Best is trial 5 with value: 0.775476069168414.
[I 2024-11-13 21:51:23,662] Trial 6 finished with value: 0.6305302169311957 and parameters: {'lambda': 7.852368576823749e-08, 'alpha': 0.00010179406181330772}. Best is trial 5 w

New best score: 0.775
Best model performance:
Accuracy: 0.932 ± 0.042
F1 Macro: 0.891 ± 0.067
F1 Weighted: 0.933 ± 0.042


[I 2024-11-13 21:51:23,881] Trial 7 finished with value: 0.7581635155117249 and parameters: {'lambda': 0.03037996203506876, 'alpha': 7.35072665195095e-07}. Best is trial 5 with value: 0.775476069168414.
[I 2024-11-13 21:51:24,075] Trial 8 finished with value: 0.7415719661868713 and parameters: {'lambda': 6.651696267369343e-06, 'alpha': 6.651813445797384e-08}. Best is trial 5 with value: 0.775476069168414.
[I 2024-11-13 21:51:24,268] Trial 9 finished with value: 0.7039337563024542 and parameters: {'lambda': 3.601323635499819e-07, 'alpha': 0.00040128100391318225}. Best is trial 5 with value: 0.775476069168414.
[I 2024-11-13 21:51:24,462] Trial 10 finished with value: 0.6969195446998454 and parameters: {'lambda': 0.00014946013119694412, 'alpha': 0.013581880025666163}. Best is trial 5 with value: 0.775476069168414.
[I 2024-11-13 21:51:24,662] Trial 11 finished with value: 0.7334652041272984 and parameters: {'lambda': 0.00040345155052458505, 'alpha': 1.0856781764411731e-08}. Best is trial 5

New best score: 0.814
Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051


In [63]:
# Print the best-model metrics summary held by the XGBoost evaluator.
xgb_eval.print_best_results()

Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051


In [None]:
from src.evals.mlp import MLPEvaluator

# MLP evaluation on `odm`: 10 trials; lr and dropout are given as ranges, the
# remaining settings as fixed values.
# NOTE(review): the recorded output for this cell reports `max_epochs=100`
# while "max_epochs" is 50 here, and the cell has no execution count. Either
# the output predates this edit or the evaluator does not forward the key;
# confirm.
mlp_params = {
    "lr_range": [1e-4, 1e-2],
    "l2_lambda": 5e-4,
    "dropout_range": [0.1, 0.5],
    "hidden_channels": 64,
    "proj_dim": 64,
    "batch_size": 32,
    "max_epochs": 50,
}
mlp_eval = MLPEvaluator(data_manager=odm, n_trials=10, verbose=True, params=mlp_params)
# Bind the result so the cell does not echo it as its last expression.
_ = mlp_eval.evaluate()

[I 2024-11-13 21:52:55,151] A new study created in memory with name: no-name-6b2c9513-2f1e-4374-912e-c9eb65b9fb60
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using

New best score: 0.663
Best model performance:
Accuracy: 0.906 ± 0.032
F1 Macro: 0.815 ± 0.062
F1 Weighted: 0.898 ± 0.038


`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-

New best score: 0.704
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056


`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/lubojjan/DiplomaGeneral/.venv/lib/python3.12/site-

In [72]:
# Print the MLP evaluator's best-model metrics, then the hyperparameters
# that produced them.
mlp_eval.print_best_results()
mlp_eval.print_best_parameters()

Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056
Best hyperparameters:
{'lr': 0.0002717018667066596, 'dropout': 0.26157280166479474}


In [92]:
# Write one row per classical baseline into the shared results CSV.
# NOTE(review): "disese" in the file name looks like a typo for "disease";
# the path is left unchanged so anything already reading this file still works.
results_file = "logs/mds_disese_eval.csv"
for row_name, evaluator in (("knn", knn_eval), ("svm", svm_eval), ("xgb", xgb_eval)):
    evaluator.save_results(results_file=results_file, row_name=row_name)

In [76]:
# Side-by-side recap of the four baselines. Each summary is preceded by the
# model name because print_best_results() emits the same "Best model
# performance:" header for every evaluator, which made the recap unreadable.
for model_name, evaluator in (
    ("knn", knn_eval),
    ("svm", svm_eval),
    ("xgb", xgb_eval),
    ("mlp", mlp_eval),
):
    print(f"--- {model_name} ---")
    evaluator.print_best_results()

Best model performance:
Accuracy: 0.933 ± 0.042
F1 Macro: 0.857 ± 0.094
F1 Weighted: 0.924 ± 0.051
Best model performance:
Accuracy: 0.947 ± 0.027
F1 Macro: 0.891 ± 0.060
F1 Weighted: 0.941 ± 0.029
Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.910 ± 0.080
F1 Weighted: 0.945 ± 0.051
Best model performance:
Accuracy: 0.920 ± 0.050
F1 Macro: 0.840 ± 0.097
F1 Weighted: 0.911 ± 0.056


In [78]:
from src.evals.mogonet import MOGONETEvaluator
from src.data_managers.sample_graph import SampleGraphDataManager

# Loaders for the four modalities used by the sample-graph data manager.
mogonet_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    "te": te_loader,
}
# Graph-construction settings passed to SampleGraphDataManager.
mogonet_graph_params = {
    "graph_style": "threshold",
    "self_connections": True,
    "avg_degree": 5,
}
# Model/training settings; every modality gets the same encoder width (64).
# With "log_interval" 101 and 200 epochs, the recorded output shows only
# epoch 101 being logged for each training run.
mogonet_model_params = {
    "encoder_hidden_channels": {omic: 64 for omic in mogonet_loaders},
    "encoder_type": "gat",
    "dropout": 0.2,
    "integrator_type": "attention",
    "integration_in_dim": 16,
    "vcdn_hidden_channels": 16,
    "epochs": 200,
    "log_interval": 101,
}

mogonet_data_manager = SampleGraphDataManager(
    omic_data_loaders=mogonet_loaders,
    n_splits=5,
    params=mogonet_graph_params,
)
mogonet_eval = MOGONETEvaluator(
    data_manager=mogonet_data_manager,
    n_trials=1,
    params=mogonet_model_params,
)
# Left as the last expression so the returned metrics are displayed.
mogonet_eval.evaluate()

[I 2024-11-13 21:56:42,386] A new study created in memory with name: no-name-7f3917be-c09c-4430-85de-1c7cc13d127f



Epoch: 101:
Train Loss: 0.0732, Train Acc: 0.9492, Train F1 Macro: 0.9059, Train F1 Weighted: 0.9481
Val Acc: 0.9333, Val F1 Macro: 0.8800, Val F1 Weighted: 0.9280, Val Geometric Mean: 0.9135
Test Acc: 0.9333, Test F1 Macro: 0.8800, Test F1 Weighted: 0.9280
##################################################

Epoch: 101:
Train Loss: 0.0590, Train Acc: 0.9661, Train F1 Macro: 0.9344, Train F1 Weighted: 0.9646
Val Acc: 0.9333, Val F1 Macro: 0.8800, Val F1 Weighted: 0.9280, Val Geometric Mean: 0.9135
Test Acc: 0.9333, Test F1 Macro: 0.8800, Test F1 Weighted: 0.9280
##################################################

Epoch: 101:
Train Loss: 0.0264, Train Acc: 1.0000, Train F1 Macro: 1.0000, Train F1 Weighted: 1.0000
Val Acc: 0.8667, Val F1 Macro: 0.7917, Val F1 Weighted: 0.8667, Val Geometric Mean: 0.8409
Test Acc: 0.8667, Test F1 Macro: 0.7917, Test F1 Weighted: 0.8667
##################################################

Epoch: 101:
Train Loss: 0.0644, Train Acc: 0.9831, Train F1 Macro: 0.

[I 2024-11-13 21:56:57,219] Trial 0 finished with value: 0.8601248426666666 and parameters: {}. Best is trial 0 with value: 0.8601248426666666.


New best score: 0.860
Best model performance:
Accuracy: 0.960 ± 0.053
F1 Macro: 0.934 ± 0.085
F1 Weighted: 0.959 ± 0.054


{'acc': np.float64(0.96),
 'f1_macro': np.float64(0.9343333333333333),
 'f1_weighted': np.float64(0.9589333333333332),
 'acc_std': np.float64(0.053333333333333316),
 'f1_macro_std': np.float64(0.08513779680285628),
 'f1_weighted_std': np.float64(0.05390625607890463)}

Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.918 ± 0.070
F1 Weighted: 0.947 ± 0.047

Best model performance:
Accuracy: 0.960 ± 0.053
F1 Macro: 0.947 ± 0.069
F1 Weighted: 0.963 ± 0.049

In [81]:
from src.evals.birgat import BiRGATEvaluator
from src.data_managers.bipartite_graph import BipartiteGraphDataManager

# Loaders for the four modalities used by the bipartite-graph data manager.
birgat_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
    "circrna": circrna_loader,
    "te": te_loader,
}
# The same differential-expression threshold (1.8) is used for every modality.
birgat_graph_params = {
    "diff_exp_thresholds": {omic: 1.8 for omic in birgat_loaders},
}

birgat_data_manager = BipartiteGraphDataManager(
    omic_data_loaders=birgat_loaders,
    n_splits=5,
    params=birgat_graph_params,
)
birgat_eval = BiRGATEvaluator(
    data_manager=birgat_data_manager,
    params={"epochs": 200, "log_interval": 50},
    n_trials=1,
)
# Left as the last expression so the returned metrics are displayed.
birgat_eval.evaluate()

[I 2024-11-13 21:59:12,056] A new study created in memory with name: no-name-ce572e65-001d-4a8f-beff-d92b22013261



Epoch: 050:
Train Loss: 0.4343, Train Acc: 0.8305, Train F1 Macro: 0.4537, Train F1 Weighted: 0.7536
Val Acc: 0.8000, Val F1 Macro: 0.4444, Val F1 Weighted: 0.7111, Val Geometric Mean: 0.6323
Test Acc: 0.8000, Test F1 Macro: 0.4444, Test F1 Weighted: 0.7111
##################################################

Epoch: 100:
Train Loss: 0.1226, Train Acc: 0.9322, Train F1 Macro: 0.8689, Train F1 Weighted: 0.9291
Val Acc: 0.8667, Val F1 Macro: 0.7115, Val F1 Weighted: 0.8385, Val Geometric Mean: 0.8026
Test Acc: 0.8667, Test F1 Macro: 0.7115, Test F1 Weighted: 0.8385
##################################################

Epoch: 150:
Train Loss: 0.0367, Train Acc: 1.0000, Train F1 Macro: 1.0000, Train F1 Weighted: 1.0000
Val Acc: 0.8667, Val F1 Macro: 0.7115, Val F1 Weighted: 0.8385, Val Geometric Mean: 0.8026
Test Acc: 0.8667, Test F1 Macro: 0.7115, Test F1 Weighted: 0.8385
##################################################

Epoch: 200:
Train Loss: 0.0429, Train Acc: 0.9661, Train F1 Macro: 0.

[I 2024-11-13 22:00:06,125] Trial 0 finished with value: 0.7948768203234714 and parameters: {}. Best is trial 0 with value: 0.7948768203234714.



Epoch: 200:
Train Loss: 0.0096, Train Acc: 1.0000, Train F1 Macro: 1.0000, Train F1 Weighted: 1.0000
Val Acc: 0.7143, Val F1 Macro: 0.4167, Val F1 Weighted: 0.7143, Val Geometric Mean: 0.5968
Test Acc: 0.7143, Test F1 Macro: 0.4167, Test F1 Weighted: 0.7143
##################################################
New best score: 0.795
Best model performance:
Accuracy: 0.947 ± 0.050
F1 Macro: 0.894 ± 0.106
F1 Weighted: 0.939 ± 0.060


{'acc': np.float64(0.9466666666666667),
 'f1_macro': np.float64(0.8943076923076922),
 'f1_weighted': np.float64(0.9388923076923078),
 'acc_std': np.float64(0.04988876515698587),
 'f1_macro_std': np.float64(0.10597711040122806),
 'f1_weighted_std': np.float64(0.05965219905477923)}

In [None]:
# Best model performance:
# Accuracy: 0.960 ± 0.033
# F1 Macro: 0.920 ± 0.072
# F1 Weighted: 0.958 ± 0.035

In [24]:
from src.data_managers.bipartite_graph import BipartiteGraphDataManager
from torch_geometric.transforms import ToUndirected

# Two-modality (mRNA + miRNA) bipartite graph used for manual inspection.
bipartite_loaders = {
    "mrna": mrna_loader,
    "mirna": mirna_loader,
}
bpdm = BipartiteGraphDataManager(
    omic_data_loaders=bipartite_loaders,
    n_splits=5,
    params={"diff_exp_thresholds": {omic: 1.8 for omic in bipartite_loaders}},
)
# get_split(0) is unpacked into four values; only the first is kept.
data, _, _, _ = bpdm.get_split(0)
# Left as the last expression so the object's repr is displayed.
data

torch.Size([200]) torch.Size([200])
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(5) tensor(1) tensor(12.9189)
torch.Size([200]) torch.Size([200])
isolated sample nodes, isolated gene nodes, mean degree: 
tensor(4) tensor(0) tensor(12.7027)


HeteroData(
  feature_names=[2],
  omics=[2],
  num_relations=6,
  y=[74],
  train_mask=[74],
  test_mask=[74],
  val_mask=[74],
  mrna={ x=[74, 200] },
  mrna_feature={ x=[200, 200] },
  mirna={ x=[74, 200] },
  mirna_feature={ x=[200, 200] },
  (mrna, diff_exp, mrna_feature)={ edge_index=[2, 956] },
  (mirna, diff_exp, mirna_feature)={ edge_index=[2, 940] },
  (mrna_feature, rev_diff_exp, mrna)={ edge_index=[2, 956] },
  (mirna_feature, rev_diff_exp, mirna)={ edge_index=[2, 940] },
  (mrna_feature, interacts, mrna_feature)={ edge_index=[2, 293] },
  (mirna_feature, regulates, mrna_feature)={ edge_index=[2, 462] }
)

In [39]:
import torch
from src.models.birgat import BiRGAT

# Hyperparameters for a one-off BiRGAT instance, used to check that a forward
# pass over `data` runs.
params = {
    "hidden_channels": [200, 32, 32, 32, 32],
    "heads": 2,
    "dropout": 0.2,
    "attention_dropout": 0.2,
    "use_proj_module": False,
    "integrator_type": "attention",
    "proj_dim" : 64,
    "three_layers": False
}

model = BiRGAT(
    omic_channels=data.omics,
    feature_names=data.feature_names,
    relations=list(data.edge_index_dict.keys()),
    # Input width per node type, read from the second dimension of each
    # feature tensor in x_dict.
    input_dims={omic: x.shape[1] for omic, x in data.x_dict.items()},
    proj_dim=params["proj_dim"],
    hidden_channels=params["hidden_channels"],
    # Number of distinct labels present in data.y.
    num_classes=len(torch.unique(data.y)),
    heads=params["heads"],
    dropout=params["dropout"],
    attention_dropout=params["attention_dropout"],
    use_proj_module=params["use_proj_module"],
    integrator_type=params["integrator_type"],
    three_layers=params["three_layers"],
)
# Call the module itself rather than .forward(): torch.nn.Module.__call__
# runs registered hooks and then dispatches to forward().
# NOTE(review): assumes BiRGAT subclasses torch.nn.Module; confirm.
model(data)

tensor([[-0.1996,  0.0200],
        [ 0.1476, -0.0125],
        [-0.0169, -0.3833],
        [-0.2606, -0.1123],
        [ 0.0988,  0.1072],
        [ 0.2152,  0.1267],
        [ 0.2539,  0.2399],
        [-0.1327,  0.1547],
        [ 0.1204,  0.0057],
        [ 0.0787,  0.0047],
        [-0.1699, -0.0654],
        [-0.1015, -0.0169],
        [-0.1148, -0.1564],
        [-0.2891, -0.1228],
        [-0.1221, -0.1194],
        [-0.2548, -0.1509],
        [-0.1392, -0.1450],
        [-0.0354, -0.2231],
        [-0.2383,  0.0189],
        [-0.0243, -0.2815],
        [-0.1663,  0.0082],
        [ 0.0280, -0.0584],
        [ 0.1171,  0.0122],
        [ 0.0266, -0.0320],
        [-0.1866,  0.0935],
        [-0.1981, -0.1071],
        [-0.1796, -0.3447],
        [-0.0147, -0.2409],
        [ 0.1196, -0.2709],
        [ 0.0811, -0.2482],
        [ 0.1268, -0.0737],
        [-0.0107, -0.0359],
        [-0.1744, -0.2606],
        [-0.2738, -0.0500],
        [-0.0365, -0.1182],
        [ 0.0636, -0

In [302]:
# Display the edge_index tensor of every relation in the graph.
# NOTE(review): this echoes each tensor in full; the HeteroData repr shown
# earlier already lists the per-relation edge_index shapes.
data.edge_index_dict

{('mrna_feature',
  'interacts',
  'mrna_feature'): tensor([[  0,   1,   2,   2,   3,   4,   4,   4,   5,   6,   7,   8,   9,  10,
           10,  10,  10,  10,  10,  11,  12,  12,  12,  12,  12,  13,  13,  14,
           15,  15,  16,  17,  17,  17,  17,  18,  19,  19,  19,  19,  20,  20,
           21,  21,  21,  21,  21,  21,  21,  22,  23,  23,  23,  24,  25,  26,
           27,  27,  28,  29,  30,  30,  31,  31,  32,  33,  33,  34,  35,  36,
           36,  36,  37,  38,  38,  38,  38,  38,  38,  38,  38,  38,  39,  40,
           40,  40,  40,  40,  40,  40,  40,  41,  41,  42,  43,  43,  43,  43,
           43,  44,  44,  45,  45,  46,  47,  48,  48,  48,  48,  49,  49,  49,
           49,  49,  50,  50,  50,  51,  51,  51,  52,  52,  52,  52,  52,  52,
           53,  53,  53,  53,  53,  53,  53,  53,  54,  55,  56,  57,  58,  59,
           59,  60,  61,  62,  62,  62,  62,  62,  63,  64,  65,  65,  65,  65,
           66,  67,  67,  67,  68,  69,  70,  71,  72,  73,  74,  74,

In [300]:
# Look up the edge_index of the single (mrna_feature, interacts, mrna_feature) relation.
interacts_relation = ("mrna_feature", "interacts", "mrna_feature")
data[interacts_relation].edge_index

tensor([[  0,   1,   2,   2,   3,   4,   4,   4,   5,   6,   7,   8,   9,  10,
          10,  10,  10,  10,  10,  11,  12,  12,  12,  12,  12,  13,  13,  14,
          15,  15,  16,  17,  17,  17,  17,  18,  19,  19,  19,  19,  20,  20,
          21,  21,  21,  21,  21,  21,  21,  22,  23,  23,  23,  24,  25,  26,
          27,  27,  28,  29,  30,  30,  31,  31,  32,  33,  33,  34,  35,  36,
          36,  36,  37,  38,  38,  38,  38,  38,  38,  38,  38,  38,  39,  40,
          40,  40,  40,  40,  40,  40,  40,  41,  41,  42,  43,  43,  43,  43,
          43,  44,  44,  45,  45,  46,  47,  48,  48,  48,  48,  49,  49,  49,
          49,  49,  50,  50,  50,  51,  51,  51,  52,  52,  52,  52,  52,  52,
          53,  53,  53,  53,  53,  53,  53,  53,  54,  55,  56,  57,  58,  59,
          59,  60,  61,  62,  62,  62,  62,  62,  63,  64,  65,  65,  65,  65,
          66,  67,  67,  67,  68,  69,  70,  71,  72,  73,  74,  74,  75,  75,
          75,  75,  75,  76,  76,  76,  76,  77,  77

In [274]:
# Display the per-node-type feature tensors stored on the graph.
data.x_dict

{'mrna': tensor([[ 0.1901,  0.8589,  0.7247,  ...,  0.4696,  0.2355,  0.2009],
         [ 0.1317,  0.7882,  0.7385,  ...,  0.8404,  0.4612,  0.0611],
         [ 0.9406,  0.6232,  0.7357,  ...,  0.8423,  0.1146,  0.2261],
         ...,
         [ 0.5044,  0.5115,  0.6188,  ...,  0.3666,  0.1762,  0.7017],
         [-0.2887,  0.5810,  0.8522,  ...,  0.3074,  0.1788,  1.1057],
         [-0.0414,  0.5253,  0.5139,  ...,  0.1934,  0.9554,  0.1940]]),
 'mirna': tensor([[ 0.8021,  0.7240,  0.8759,  ...,  0.7245,  0.6704,  0.8779],
         [ 0.6343,  0.6804,  0.6110,  ...,  0.7883,  0.2520,  0.4453],
         [ 0.4353,  0.6257,  0.7742,  ...,  0.6928,  0.4783,  0.6300],
         ...,
         [ 0.6901,  0.5982,  0.8047,  ...,  0.9011,  0.4010,  0.6526],
         [ 0.7930,  0.8460,  0.6499,  ...,  0.8561,  0.7770,  0.6665],
         [ 0.9770, -0.0268,  0.6509,  ...,  0.8369,  0.5275,  0.9227]])}

In [289]:
# `pl` is not imported by any other cell visible in this notebook, so this
# cell raised NameError on a fresh kernel; import it where it is used.
import polars as pl


def mirna_to_gene_name(mirna_id: str) -> str:
    """Build a gene-style name from a dash-separated miRNA identifier.

    Joins the 2nd and 3rd dash-separated fields and upper-cases the result,
    e.g. "hsa-miR-21-5p" -> "MIR21". Identifiers with fewer fields simply
    contribute whatever fields exist in that slice.
    """
    return "".join(mirna_id.split("-")[1:3]).upper()


# Read the miRNA-mRNA interaction table, replace its "mirna" column with the
# normalised gene-style names, and write the ("mirna", "gene") pairs to a new CSV.
mirna_mrna_interactions_db = pl.read_csv("interaction_data/mirna_mrna_interactions_DB.csv")
mmirnas = mirna_mrna_interactions_db["mirna"].to_list()
mirna_gene_names = [mirna_to_gene_name(mirna) for mirna in mmirnas]
(
    mirna_mrna_interactions_db
    .with_columns(pl.Series("mirna", mirna_gene_names))
    .select("mirna", "gene")
    .write_csv("interaction_data/mirna_genes_mrna.csv")
)