In [None]:
import os
import sys
import logging

# ---------------------------------------------------------------------------
# Path configuration
# ---------------------------------------------------------------------------
# The project root is taken to be two directory levels above the current
# working directory; it is put on sys.path (once) so that project packages
# can be imported from this notebook.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.getcwd()))
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.append(PROJECT_ROOT)

# Model-specific artefacts live in ./results next to the notebook; the
# directory is created on first run.
RESULTS_DIR = os.path.join(os.getcwd(), "results")
os.makedirs(RESULTS_DIR, exist_ok=True)
RESULTS_PATH, MODELS_PATH = (
    os.path.join(RESULTS_DIR, sub_dir) for sub_dir in ("results", "model")
)

# Project-wide output folders located directly under the project root.
FIN_RESULTS_OV_PATH, FIN_RESULTS_SPLIT_PATH, FEAT_IMP_PATH = (
    os.path.join(PROJECT_ROOT, folder)
    for folder in ("results_modelling_ovs", "results_modelling_splits", "feat_imps")
)

# Identifier of the model class handled by this notebook.
MODEL_NAME = "CoxPN"

# Imports
from models.modelling_process import ModellingProcess
from utils.analysis import * 

# Logging: configure the root logger once (INFO and above, each record
# prefixed with timestamp and level) and expose a notebook-level logger.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
import os
import pandas as pd
from models.cox_pas_net_model import Cox_PASNet, Cox_PASNet_Model
import torch

# # TODO: Create a DataFrame: column 1: feature name, column 2: value
# # -------------------- functions to load feat. imp from model
# def load_feat_imp(model_path):
#     with open(model_path, 'rb') as file:
#         model = pickle.load(file)
    
#     # Cat boost specific
#     #print(model)
#     # For models that do not have a model class of our own, check what the library-internal call looks like
#     imps = model.model.get_feature_importance()
    
#     df = pd.DataFrame({
#     'feature': model.model.feature_names_,
#     'value': imps
#     })
    
#     df = df.sort_values(by = "value", ascending=False)
#     df = df[df.loc[: , 'value'] > 0]
    
#     return df

# def load_model(model_path): 
#     with open(model_path, 'rb') as file:
#         model = pickle.load(file)
    
#     return model
    

# --------------------- get test perf 
import os
import pandas as pd
import numpy as np
import re

def get_weights(model_path):
    """Export layer weights and intermediate activations of a saved Cox-PASNet.

    The network stored at ``model_path`` is loaded onto the CPU, the data is
    prepared through ``ModellingProcess.prepare_data`` and
    ``Cox_PASNet_Model._prepare_data``, and the following CSV files are
    written to the ``weights/`` directory (relative to the current working
    directory, created if missing):

    * ``w_sc1.csv`` ... ``w_sc4.csv``: weight matrices of layers ``sc1``-``sc4``
    * ``pathway_node.csv``: ``tanh(sc1(genes))``
    * ``hidden_node.csv``: ``tanh(sc2(pathway_node))``
    * ``hidden_2_node.csv``: ``tanh(sc3(hidden_node))`` concatenated with the
      clinical covariates, i.e. the input of ``sc4``
    * ``lin_pred.csv``: output of ``sc4``

    Parameters
    ----------
    model_path : str
        Path to a network object that can be read with ``torch.load`` and
        exposes the layers ``sc1``-``sc4`` and ``tanh``.

    Returns
    -------
    None
        The function only writes files.
    """
    DATA_CONFIG = {
        'use_pca': False,
        'pca_threshold': 0.85,
        'use_imputed': True,
        'select_random' : False,
        'use_cohorts': False,
        # Must be adjusted per algorithm; CatBoost is essentially the only one
        # that does not need one-hot encoding, set to True for all others.
        'requires_ohenc' : True,
        'gene_type' : 'intersection',
        'clinical_covs' : ["AGE", "TISSUE", "GLEASON_SCORE", 'PRE_OPERATIVE_PSA'],
        # Was misspelled 'only_pDta'; test_perf_all_models uses 'only_pData'.
        'only_pData' : False
    }

    # np.savetxt does not create missing directories.
    weights_dir = "weights"
    os.makedirs(weights_dir, exist_ok=True)

    # The checkpoint is a fully pickled module, so weights_only must be False
    # (newer torch versions default to True and would refuse to load it).
    # NOTE: this unpickles arbitrary objects -- only load trusted checkpoints.
    net = torch.load(model_path, map_location=torch.device('cpu'), weights_only=False)
    pathway_mask = pd.read_csv("../../data/pathway_mask.csv", index_col = 0)

    mp = ModellingProcess()
    mp.prepare_data(DATA_CONFIG, PROJECT_ROOT)
    model_hull = Cox_PASNet_Model(
        pathway_mask=pathway_mask,
        clin_covs=['AGE', 'TISSUE_FFPE', 'TISSUE_Fresh_frozen',
                   'TISSUE_Snap_frozen', 'GLEASON_SCORE',
                   'PRE_OPERATIVE_PSA'],
    )
    # Attach the loaded network to the wrapper and mark it as fitted.
    model_hull.model = net
    model_hull.is_fitted_ = True

    # Only the first two returned tensors (gene input, clinical covariates)
    # are needed here; the remaining return values are ignored.
    # NOTE(review): 0.1 is presumably the validation fraction -- confirm
    # against Cox_PASNet_Model._prepare_data.
    genes, pData, *_ = model_hull._prepare_data(mp.X, mp.y, 0.1)

    # Weight matrices of the four linear layers.
    for layer_name in ("sc1", "sc2", "sc3", "sc4"):
        layer_weights = getattr(net, layer_name).weight.data.cpu().detach().numpy()
        np.savetxt(
            os.path.join(weights_dir, "w_{}.csv".format(layer_name)),
            layer_weights,
            delimiter=",",
        )

    # Manual layer-by-layer forward pass; no gradients are needed for export.
    with torch.no_grad():
        pathway_node = net.tanh(net.sc1(genes))
        hidden_node = net.tanh(net.sc2(pathway_node))
        hidden_2_node = net.tanh(net.sc3(hidden_node))
        x_cat = torch.cat((hidden_2_node, pData), 1)
        lin_pred = net.sc4(x_cat)

    activations = {
        "pathway_node.csv": pathway_node,
        "hidden_node.csv": hidden_node,
        # Deliberately the concatenated tensor (hidden layer 2 + clinical
        # covariates), as in the original implementation.
        "hidden_2_node.csv": x_cat,
        "lin_pred.csv": lin_pred,
    }
    for file_name, tensor in activations.items():
        np.savetxt(
            os.path.join(weights_dir, file_name),
            tensor.cpu().detach().numpy(),
            delimiter=",",
        )


# Function to test performance of all models
def test_perf_all_models(model_path):
    files = os.listdir(model_path)
    test_perf = []
    print(files)
    for file in files:
        print(file)

        contains_pData = bool(re.search(r"pData", file, re.IGNORECASE))
        contains_intersection = bool(re.search(r"inter|intersection", file, re.IGNORECASE))
        contains_imputed = bool(re.search(r"imp|imputed|common", file, re.IGNORECASE))
        contains_aenc = bool(re.search(r"aenc|auto|autoenc", file, re.IGNORECASE))
        contains_scores = bool(re.search(r"score|scores", file, re.IGNORECASE))
        
        DATA_CONFIG = {
            'use_pca': False,
            'pca_threshold': 0.85,
            'use_imputed': True,
            'select_random' : False, 
            'use_cohorts': False, 
            'requires_ohenc' : True, 
        }

        # Load data based on file type
        if contains_intersection:
            DATA_CONFIG['gene_type'] = 'intersection'
        elif contains_imputed:
            DATA_CONFIG['gene_type'] = 'common_genes'
        elif contains_aenc:
            DATA_CONFIG['gene_type'] = 'autoencoder'
        elif contains_scores: 
            DATA_CONFIG['gene_type'] = 'scores'
        if contains_pData:
            DATA_CONFIG['clinical_covs'] = ["AGE", "TISSUE", "GLEASON_SCORE", 'PRE_OPERATIVE_PSA']
        if contains_pData and not contains_intersection and not contains_imputed and not contains_aenc and not contains_scores: 
            DATA_CONFIG['only_pData'] = True
            DATA_CONFIG['gene_type'] = None
            
        mp = os.path.join(model_path, file)
        model = torch.load(mp, map_location=torch.device('cpu'))
        pathway_mask = pd.read_csv("../../data/pathway_mask.csv", index_col = 0)
        
        model_hull = Cox_PASNet_Model(pathway_mask= pathway_mask, clin_covs=['AGE', 'TISSUE_FFPE', 'TISSUE_Fresh_frozen',
                            'TISSUE_Snap_frozen', 'GLEASON_SCORE',
                            'PRE_OPERATIVE_PSA'])
        
        mp = ModellingProcess()
        mp.prepare_test_data(DATA_CONFIG, PROJECT_ROOT)
        groups = set(mp.test_groups)
        groups = sorted(groups)
        X_cos, y_cos = mp.prepare_test_cohort_data(DATA_CONFIG, PROJECT_ROOT, groups)
                
        model_hull.model = model
        model_hull.is_fitted_ = True
        
        ci1 = model_hull.score(X_cos[0], y_cos[0])
        ci2 = model_hull.score(X_cos[1], y_cos[1])
        print(ci1)         
        print(ci2)
        
        result = {
            'model' : file.replace(".pth", ""), 
            'ci_coh1' : ci1, 
            'ci_coh2' : ci2
        }
        test_perf.append(result)

    return pd.DataFrame(test_perf)

In [None]:
# Load the per-split results for this model class and persist them as CSV.
split_results = load_split_results(RESULTS_PATH, MODEL_NAME)
# The setup cell only creates RESULTS_DIR; make sure the project-level output
# folder exists so to_csv does not fail on a fresh checkout.
os.makedirs(FIN_RESULTS_SPLIT_PATH, exist_ok=True)
split_results.to_csv(os.path.join(FIN_RESULTS_SPLIT_PATH, 'splits_coxPAS.csv'))
split_results

Unnamed: 0,model_class,model,test_cohort,ci,dataset
0,CoxPN,results_intersect_pdata_model3,Atlanta_2014_Long,0.670748,pData_Intersection
1,CoxPN,results_intersect_pdata_model3,Belfast_2018_Jain,0.591866,pData_Intersection
2,CoxPN,results_intersect_pdata_model3,CPC_GENE_2017_Fraser,0.669381,pData_Intersection
3,CoxPN,results_intersect_pdata_model3,CPGEA_2020_Li,0.644514,pData_Intersection
4,CoxPN,results_intersect_pdata_model3,CamCap_2016_Ross_Adams,0.713689,pData_Intersection
5,CoxPN,results_intersect_pdata_model3,CancerMap_2017_Luca,0.658294,pData_Intersection
6,CoxPN,results_intersect_pdata_model3,DKFZ_2018_Gerhauser,0.820225,pData_Intersection
7,CoxPN,results_intersect_pdata_model3,MSKCC_2010_Taylor,0.791815,pData_Intersection
8,CoxPN,results_intersect_pdata_model3,Stockholm_2016_Ross_Adams,0.578806,pData_Intersection


In [32]:
# Results stored under RESULTS_PATH.
results = load_all_results(RESULTS_PATH)
# Score every saved model found in MODELS_PATH on the test cohorts.
test_perf = test_perf_all_models(model_path=MODELS_PATH)

['results_intersect_pdata_model3.pth']
results_intersect_pdata_model3.pth
c:\Users\laeti\PCaPrognostics\models\cox_pas_net\results\model\results_intersect_pdata_model3.pth
Cox_PASNet(
  (tanh): Tanh()
  (sc1): Linear(in_features=6094, out_features=143, bias=True)
  (sc2): Linear(in_features=143, out_features=64, bias=True)
  (sc3): Linear(in_features=64, out_features=32, bias=False)
  (sc4): Linear(in_features=38, out_features=1, bias=False)
)


  model = torch.load(mp, map_location=torch.device('cpu'))
2025-02-11 19:53:25,521 - INFO - Found clinical data specification
2025-02-11 19:53:25,768 - INFO - Loaded data: 496 samples, 13220 features


Index(['TISSUE'], dtype='object')
Index(['TISSUE', 'TISSUE_FFPE', 'TISSUE_Fresh_frozen', 'TISSUE_Snap_frozen'], dtype='object')
['test_cohort_1', 'test_cohort_2']


2025-02-11 19:54:13,764 - INFO - Found clinical data specification
2025-02-11 19:54:13,779 - INFO - Loaded data: 332 samples, 13220 features


Index(['TISSUE'], dtype='object')
Index(['TISSUE', 'TISSUE_FFPE', 'TISSUE_Fresh_frozen', 'TISSUE_Snap_frozen'], dtype='object')


2025-02-11 19:55:11,372 - INFO - Found clinical data specification
2025-02-11 19:55:11,389 - INFO - Loaded data: 164 samples, 13220 features


Index(['TISSUE'], dtype='object')
Index(['TISSUE', 'TISSUE_FFPE', 'TISSUE_Fresh_frozen', 'TISSUE_Snap_frozen'], dtype='object')
0.76204157
0.8332548


In [34]:
# Merge the loaded results with the test-cohort performance and persist the
# overview table as CSV.
results_combined = combine_results(results, test_perf)
# The setup cell only creates RESULTS_DIR; make sure the project-level output
# folder exists so to_csv does not fail on a fresh checkout.
os.makedirs(FIN_RESULTS_OV_PATH, exist_ok=True)
results_combined.to_csv(os.path.join(FIN_RESULTS_OV_PATH, 'ov_coxPAS.csv'))
results_combined

Unnamed: 0,model,mean,sd,ci_coh1,ci_coh2
0,results_intersect_pdata_model3,0.682149,0.076881,0.76204157,0.8332548
