In [4]:
import os
import sys
import pandas as pd
import numpy as np
import neptune
from neptune.utils import stringify_unsupported
from utils_neptune import start_neptune_run

In [3]:

import sklearn
# Print the installed scikit-learn version so it is recorded in the cell output.
print('sklearn version: ', sklearn.__version__)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer, roc_auc_score, accuracy_score, f1_score, precision_score, recall_score

sklearn version:  1.3.2


In [2]:
# Directory holding the fine-tuning CSVs read below.
# Set the RCC_DATA_DIR environment variable to point at a different copy of the
# data; when it is unset the original author-local location is used unchanged.
data_dir = os.environ.get(
    'RCC_DATA_DIR',
    '/Users/jonaheaton/ReviveMed Dropbox/Jonah Eaton/development_CohortCombination/alignment_RCC_2024_Feb_27/March_22_Data',
)

In [15]:
# Read the four fine-tuning tables (train/validation features and labels) from
# data_dir, in that order. The first CSV column becomes the row index of each frame.
X_train, y_train, X_val, y_val = (
    pd.read_csv(os.path.join(data_dir, csv_name), index_col=0)
    for csv_name in (
        'X_finetune_train.csv',
        'y_finetune_train.csv',
        'X_finetune_val.csv',
        'y_finetune_val.csv',
    )
)

In [16]:
# Keep only the 'MSKCC BINARY' label column, drop rows whose label is missing,
# and restrict each feature table to the row index that survives.
# NOTE(review): y_train / y_val are rebound from the loaded tables to a single
# column here, so this cell presumably needs the load cell re-run before it can
# be executed a second time -- confirm.
y_train = y_train['MSKCC BINARY'].dropna()
X_train = X_train.loc[y_train.index]

y_val = y_val['MSKCC BINARY'].dropna()
X_val = X_val.loc[y_val.index]

In [26]:
# Baseline: logistic regression with scikit-learn defaults on the MSKCC binary label.
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Column 1 of predict_proba is the score for the second class in lr.classes_.
y_prob = lr.predict_proba(X_val)[:,1]

train_roc_auc = roc_auc_score(y_train, lr.predict_proba(X_train)[:,1], average='micro')
val_roc_auc = roc_auc_score(y_val, y_prob, average='micro')

# Log the scores to Neptune (with_run_id=None presumably starts a new run --
# confirm in utils_neptune). The try/finally guarantees the run is closed even
# if one of the logging assignments raises, so no run is left open.
run, _ = start_neptune_run(with_run_id=None,tags=['v3.1','classical'])
try:
    run['finetune_mkscc/kwargs/model_kind'] ='LogisticRegression'
    run['finetune_mkscc/eval/train/Binary_MSKCC/AUROC (micro)'] = train_roc_auc
    run['finetune_mkscc/eval/val/Binary_MSKCC/AUROC (micro)'] = val_roc_auc
finally:
    run.stop()

In [27]:
# Baseline: random forest on the MSKCC binary label.
# random_state pins the forest's internal randomness so that the AUROC values
# logged below are reproducible across kernel restarts.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Column 1 of predict_proba is the score for the second class in rf.classes_.
y_prob = rf.predict_proba(X_val)[:,1]

train_roc_auc = roc_auc_score(y_train, rf.predict_proba(X_train)[:,1], average='micro')
val_roc_auc = roc_auc_score(y_val, y_prob, average='micro')

# Log the scores to Neptune (with_run_id=None presumably starts a new run --
# confirm in utils_neptune). The try/finally guarantees the run is closed even
# if one of the logging assignments raises, so no run is left open.
run, _ = start_neptune_run(with_run_id=None,tags=['v3.1','classical'])
try:
    run['finetune_mkscc/kwargs/model_kind'] ='RandomForestClassifier'
    run['finetune_mkscc/eval/train/Binary_MSKCC/AUROC (micro)'] = train_roc_auc
    run['finetune_mkscc/eval/val/Binary_MSKCC/AUROC (micro)'] = val_roc_auc
finally:
    run.stop()


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/revivemed/RCC/e/RCC-1426
[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
[neptune] [info   ] All 3 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/revivemed/RCC/e/RCC-1426/metadata


In [28]:
# Baseline: support-vector classifier on the MSKCC binary label.
# probability=True is required for predict_proba; scikit-learn fits those
# probability estimates with a randomized internal procedure, so random_state is
# pinned to keep the logged AUROC values reproducible.
svc = SVC(probability=True, random_state=42)
svc.fit(X_train, y_train)

# Column 1 of predict_proba is the score for the second class in svc.classes_.
y_prob = svc.predict_proba(X_val)[:,1]

train_roc_auc = roc_auc_score(y_train, svc.predict_proba(X_train)[:,1], average='micro')
val_roc_auc = roc_auc_score(y_val, y_prob, average='micro')

# Log the scores to Neptune (with_run_id=None presumably starts a new run --
# confirm in utils_neptune). The try/finally guarantees the run is closed even
# if one of the logging assignments raises, so no run is left open.
run, _ = start_neptune_run(with_run_id=None,tags=['v3.1','classical'])
try:
    run['finetune_mkscc/kwargs/model_kind'] ='SVC'
    run['finetune_mkscc/eval/train/Binary_MSKCC/AUROC (micro)'] = train_roc_auc
    run['finetune_mkscc/eval/val/Binary_MSKCC/AUROC (micro)'] = val_roc_auc
finally:
    run.stop()

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/revivemed/RCC/e/RCC-1427
[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.
[neptune] [info   ] All 3 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/revivemed/RCC/e/RCC-1427/metadata


## Run Evaluation

In [29]:
# Read the Neptune API token from the environment instead of embedding it in the
# notebook, so the credential is never saved or shared with the file.
# If NEPTUNE_API_TOKEN is unset this evaluates to None and opening the project
# is expected to fail with Neptune's own missing-token error.
# NOTE(review): a token was previously committed on this line; it should be
# revoked and regenerated in the Neptune account settings.
NEPTUNE_API_TOKEN = os.environ.get('NEPTUNE_API_TOKEN')


In [30]:
# Encoder kind whose runs are kept for the evaluation below.
encoder_kind = 'AE'

# Open the Neptune project in read-only mode.
project = neptune.init_project(project='revivemed/RCC', mode="read-only", api_token=NEPTUNE_API_TOKEN)

# Fetch the inactive runs tagged 'v3.1' as a DataFrame, then filter in two steps.
runs_table_df = (
    project.fetch_runs_table(tag=['v3.1'], state='inactive')
    .to_pandas()
    # drop the failed runs
    .loc[lambda table: ~table['sys/failed']]
    # filter by encoder_kind
    .loc[lambda table: table['pretrain/kwargs/encoder_kind'] == encoder_kind]
    .copy()
)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/revivemed/RCC/


Fetching table...: 0 [00:00, ?/s]

In [32]:
# Display the column names of the filtered runs table.
runs_table_df.columns

Index(['sys/creation_time', 'sys/description', 'sys/failed', 'sys/hostname',
       'sys/id', 'sys/modification_time', 'sys/monitoring_time', 'sys/name',
       'sys/owner', 'sys/ping_time',
       ...
       'monitoring/fe0263b7/memory', 'monitoring/fe0263b7/pid',
       'monitoring/fe0263b7/stdout', 'monitoring/fe0263b7/tid',
       'monitoring/ffe67bd5/cpu', 'monitoring/ffe67bd5/hostname',
       'monitoring/ffe67bd5/memory', 'monitoring/ffe67bd5/pid',
       'monitoring/ffe67bd5/stdout', 'monitoring/ffe67bd5/tid'],
      dtype='object', length=774)

In [34]:
# Collect every column whose name contains 'objectives/OBJ' and preview those
# columns next to the run id for the first few runs.
obj_cols = [name for name in runs_table_df.columns if 'objectives/OBJ' in name]
cols = ['sys/id', *obj_cols]
runs_table_df[cols].head()

Unnamed: 0,sys/id,objectives/OBJ 0-1-1-1 (v0),objectives/OBJ 1-0-0-0 (v0),objectives/OBJ 1-0-0-1 (v0),objectives/OBJ 1-0-0-10 (v0),objectives/OBJ 1-1-0-1 (v0),objectives/OBJ 1-1-1-0 (v0),objectives/OBJ 1-1-1-1 (v0),objectives/OBJ 1-1-1-10 (v0),objectives/OBJ 10-0-0-1 (v0),objectives/OBJ 10-1-1-1 (v0),objectives/OBJ no Adv (v0),objectives/OBJ only recon (v0),objectives/OBJ4 equal weights (v0),objectives/OBJECTIVE equal weights
4,RCC-1424,0.99831,-0.704315,-0.206005,-0.206005,-0.206005,0.793995,0.293995,-4.206005,-0.206005,-6.044841,,,,
5,RCC-1423,0.616359,-161.654419,-161.538059,-161.538059,-161.538059,-160.538059,-161.038059,-165.538059,-161.538059,-1615.92783,,,,
6,RCC-1422,0.991053,-0.589987,-0.098934,-0.098934,-0.098934,0.901066,0.401066,-4.098934,-0.098934,-4.908821,,,,
7,RCC-1421,0.995662,-0.691467,-0.195805,-0.195805,-0.195805,0.804195,0.304195,-4.195805,-0.195805,-5.919007,,,,
8,RCC-1420,0.993543,-0.593849,-0.100307,-0.100307,-0.100307,0.899693,0.399693,-4.100307,-0.100307,-4.944951,,,,
