In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import neptune
from models import create_compound_model_from_info, create_pytorch_model_from_info, MultiHead
import json
import torch
import os
from utils_neptune import check_if_path_in_struc, get_sub_struc_from_path
from collections import defaultdict

# Read the Neptune API token from the environment rather than committing it to
# the notebook. When the variable is unset this is None, and the value is passed
# unchanged as `api_token` to neptune.init_run.
# NOTE(review): the token that was previously hardcoded here has been exposed
# in the notebook history and should be revoked and regenerated.
NEPTUNE_API_TOKEN = os.environ.get('NEPTUNE_API_TOKEN')


In [2]:
# Neptune run and task to inspect.
run_id = 'RCC-2925'
task_id = 'Optimized_NIVO-OS ADV EVER-OS__133_finetune'

# Local cache layout: <local_dir>/<run_id>/<task_id>/components
local_dir = os.path.expanduser('~/Desktop/saved_models')
model_dir = f'{local_dir}/{run_id}/{task_id}'
components_dir = f'{model_dir}/components'
os.makedirs(components_dir, exist_ok=True)

In [13]:
# Open the existing run in read-only mode.
run = neptune.init_run(
    project='revivemed/RCC',
    api_token=NEPTUNE_API_TOKEN,
    with_id=run_id,
    mode="read-only",
)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/revivemed/RCC/e/RCC-2925


In [15]:
# Fetch the kwargs that were logged for this task.
original_kwargs = run[f'{task_id}/original_kwargs'].fetch()

In [16]:
# Stop the run opened above; nothing else is read from it in this cell.
run.stop()

[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/revivemed/RCC/e/RCC-2925/metadata


In [3]:

# Download every model component logged under `<task_id>/models` into
# components_dir: '*info*' entries are saved as .json, '*state*' entries as .pt.
run = neptune.init_run(project='revivemed/RCC',
    api_token= NEPTUNE_API_TOKEN,
    with_id=run_id,
    mode="read-only")
try:
    run_struc = run.get_structure()

    substruc = get_sub_struc_from_path(run_struc, f'{task_id}/models')
    for key in substruc.keys():
        if 'info' in key:
            run[f'{task_id}/models/{key}'].download(f'{components_dir}/{key}.json')
        elif 'state' in key:
            run[f'{task_id}/models/{key}'].download(f'{components_dir}/{key}.pt')
finally:
    # Always stop the run, even if a download fails part-way through.
    run.stop()

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/revivemed/RCC/e/RCC-2925


Fetching file...: 0 [00:00, ?/s]

[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/revivemed/RCC/e/RCC-2925/metadata


In [4]:
# Load the relevant encoder and head info/state from the downloaded components
# and assemble them into a compound model plus its sklearn-style wrapper.
head_key = 'Cox_NIVO'
model_files = os.listdir(components_dir)

encoder_info = None
head_info = None
encoder_state = None
head_state = None

for f in model_files:
    component_path = f'{components_dir}/{f}'
    if ('encoder' in f):
        if 'info' in f:
            # Context manager so the JSON file handle is closed promptly.
            with open(component_path) as info_file:
                encoder_info = json.load(info_file)
        elif 'state' in f:
            encoder_state = torch.load(component_path)
    if (head_key in f):
        if 'info' in f:
            with open(component_path) as info_file:
                head_info = json.load(info_file)
            # head_name = f.replace('_info.json', '')
        elif 'state' in f:
            head_state = torch.load(component_path)


# Only build the model when both info files were found; otherwise `model`
# and `skmodel` are left untouched by this cell.
if encoder_info is not None and head_info is not None:
    model = create_compound_model_from_info(encoder_info=encoder_info,
                                            head_info= head_info,
                                            encoder_state_dict=encoder_state,
                                            head_state_dict=head_state)

    model.save_info(model_dir, f'VAE {head_key} info.json')
    model.save_state_to_path(model_dir, f'VAE {head_key} state.pt')

    skmodel = create_pytorch_model_from_info(full_model=model)



In [6]:
# Directory holding the finetune test split (alternative location kept below).
# data_dir = '/app/finetune_data'
data_dir= '/Users/jonaheaton/ReviveMed Dropbox/Jonah Eaton/development_finetune_optimization/April_30_Finetune_Data'

# Both tables use their first CSV column as the index.
X_data = pd.read_csv(
    f'{data_dir}/X_finetune_test.csv',
    index_col=0,
)
y_data = pd.read_csv(
    f'{data_dir}/y_finetune_test.csv',
    index_col=0,
)

In [11]:
# Display the head configuration loaded from the components folder.
head_info

{'training': True,
 'goal': 'survival',
 'kind': 'Cox',
 'name': 'NIVO OS',
 'y_idx': [0, 1],
 'weight': 1,
 'input_size': 109,
 'output_size': 1,
 'architecture': {'kind': 'Cox',
  'name': 'NIVO OS',
  'weight': 1,
  'y_idx': [0, 1],
  'hidden_size': 4,
  'num_hidden_layers': 1,
  'dropout_rate': 0,
  'activation': 'leakyrelu',
  'use_batch_norm': False,
  'num_classes': 1,
  'input_size': 109},
 'score_func_dict': {'Concordance Index': '<function Cox_Head.__init__.<locals>.<lambda> at 0x7f0f07157670>'},
 'file_id': 'Cox_NIVO OS',
 'loss_reduction': 'mean'}

In [9]:
# Display the model object stored under the 'model' key of the wrapper's params.
skmodel.get_params()['model']

CompoundModel(
  (encoder): VAE(
    (encoder): Dense_Layers(
      (network): Sequential(
        (0): Linear(in_features=2736, out_features=162, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
        (2): Dropout(p=0.4, inplace=False)
        (hidden_layer): Sequential(
          (0): Linear(in_features=162, out_features=162, bias=True)
          (1): LeakyReLU(negative_slope=0.01)
          (2): Dropout(p=0.4, inplace=False)
        )
        (output_layer): Sequential(
          (0): Linear(in_features=162, out_features=216, bias=True)
        )
      )
    )
    (decoder): Dense_Layers(
      (network): Sequential(
        (0): Linear(in_features=108, out_features=162, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
        (2): Dropout(p=0.4, inplace=False)
        (hidden_layer): Sequential(
          (0): Linear(in_features=162, out_features=162, bias=True)
          (1): LeakyReLU(negative_slope=0.01)
          (2): Dropout(p=0.4, inplace=False)
        )
      

In [7]:
# Predict on the finetune test features loaded from X_finetune_test.csv.
y_preds = skmodel.predict(X_data.to_numpy())

In [6]:
# Score the model on the test set using the 'OS' and 'OS_Event' columns as targets.
skmodel.score(X_data.to_numpy(),y_data[['OS','OS_Event']].to_numpy())

{'Concordance Index': 0.6098095902701625}

In [9]:
# Score the model on the test set using the 'NIVO OS' and 'OS_Event' columns as targets.
skmodel.score(X_data.to_numpy(),y_data[['NIVO OS','OS_Event']].to_numpy())

{'Concordance Index': 0.6371973587674248}

In [10]:
# Score the model on the test set using the 'EVER OS' and 'OS_Event' columns as targets.
skmodel.score(X_data.to_numpy(),y_data[['EVER OS','OS_Event']].to_numpy())

{'Concordance Index': 0.5876379690949227}

In [None]:
def generate_survival_report(task_id,pretrained=True):
    """Resolve the finetune task id for ``task_id`` (draft; returns nothing).

    A ``task_id`` that already contains 'finetune' or 'randinit' is used as
    is; otherwise '_finetune' is appended when ``pretrained`` is true and
    '_randinit' when it is false.

    NOTE(review): the resolved id is computed but never used or returned, and
    a later cell defines another function with the same name, which replaces
    this one when the notebook is run top to bottom.
    """
    has_suffix = any(tag in task_id for tag in ('finetune', 'randinit'))
    if has_suffix:
        finetune_id = task_id
    else:
        finetune_id = task_id + ('_finetune' if pretrained else '_randinit')

            

In [25]:
def generate_survival_report(desc_str,head_key,pretrained=True,data_dir=None,local_dir=None):
    """Score one survival head of a task's model on the finetune test split.

    Model components and the task's ``original_kwargs`` are cached under
    ``{local_dir}/{run_id}/{task_id}/components``. Anything missing from the
    cache is fetched from the Neptune run in read-only mode.

    Reads the notebook-level names ``run_id`` and ``NEPTUNE_API_TOKEN``.

    Args:
        desc_str: Task description. Used as the task id unchanged when it
            already contains 'finetune' or 'randinit'; otherwise a suffix is
            appended according to ``pretrained``.
        head_key: Substring used to pick the head's component files. Must
            contain 'OS'.
        pretrained: Selects the '_finetune' (True) or '_randinit' (False)
            suffix when ``desc_str`` carries neither.
        data_dir: Directory containing ``X_finetune_test.csv`` and
            ``y_finetune_test.csv``. Defaults to '/app/finetune_data'.
        local_dir: Root of the local model cache. Defaults to
            '~/saved_models' (user-expanded).

    Returns:
        ``{'test c-index': {...}, 'params': {...}}``, or ``None`` when no
        encoder info or no head info file matching ``head_key`` is found.

    Raises:
        ValueError: If ``head_key`` does not contain 'OS'.
    """
    def _load_json(path):
        # Context manager so the file handle is always closed.
        with open(path) as fh:
            return json.load(fh)

    if data_dir is None:
        data_dir = '/app/finetune_data'
    if local_dir is None:
        local_dir = os.path.expanduser('~/saved_models')
    if ('OS' not in head_key):
        raise ValueError('head_key must be an OS')

    if ('finetune' in desc_str) or ('randinit' in desc_str):
        task_id = desc_str
    elif pretrained:
        task_id = desc_str + '_finetune'
    else:
        task_id = desc_str + '_randinit'

    model_dir = f'{local_dir}/{run_id}/{task_id}'
    components_dir = f'{model_dir}/components'
    os.makedirs(components_dir, exist_ok=True)
    model_files = os.listdir(components_dir)

    # The cache can hold the model files without original_kwargs.json (for
    # example when the components were downloaded by another cell), so the
    # two are checked separately.
    need_models = len(model_files) < 4
    need_kwargs = 'original_kwargs.json' not in model_files

    if need_models or need_kwargs:
        run = neptune.init_run(project='revivemed/RCC',
            api_token= NEPTUNE_API_TOKEN,
            with_id=run_id,
            mode="read-only")
        try:
            fetched_kwargs = run[task_id+'/original_kwargs'].fetch()
            with open(f'{components_dir}/original_kwargs.json','w') as fh:
                json.dump(fetched_kwargs, fh, indent=4)

            if need_models:
                run_struc = run.get_structure()
                substruc = get_sub_struc_from_path(run_struc,f'{task_id}/models')
                for key in substruc.keys():
                    if 'info' in key:
                        run[f'{task_id}/models/{key}'].download(f'{components_dir}/{key}.json')
                    elif 'state' in key:
                        run[f'{task_id}/models/{key}'].download(f'{components_dir}/{key}.pt')
        finally:
            # Stop the run even when a fetch or download fails.
            run.stop()
        model_files = os.listdir(components_dir)

    encoder_info = None
    head_info = None
    encoder_state = None
    head_state = None
    original_kwargs = None

    # NOTE(review): files are matched by substring, so a short head_key such as
    # 'OS' also matches files of other heads whose names contain it; when
    # several match, the last one listed wins.
    for f in model_files:
        component_path = f'{components_dir}/{f}'
        if ('encoder' in f):
            if 'info' in f:
                encoder_info = _load_json(component_path)
            elif 'state' in f:
                encoder_state = torch.load(component_path)
        if (head_key in f):
            if 'info' in f:
                head_info = _load_json(component_path)
            elif 'state' in f:
                head_state = torch.load(component_path)
        # Was misspelled 'oringal_kwargs', so the cached kwargs never loaded.
        if 'original_kwargs' in f:
            original_kwargs = _load_json(component_path)

    if encoder_info is None or head_info is None:
        return None

    model = create_compound_model_from_info(encoder_info=encoder_info,
                                            head_info= head_info,
                                            encoder_state_dict=encoder_state,
                                            head_state_dict=head_state)

    train_kwargs = original_kwargs['train_kwargs']

    params = {}
    params['encoder dropout_rate'] = encoder_info['dropout_rate']
    params['head name'] = head_info['name']
    params['head weight'] = head_info['weight']*train_kwargs['head_weight']
    params['head layers']= head_info['architecture']['num_hidden_layers']
    # One of the heads is the head being reported; the rest count as auxiliary.
    params['num auxillary heads'] = len(original_kwargs['head_kwargs_dict']) -1

    params['num adversarial heads'] = len(original_kwargs['adversarial_head_kwargs_dict'])
    params['adversary weight'] = train_kwargs['adversary_weight']
    params['adversarial_start_epoch'] = train_kwargs['adversarial_start_epoch']
    # A zero weight or no adversarial heads both mean "no adversary": report
    # all three adversarial settings as zero in that case.
    if params['adversary weight'] == 0 or params['num adversarial heads'] == 0:
        params['adversary weight'] = 0
        params['num adversarial heads'] = 0
        params['adversarial_start_epoch'] = 0

    params['encoder weight'] = train_kwargs['encoder_weight']
    params['learning rate'] = train_kwargs['learning_rate']
    for name in ('l1_reg_weight', 'l2_reg_weight', 'noise_factor', 'num_epochs', 'weight_decay'):
        params[name] = train_kwargs[name]

    model.save_info(model_dir, f'VAE {head_key} info.json')
    model.save_state_to_path(model_dir, f'VAE {head_key} state.pt')

    skmodel = create_pytorch_model_from_info(full_model=model)

    X_data = pd.read_csv(f'{data_dir}/X_finetune_test.csv',index_col=0)
    y_data = pd.read_csv(f'{data_dir}/y_finetune_test.csv',index_col=0)

    # Same features each time; only the target columns change.
    X_test = X_data.to_numpy()
    target_columns = {
        'both-OS': ['OS','OS_Event'],
        'NIVO-OS': ['NIVO OS','OS_Event'],
        'EVER-OS': ['EVER OS','OS_Event'],
    }
    score_dict = {}
    for label, columns in target_columns.items():
        score_dict[label] = skmodel.score(X_test, y_data[columns].to_numpy())['Concordance Index']

    res_dict = {
        'test c-index' : score_dict,
        'params' : params
    }

    return res_dict

In [23]:
# Display the local model cache directory currently set in the notebook.
local_dir

'/Users/jonaheaton/Desktop/saved_models'

In [24]:
# Display the data directory currently set in the notebook.
data_dir

'/Users/jonaheaton/ReviveMed Dropbox/Jonah Eaton/development_finetune_optimization/April_30_Finetune_Data'

In [None]:
desc_str = 'Optimized_NIVO-OS ADV EVER-OS_finetune'
# defaultdict takes a factory callable (here `dict`), not an instance:
# defaultdict({}) raises TypeError.
report_dict = defaultdict(dict)

NameError: name 'defaultdict' is not defined

In [None]:
desc_str = 'Optimized_NIVO-OS ADV EVER-OS_finetune'
# defaultdict takes a factory callable (here `dict`), not an instance:
# defaultdict({}) raises TypeError.
report_dict = defaultdict(dict)

# One report per survival head, keyed by head name. The results are stored in
# report_dict (the loop previously assigned to the undefined name `report_df`).
for head_key in ['NIVO OS', 'EVER OS', 'OS']:
    report_dict[head_key] = generate_survival_report(desc_str,head_key,pretrained=True,data_dir=data_dir,local_dir=local_dir)

In [None]:
# Collect optimisation-study metadata and averaged evaluation results for every
# 'Optimized_*' task in the run, then export them as one CSV row per task.
run= neptune.init_run(
        project='revivemed/RCC',
        api_token= NEPTUNE_API_TOKEN,
        with_id='RCC-2925',
        mode='read-only',
)

num_trials_key = 'original_kwargs/optimized_study_info/number of total trials'
chosen_trials_key = 'original_kwargs/optimized_study_info/best trial'
res_dict = defaultdict(dict)

try:
    # The structure is fetched once and reused for every task.
    run_struc = run.get_structure()
    for task_key in run_struc.keys():
        if 'Optimized_' not in task_key:
            continue

        # Study metadata is only recorded when the trial-count path exists.
        if check_if_path_in_struc(run_struc[task_key],num_trials_key):
            num_trials = run[task_key][num_trials_key].fetch()
            chosen_trial = run[task_key][chosen_trials_key].fetch()
            res_dict[task_key]['num_trials'] = num_trials
            res_dict[task_key]['chosen_trial'] = chosen_trial

        if 'avg' in run_struc[task_key].keys():
            eval_res_dict = run[task_key]['avg'].fetch()
            res_dict[task_key].update(eval_res_dict)
finally:
    # Stop the run whether or not the fetches succeeded.
    run.stop()


pd.DataFrame(res_dict).T.to_csv('~/Desktop/neptune_results 3.csv')