In [51]:
import torch
import sys
import os
import os.path as osp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import ticker
import matplotlib as mpl
from sklearn.metrics import precision_recall_curve

In [2]:
# Make the repository root the working directory. The later cells run `./ini.sh`
# and import `src.*` / `test` by relative name, which presumably relies on this
# chdir having happened first — confirm if the notebook is moved.
root_dir = "/home/users/richras/Ge2Net_Repo"
os.chdir(root_dir)

In [3]:
# Run the repository's init script from the current working directory.
# NOTE(review): `!` executes in a child shell, so any variables the script
# exports do not reach this kernel's os.environ — presumably why the next cell
# sets the paths by hand; confirm.
!./ini.sh

set environment variables
All done


In [4]:
# Export the path settings into this kernel's environment in one go.
# OUT_PATH is read by the dataset/model path cell further down; the remaining
# variables are presumably consumed inside the project's `src` modules — confirm.
os.environ.update({
    'USER_PATH': '/home/users/richras/Ge2Net_Repo',
    'USER_SCRATCH_PATH': "/scratch/users/richras",
    'IN_PATH': '/scratch/groups/cdbustam/richras/data_in',
    'OUT_PATH': '/scratch/groups/cdbustam/richras/data_out',
    'LOG_PATH': '/scratch/groups/cdbustam/richras/logs/',
})

In [5]:
%load_ext autoreload
%autoreload 2
from src.utils.dataUtil import load_path, save_file, vcf2npy, get_recomb_rate, interpolate_genetic_pos, form_windows,\
getValueBySelection
from src.utils.modelUtil import Params, load_model
from src.utils.decorators import timer
from src.utils.labelUtil import repeat_pop_arr, getSuperpopBins
from src.models import AuxiliaryTask, LSTM, Attention, BasicBlock, Model_A, Model_B, Model_C
from src.models.distributions import Multivariate_Gaussian
from src.main.evaluation import eval_cp_batch, reportChangePointMetrics, t_prMetrics, cpMethod
from src.main.settings_model import parse_args, MODEL_CLASS
import test

In [6]:
# Dataset and trained model to evaluate: chm22, PCA labels, full dataset.
# Every location is resolved relative to the OUT_PATH environment variable.
# NOTE(review): data_path is built from the same 'humans/labels/...' sub-path as
# labels_path — confirm that the data really lives in the labels directory.
labels_path = osp.join(os.environ['OUT_PATH'], 'humans/labels/data_id_3_pca')
data_path = osp.join(os.environ['OUT_PATH'], 'humans/labels/data_id_3_pca')
models_path = osp.join(os.environ['OUT_PATH'], 'humans/training/Model_B_exp_id_4_data_id_3_pca/')
# Name of the dataset split handed to the evaluation run.
dataset_type = 'valid'

In [8]:
# load the params file and run test.py

In [18]:
# Settings handed to test.main, using the same dotted keys as before.
config = {
    'data.labels': labels_path,
    'data.dir': data_path,
    'models.dir': models_path,
    'data.dataset_type': dataset_type,
    'cuda': 'cuda',
}

# The model's parameters are read from params.json inside the model directory;
# stop early with a clear message when that file is missing.
json_path = osp.join(config['models.dir'], 'params.json')
assert osp.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Per-run overrides on top of the stored parameters.
# NOTE(review): presumably these make test.main return the model outputs and
# draw 100 MC-dropout samples — confirm against test.py. MC dropout is
# stochastic and no random seed is set in the cells visible here.
params.rtnOuts = True
params.mc_dropout = True
params.mc_samples = 100

results, test_dataset = test.main(config, params)

 device used: cuda
Loading the datasets...
Finished 'transform_data' in 26.7762 secs
Finished '__init__' in 32.7749 secs
model ['Model_B.model_B'] : AuxiliaryTask.AuxNetwork
model ['Model_B.model_B'] : LSTM.BiRNN
model ['Model_B.model_B'] : BasicBlock.logits_Block
best val loss metrics : {'gcd': None, 'mse': 0.20966243415029062, 'smooth_l1': 0.09892145020944967, 'weighted_loss': 0.3524936370038568, 'loss_main': 0.3524936370038568, 'loss_aux': 0.8679943420903375}
at epoch : 94
train loss metrics: {'gcd': None, 'mse': 0.09787766041908866, 'smooth_l1': 0.046797061544382804, 'weighted_loss': 0.2386510676702706, 'loss_main': 0.21970770299296133, 'loss_aux': 0.8715258090012372}
best val cp metrics : {'loss_cp': 1.3071895907791774e-06, 'Precision': 1.0, 'Recall': 0.4043835616438356, 'Accuracy': 0.9863497688086083, 'A_major': 1.0, 'BalancedAccuracy': 0.7021917808219178}
train cp metrics: {'loss_cp': 2.140980156204126e-06, 'Precision': 1.0, 'Recall': 0.6669736034376919, 'Accuracy': 0.9923624061

In [10]:
# Shape of the `cps` tensor that the later cells pass as the change-point target.
test_dataset.data['cps'].shape

torch.Size([3650, 317])

In [11]:
# Shapes of the three model outputs that the threshold sweeps below score:
# cp_logits (neural_network), coord_main (gradient) and y_var (mc_dropout).
results.t_out.cp_logits.shape, results.t_out.coord_main.shape, results.t_out.y_var.shape

((3650, 317, 1), (3650, 317, 3), (3650, 317, 3))

In [42]:
%%HTML
<style type="text/css">
/* Give every header and data cell of rendered DataFrames a solid black
   border and black text. */
table.dataframe th,
table.dataframe td {
    border: 1px solid black !important;
    color: black !important;
}
</style>

In [46]:
@timer
def prMetricsByThresh(method_name, cp_pred_raw, cp_target, steps):
    """Tabulate change-point metrics over an evenly spaced threshold sweep on [0, 1].

    For each threshold, ``reportChangePointMetrics`` is called with the same
    predictions and targets, and its result is stored as one row of the table.

    Parameters
    ----------
    method_name :
        Forwarded unchanged to ``reportChangePointMetrics``.
    cp_pred_raw :
        Raw predictions, forwarded unchanged.
    cp_target :
        Targets, forwarded unchanged. ``cp_target.shape[1]`` is forwarded as
        the sequence length.
    steps : int
        Number of equal intervals the range [0, 1] is divided into. Exactly
        ``steps + 1`` thresholds are evaluated, both end points included.

    Returns
    -------
    pandas.DataFrame
        One row per threshold. Columns are ``t_prMetrics._fields`` followed by
        ``'thresh'``; any additional keys returned by
        ``reportChangePointMetrics`` are kept after those.

    Raises
    ------
    ValueError
        If ``steps`` is smaller than 1.
    """
    if steps < 1:
        raise ValueError("steps must be a positive integer, got {!r}".format(steps))

    seqlen = cp_target.shape[1]
    min_prob = 0.0
    max_prob = 1.0
    columns = list(t_prMetrics._fields) + ['thresh']

    # linspace fixes the number of thresholds at steps + 1 and hits max_prob
    # exactly; arange with a float step can gain or lose the last element to
    # rounding error.
    thresholds = np.linspace(min_prob, max_prob, int(steps) + 1)

    rows = []
    for thresh in thresholds:
        prMetrics = reportChangePointMetrics(method_name, cp_pred_raw, cp_target, seqlen, thresh)
        # Copy before adding the threshold so the returned object is not mutated.
        # Assumes the result is a mapping (the previous code item-assigned into it).
        row = dict(prMetrics)
        row['thresh'] = thresh
        rows.append(row)

    # Build the frame once instead of appending row by row: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every iteration.
    df = pd.DataFrame(rows)
    extra_columns = [col for col in df.columns if col not in columns]
    return df.reindex(columns=columns + extra_columns)

In [50]:
# Threshold sweep (20 steps) scoring the network's change-point logits against
# the `cps` targets; the targets get a trailing axis via unsqueeze(2).
df_nn = prMetricsByThresh(
    cpMethod.neural_network.name,
    torch.tensor(results.t_out.cp_logits).float(),
    test_dataset.data['cps'].unsqueeze(2).float(),
    20,
)
df_nn

Finished 'prMetricsByThresh' in 9.9794 secs


Unnamed: 0,Precision,Recall,Accuracy,A_major,BalancedAccuracy,thresh
0,0.014539,1.0,0.039666,0.026872,0.513436,0.0
1,0.019973,0.989428,0.095478,0.083749,0.536588,0.05
2,0.977078,0.409891,0.986329,0.999846,0.704869,0.1
3,0.99863,0.404431,0.986343,0.999989,0.70221,0.15
4,1.0,0.404384,0.98635,1.0,0.702192,0.2
5,1.0,0.404384,0.98635,1.0,0.702192,0.25
6,1.0,0.404384,0.98635,1.0,0.702192,0.3
7,1.0,0.404384,0.98635,1.0,0.702192,0.35
8,1.0,0.404384,0.98635,1.0,0.702192,0.4
9,1.0,0.404384,0.98635,1.0,0.702192,0.45


In [56]:
# Precision-recall curve from scikit-learn over the flattened targets and
# flattened cp_logits. The scores are passed as they are, so the returned
# thresholds are expressed in the same units as cp_logits.
precision, recall, thresholds = precision_recall_curve(
    test_dataset.data['cps'].detach().cpu().numpy().flatten(),
    results.t_out.cp_logits.flatten(),
)

In [57]:
# Show the three arrays returned by precision_recall_curve. The scores passed in
# were cp_logits, so `thresholds` are in those units rather than in [0, 1].
precision, recall, thresholds

(array([0.01365023, 0.01364938, 0.01364939, ..., 0.        , 0.        ,
        1.        ]),
 array([1.        , 0.99993668, 0.99993668, ..., 0.        , 0.        ,
        0.        ]),
 array([-3.3164587, -3.3088896, -3.3083174, ..., -1.4701847, -1.4203497,
        -1.4023982], dtype=float32))

In [48]:
# Threshold sweep (20 steps) for the gradient method, scored on coord_main.
# The table is kept in a named variable and shown as the cell's last expression,
# like df_nn, so it renders as a DataFrame rather than as printed text.
df_grad = prMetricsByThresh(
    cpMethod.gradient.name,
    torch.tensor(results.t_out.coord_main).float(),
    test_dataset.data['cps'].unsqueeze(2).float(),
    20,
)
df_grad

Finished 'prMetricsByThresh' in 9.7473 secs
    Precision    Recall  Accuracy   A_major  BalancedAccuracy  thresh
0    0.014586  1.000000  0.042789  0.030038          0.515019    0.00
1    0.272116  0.876770  0.941270  0.943058          0.909914    0.05
2    0.432153  0.854147  0.969532  0.972404          0.913275    0.10
3    0.544122  0.839733  0.979048  0.982456          0.911094    0.15
4    0.609237  0.829436  0.983503  0.987244          0.908340    0.20
5    0.657852  0.821463  0.986212  0.990198          0.905831    0.25
6    0.687876  0.812147  0.987828  0.992076          0.902111    0.30
7    0.713363  0.803981  0.988939  0.993405          0.898693    0.35
8    0.735169  0.797256  0.989877  0.994518          0.895887    0.40
9    0.756630  0.792530  0.990585  0.995372          0.893951    0.45
10   0.774287  0.785963  0.991073  0.996026          0.890994    0.50
11   0.791523  0.780229  0.991501  0.996602          0.888415    0.55
12   0.808017  0.773052  0.991853  0.997117   

In [49]:
# Threshold sweep (20 steps) for the MC-dropout method, scored on y_var.
mc_variance = results.t_out.y_var
if mc_variance is None:
    # torch.tensor(None) raises the opaque "Could not infer dtype of NoneType".
    # NOTE(review): a missing y_var is one likely cause of the failure recorded
    # for this cell; fail with an actionable message (same exception type).
    raise RuntimeError(
        "results.t_out.y_var is None; re-run the evaluation cell (test.main) so "
        "that `results` carries y_var before computing the mc_dropout metrics"
    )

# Keep the table in a named variable and show it as the last expression,
# like df_nn, so it renders as a DataFrame rather than as printed text.
df_mc = prMetricsByThresh(
    cpMethod.mc_dropout.name,
    torch.tensor(mc_variance).float(),
    test_dataset.data['cps'].unsqueeze(2).float(),
    20,
)
df_mc

RuntimeError: Could not infer dtype of NoneType

In [17]:
# Inspect the raw y_var output, i.e. the array that the mc_dropout sweep scores.
# NOTE(review): this shows the whole array; a slice or summary statistics would
# keep the saved output smaller.
results.t_out.y_var

array([[[4.08605410e-05, 1.11367030e-04, 8.47110714e-05],
        [1.86296820e-05, 3.08456802e-05, 4.80170966e-05],
        [7.14375028e-06, 1.93669312e-05, 4.94519409e-05],
        ...,
        [1.16939300e-04, 4.82083671e-03, 1.69717390e-02],
        [8.71554366e-05, 4.84260218e-03, 1.16324173e-02],
        [1.37436844e-04, 6.07324066e-03, 2.18241196e-02]],

       [[1.41916913e-04, 4.34661109e-04, 1.98118243e-04],
        [7.95282976e-05, 1.30294502e-04, 2.02928961e-04],
        [8.34068414e-05, 1.42203149e-04, 1.62174430e-04],
        ...,
        [2.00957275e-05, 2.12317245e-05, 2.12823201e-04],
        [2.22179351e-05, 3.06492148e-05, 2.56553642e-04],
        [6.27382469e-05, 2.20274887e-04, 5.75352751e-04]],

       [[1.88451450e-04, 1.07953709e-03, 1.82134460e-03],
        [1.26131839e-04, 3.90142959e-04, 1.14504073e-03],
        [7.57835587e-05, 1.82555072e-04, 6.90431450e-04],
        ...,
        [3.87039006e-04, 2.94174021e-03, 2.76402328e-02],
        [2.37477114e-04, 2.35