In [1]:
# Will reload modeules after this when they change!
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import itertools
import functools
import importlib
import os
from IPython.display import Image

from nl_00_hmdb_pre import main_loop_0
from sklearn.decomposition import PCA
import umap

from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde

# RDKit
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

import warnings

def action_with_warnings():
    warnings.warn("should not appear")

In [3]:
# Regression:
from nl_03_filter_model_score_reg import filter_split_model_score

In [None]:
# Binary classification:
from nl_03_filter_model_score import filter_split_model_score

### This script runs the neutral loss scripts for MALDI IMS data

Steps:
1. Generation of core metabolome database.  See:
    PycharmProjects/core_metabolome/core_metabolome_db.ipynb
2. Manual curations of core_metabolome_v2 --> core_metabolome_v3

1. nl_00 Calculates Mordred descriptors and FP4 fingerprints as bits.
2. nl_01 Filter HMDB_db for observed and parses METASPACE output.
3. nl_02 Joins HMDB_db and METASPACE output.
4. nl_03 Filters and searchs with direct and machine learning models.

### To do:
5. nl_04 deep learning: executing on toy data, change to regression

### Observations:
1. 2912/3333 datasets a) did not error and b) had at least one id.  
2. 2543/2868 datasets w/10 or more ID's.  Obs: 557,508/559,002
3. 644,094 values identifications, the dataset with the most identifications had 5,820 and mean was 221 (median 131).  Re calculate...
4. The data did not pass the Shapiro-Wilk test for normality, and nonparametrical statistics should be used.

# Off-line steps:
### Generation of core metabolome database.
http://localhost:8888/notebooks/PycharmProjects/core_metabolome/core_metabolome_db.ipynb
### Identification of good quality datasets.
http://localhost:8888/notebooks/PycharmProjects/neutral_loss/good_nl_reports/high_quality_data_investigations.ipynb
### Good datasets were researched from the beta server.  Run 1x w/ "reprocess = True" and "reprocess_not_downloading = True".  Run again w/False.
http://localhost:8888/notebooks/PycharmProjects/neutral_loss/good_nl_reports/Beta_server_neutral_losses_mass_search.ipynb

In [4]:
### Off-line variables:
core_metabolome = '/Users/dis/PycharmProjects/core_metabolome/core_metabolome_v3.pickle'
good_ds_ids = '/Users/dis/PycharmProjects/neutral_loss/good_nl_reports/good_ds_2020_Feb_25.txt'
beta_variables = {'NEUTRAL_LOSSES': ['-H2O'], 'MAX_FDR': 0.5, 'molDBs': 'core_metabolome_v3'}
path_to_reports = '/Users/dis/PycharmProjects/neutral_loss/good_nl_reports/good_nl_reports_core_metabolome_v3'

In [5]:
### Off-line steps:

# Generation of core metabolome database.  See:
'http://localhost:8888/notebooks/PycharmProjects/core_metabolome/core_metabolome_db.ipynb'
core_metabolome = '/Users/dis/PycharmProjects/core_metabolome/core_metabolome_v3.pickle'

# Identification of good quality datasets.
'http://localhost:8888/notebooks/PycharmProjects/neutral_loss/good_nl_reports/high_quality_data_investigations.ipynb'
good_ds_ids = '/Users/dis/PycharmProjects/neutral_loss/good_nl_reports/good_ds_2020_Feb_25.txt'

# Good datasets were researched from the beta server.  Run 1x w/ "reprocess = True" and
# "reprocess_not_downloading = True".  Run again w/False.  See:
'http://localhost:8888/notebooks/PycharmProjects/neutral_loss/good_nl_reports/Beta_server_neutral_losses_mass_search.ipynb'
beta_variables = {'NEUTRAL_LOSSES': ['-H2O'], 'MAX_FDR': 0.5, 'molDBs': 'core_metabolome_v3'}
path_to_reports = 'tbd'

In [6]:
# nl_00: Generates FP4 descriptors ('bits') and Mordred descriptors. 
# 777 seconds to reprocess everything on 11k. 22s without reprocessing.

db_out_0 = 'databases/core_metabolome_out.pickle'
db_in_0 = core_metabolome
dfs = main_loop_0(db_out_0, db_in_0, True, False)

db_df = dfs[0]
bits_df = dfs[1]
mord_norm_df = dfs[2]

Elapsed time:

20.17292070388794

Executed without error

databases/core_metabolome_out.pickle


In [15]:
db_df.to_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/core_metabolome_out.pickle') # 0.1 Gb
bits_df.to_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/bits_df.pickle')  #0.2 Gb
mord_norm_df.to_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/mord_norm_df.pickle')  # 0.8 Gb

In [7]:
### Wait for runs to complete on beta server!

In [9]:
# Merge pickles for each dataset.
path_to_reports = '/Users/dis/PycharmProjects/neutral_loss/good_nl_reports/good_nl_reports_core_metabolome_v3'
master_df = pd.DataFrame()
counter = 0

for root, dirs, files in os.walk(path_to_reports):
    for file in files:
        if file.endswith(".pickle"):
            if counter % 50 == 0:
                print(counter)
            counter +=1
            current_df = pd.read_pickle(root + '/' + file)
            master_df = pd.concat([master_df, current_df], sort=True)

0
50
100
150
200
250
300


In [12]:
# Water only.  Edit for cases where neutral losses other than water under investigation.
terse_headers_0 = ['formula', 'adduct', 'ds_id', 'has_no_loss', 'has_H2O',
                   'msm', 'fdr', 'off_sample', 'hmdb_ids', 'intensity_avg',
                   'msm_H2O', 'fdr_H2O', 'off_sample_H2O', 'intensity_avg_H2O',
                   'colocalization_H2O', 'loss_intensity_ratio_H2O', 'loss_intensity_share_H2O',
                  'ion', 'ion_H2O', 'ion_formula', 'ion_formula_H2O']

master_df = master_df[terse_headers_0].copy(deep=True)
ds_id_meta = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/good_nl_reports/ds_id_meta.pickle')
master_df = master_df.merge(ds_id_meta, left_on='ds_id', right_on= 'datasetId', how='left')
master_df.drop(columns=['datasetId']).to_pickle('high_quality_cm3.pickle')

Exploratory data analysis experimental space:
http://localhost:8888/notebooks/PycharmProjects/neutral_loss/Exploratory_data_analysis.ipynb

Exploraatory data analysis chemical space:
http://localhost:8888/notebooks/PycharmProjects/neutral_loss/Exploratory_data_analysis_chemical_space.ipynb#

In [6]:
# Loads pickles from previous step
# For QC, don't need to run for reprocess.
input_df = pd.read_pickle('high_quality_cm3.pickle')
hmdb_df = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/core_metabolome_out.pickle') # 0.1 Gb
mord_norm_df = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/mord_norm_df.pickle')  # 0.8 Gb
bits_df = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/bits_df.pickle')  #0.2 Gb

In [5]:
### nl_01 to nl_03 need to be updated after exploratory data analysis is finished!!!

In [15]:
# nl_01: Parses METASPACE output and filters HMDB for observed
%run nl_01_preprocess.py -m high_quality_cm3.pickle -p /Users/dis/PycharmProjects/neutral_loss/databases/core_metabolome_out.pickle -is_H2O True

Elapsed time (seconds):

3

Executed without error

high_quality_cm3_output_01.pickle
high_quality_cm3_hmdb_01.pickle


In [19]:
# Loads pickles from previous step, 3 seconds
# For QC, don't need to run for reprocess.
output_1 = pd.read_pickle('high_quality_cm3_output_01.pickle')
hmdb_1 = pd.read_pickle('high_quality_cm3_hmdb_01.pickle')

In [21]:
# nl_02: Joins METASPACE output and observed HMDB, 40 seconds
%run nl_02_join.py --o high_quality_cm3_output_01.pickle --h high_quality_cm3_hmdb_01.pickle

Elapsed time:


Executed without error

39.665668964385986
high_quality_cm3_output_02.pickle


In [22]:
# Load X's if running machine learning
join_df_path = 'high_quality_cm3_output_02.pickle' 
join_df = pd.read_pickle(join_df_path)  # 0.5 Gb
unique_hmdbs = list(join_df.hmdb_ids.unique())

mord_df = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/mord_norm_df.pickle')
mord_df = mord_df[mord_df.hmdb_ids.isin(unique_hmdbs)].copy(deep=True)

bits_df = pd.read_pickle('/Users/dis/PycharmProjects/neutral_loss/databases/bits_df.pickle')  
bits_df = bits_df[bits_df.hmdb_ids.isin(unique_hmdbs)].copy(deep=True)

In [25]:
join_df.analyzer.unique()

array(['FTICR', 'Orbitrap'], dtype=object)

In [26]:
# Can use this to filter on inferred vaccuum maldi vs ap maldi
all_ds = list(join_df.ds_id.unique())
ap_ds = list(join_df[join_df.analyzer == 'FTICR'].ds_id.unique())
vac_ds = list(join_df[join_df.analyzer != 'FTICR'].ds_id.unique())

In [30]:
# nl_03: Filtering and machine learning models
datasets = ['all_ds', 'ap_ds', 'vac_ds']
ds_dict = {'all_ds': all_ds, 'ap_ds': ap_ds, 'vac_ds': vac_ds}
dataset = [datasets[0]]
target = ['H2O'] # ['H2O']
polarity = [1] # {1:positive, -1:negative] 
fdrs = [0.20, 0.10, 0.05] # [0.20, 0.10, 0.05]
colocalizations = [0.00, 0.50, 0.75] # [0.00, 0.50, 0.75]
min_n_obs = [1, 4, 10] # [1, 4, 10]
one_id_only = [False] # [True, False]

# Outputs (y), and weights (w):
global_ys = ['n_loss_only_H2O', 'n_loss_wparent_H2O', 'n_loss_all_H2O']
y = ['n_loss_wparent_H2O']
w = ['weight']
w_norms = [False, '10_y_bins', 'n_obs', 'isobar',
          '10_y_bins_W_n_obs', '10_y_bins_W_isobar', 'n_obs_W_isobar',
          '10_y_bins_W_n_obs_W_isobar']
w_norm = [False, '10_y_bins', 'n_obs', 'isobar',
          '10_y_bins_W_n_obs', '10_y_bins_W_isobar', 'n_obs_W_isobar',
          '10_y_bins_W_n_obs_W_isobar']

# Class of model:
models = ['direct', 'ml', 'deepchem']
model = [models[1]]

# Split.  If true, any formula only appears in train/test/validate.
single_fold_group = True

filter_model_df = pd.DataFrame(columns=['target', 'polarity', 'fdr', 'coloc', 'min_n_obs'
                                       'model', 'submodel', 'params', 'one_id_only',
                                        'X', 'y', 'w'])
# Specific inputs (X) for each model:
if model[0] is 'direct':
    submodel = ['']
    params = ['']
    Xs = ['trues', 'falses', 'rando', 'H2O_Present', 'n_loss_wparent_H2O']
    X = Xs                           
elif model[0] is 'ml':
    submodels = ['random_forest', 'XGBoost', 'SVM']
    submodel = [submodels[1]]
    params = ['']
    Xs = ['mord_norm', 'bits'] 
    X = ['mord_norm', 'bits']
elif model[0] is 'deepchem':
    submodels = [('GraphConvModel', 'GraphConv'), 
              ('WeaveModel', 'Weave'), 
              ('MPNNModel','Weave') ] 
    submodel = [submodels[0]]    
    params = ['']
    Xs = ['Molecule', 'Smiles']
    X = [Xs[0]]          
    
model_list = list(itertools.product(*[target, polarity, fdrs, colocalizations, min_n_obs,
                                      model, submodel, params, one_id_only,
                                      X, y, w, w_norm, dataset]))
print()
filter_param_df = pd.DataFrame(model_list, columns=['target', 'polarity', 'fdr', 'coloc', 'min_n_obs',
                                                    'model', 'submodel', 'params', 'one_id_only',
                                                    'X', 'y', 'w', 'w_norm', 'dataset'])

filter_param_df




Unnamed: 0,target,polarity,fdr,coloc,min_n_obs,model,submodel,params,one_id_only,X,y,w,w_norm,dataset
0,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,n_loss_wparent_H2O,weight,False,all_ds
1,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,n_loss_wparent_H2O,weight,10_y_bins,all_ds
2,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,n_loss_wparent_H2O,weight,n_obs,all_ds
3,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,n_loss_wparent_H2O,weight,isobar,all_ds
4,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,n_loss_wparent_H2O,weight,10_y_bins_W_n_obs,all_ds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,n_loss_wparent_H2O,weight,isobar,all_ds
428,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,n_loss_wparent_H2O,weight,10_y_bins_W_n_obs,all_ds
429,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,n_loss_wparent_H2O,weight,10_y_bins_W_isobar,all_ds
430,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,n_loss_wparent_H2O,weight,n_obs_W_isobar,all_ds


In [31]:
model_results = filter_split_model_score(filter_param_df, join_df_path, mord_df, bits_df,
                                         single_fold_group, ds_dict[dataset[0]])

start 0
filtering 0
aggregating 0
checking aggregation: 
             hmdb_ids     y  n_obs
12790   HMDB0049676+K   0.0      1
16575   HMDB0114925+K   0.0     45
3163    HMDB0007861+H   3.0      5
5708   HMDB0008995+Na  30.0     43
7252    HMDB0010357+H   1.0      8
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_40_38_960098
Printing histogram!
hist_plots/2020_04_06_14_40_39_132751
joining X 0
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_40_39_930605
splitting 0


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8692, 1826)
(4115, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 70, 'X': 3}
modeling 0
ml :  XGBoost
train_y: (8692,)
train_w: (8692,)
train_X: (8692, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_42_31_150253
start 1
filtering 1
aggregating 1
checking aggregation: 
             hmdb_ids     y  n_obs
1477    HMDB0003229+H   1.0      9
663     HMDB0000706+K   0.0      1
7333    HMDB0010400+H   1.0     43
10796  HMDB0042589+Na   0.0      3
8584    HMDB0011511+H  12.0     49
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_42_33_639800
Printing histogram!
hist_plots/2020_04_06_14_42_33_809770
joining X 1
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_42_34_525034
splitting 1


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8452, 1826)
(4564, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 68, 'X': 4}
modeling 1
ml :  XGBoost
train_y: (8452,)
train_w: (8452,)
train_X: (8452, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_44_19_667845
start 2
filtering 2
aggregating 2
checking aggregation: 
             hmdb_ids     y  n_obs
12015  HMDB0046099+Na   0.0      5
11142   HMDB0043854+H   0.0     66
4832    HMDB0008518+H  13.0     21
15019   HMDB0062758+K   0.0      2
16836  HMDB0115067+Na   0.0      3
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_44_22_024027
Printing histogram!
hist_plots/2020_04_06_14_44_22_192663
joining X 2
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_44_22_930625
splitting 2


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8917, 1826)
(4358, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 71, 'X': 9}
modeling 2
ml :  XGBoost
train_y: (8917,)
train_w: (8917,)
train_X: (8917, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_46_16_983509
start 3
filtering 3
aggregating 3
checking aggregation: 
             hmdb_ids     y  n_obs
3570   HMDB0008014+Na  67.0    177
11788  HMDB0045391+Na   3.0      8
8066    HMDB0011231+H   0.0      3
11593   HMDB0044754+K   3.0     80
14458   HMDB0055789+K   0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_46_19_256918
Printing histogram!
hist_plots/2020_04_06_14_46_19_470636
joining X 3
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_46_20_234546
splitting 3


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9145, 1826)
(4023, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 68, 'X': 3}
modeling 3
ml :  XGBoost
train_y: (9145,)
train_w: (9145,)
train_X: (9145, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_48_16_657519
start 4
filtering 4
aggregating 4
checking aggregation: 
             hmdb_ids    y  n_obs
16303   HMDB0112785+K  0.0      1
15797  HMDB0112425+Na  2.0      2
2032   HMDB0006031+Na  0.0      3
3096    HMDB0007709+K  0.0     15
15956   HMDB0112520+H  7.0      9
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_48_18_913938
Printing histogram!
hist_plots/2020_04_06_14_48_19_081002
joining X 4
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_48_19_796419
splitting 4


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8672, 1826)
(4070, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 70, 'X': 6}
modeling 4
ml :  XGBoost
train_y: (8672,)
train_w: (8672,)
train_X: (8672, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_50_15_009077
start 5
filtering 5
aggregating 5
checking aggregation: 
             hmdb_ids    y  n_obs
17332   HMDB0115519+H  5.0     62
7864    HMDB0010681+H  0.0     53
5643   HMDB0008962+Na  1.0      3
2936    HMDB0007516+H  1.0      2
7628    HMDB0010585+K  2.0      2
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_50_17_313610
Printing histogram!
hist_plots/2020_04_06_14_50_17_492413
joining X 5
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_50_18_248377
splitting 5


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9109, 1826)
(4025, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 71, 'X': 10}
modeling 5
ml :  XGBoost
train_y: (9109,)
train_w: (9109,)
train_X: (9109, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_52_16_597872
start 6
filtering 6
aggregating 6
checking aggregation: 
             hmdb_ids     y  n_obs
4049    HMDB0008184+K  23.0     97
16855   HMDB0115075+K   2.0     12
4461   HMDB0008342+Na  25.0    110
3949   HMDB0008147+Na  30.0    175
12666   HMDB0049281+K   0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_52_18_844832
Printing histogram!
hist_plots/2020_04_06_14_52_19_020922
joining X 6
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_52_19_749047
splitting 6


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8474, 1826)
(4876, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 68, 'X': 7}
modeling 6
ml :  XGBoost
train_y: (8474,)
train_w: (8474,)
train_X: (8474, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_54_12_012230
start 7
filtering 7
aggregating 7
checking aggregation: 
             hmdb_ids     y  n_obs
11844   HMDB0045524+H   0.0      1
907     HMDB0001008+H   3.0     83
16274   HMDB0112769+K   1.0     12
3661    HMDB0008046+K  91.0    140
11660  HMDB0044937+Na   0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_54_14_318003
Printing histogram!
hist_plots/2020_04_06_14_54_14_484856
joining X 7
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_54_15_225472
splitting 7


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9172, 1826)
(4026, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 66, 'X': 8}
modeling 7
ml :  XGBoost
train_y: (9172,)
train_w: (9172,)
train_X: (9172, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_56_12_558127
start 8
filtering 8
aggregating 8
checking aggregation: 
             hmdb_ids    y  n_obs
9525    HMDB0013444+K  0.0     14
3291    HMDB0007904+K  1.0    170
13019  HMDB0050325+Na  1.0      8
11332   HMDB0044220+K  0.0      1
15471  HMDB0112105+Na  0.0      2
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_56_14_890317
Printing histogram!
hist_plots/2020_04_06_14_56_15_064667
joining X 8
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_56_15_503023
splitting 8


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8317, 1024)
(4826, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 63, 'X': 407207}
modeling 8
ml :  XGBoost
train_y: (8317,)
train_w: (8317,)
train_X: (8317, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_57_05_762589
start 9
filtering 9
aggregating 9
checking aggregation: 
             hmdb_ids    y  n_obs
16042   HMDB0112560+K  0.0      1
11964  HMDB0045905+Na  5.0     90
10306  HMDB0036219+Na  1.0      1
9025    HMDB0012359+K  0.0      2
15888  HMDB0112479+Na  3.0     10
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_57_08_007479
Printing histogram!
hist_plots/2020_04_06_14_57_08_173671
joining X 9
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_57_08_579086
splitting 9


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8870, 1024)
(4581, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 65, 'X': 413354}
modeling 9
ml :  XGBoost
train_y: (8870,)
train_w: (8870,)
train_X: (8870, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_58_02_279569
start 10
filtering 10
aggregating 10
checking aggregation: 
             hmdb_ids      y  n_obs
5810   HMDB0009057+Na   97.0    135
10737  HMDB0042480+Na    0.0      1
3637    HMDB0008038+H  103.0    175
10542  HMDB0041683+Na    5.0      9
14182   HMDB0054879+K    0.0      3
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_58_04_435681
Printing histogram!
hist_plots/2020_04_06_14_58_04_600224
joining X 10
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_58_05_001172
splitting 10


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8821, 1024)
(4577, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 73, 'X': 407656}
modeling 10
ml :  XGBoost
train_y: (8821,)
train_w: (8821,)
train_X: (8821, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_58_58_181841
start 11
filtering 11
aggregating 11
checking aggregation: 
             hmdb_ids     y  n_obs
16620   HMDB0114948+H   0.0      4
4677    HMDB0008438+H  92.0    110
2666    HMDB0007307+H   1.0      6
778    HMDB0000827+Na   0.0      3
12978   HMDB0050256+H   0.0     47
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_59_00_348858
Printing histogram!
hist_plots/2020_04_06_14_59_00_534659
joining X 11
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_59_00_978240
splitting 11


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8991, 1024)
(4684, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 69, 'X': 515136}
modeling 11
ml :  XGBoost
train_y: (8991,)
train_w: (8991,)
train_X: (8991, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_14_59_55_683532
start 12
filtering 12
aggregating 12
checking aggregation: 
             hmdb_ids     y  n_obs
7797    HMDB0010653+H   0.0      3
17192  HMDB0115328+Na  46.0     74
7739   HMDB0010629+Na   0.0      1
13472  HMDB0051734+Na   0.0     13
6940    HMDB0009821+H   0.0      9
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_14_59_57_881816
Printing histogram!
hist_plots/2020_04_06_14_59_58_045634
joining X 12
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_14_59_58_448031
splitting 12


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8994, 1024)
(4203, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 69, 'X': 392589}
modeling 12
ml :  XGBoost
train_y: (8994,)
train_w: (8994,)
train_X: (8994, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_00_52_890645
start 13
filtering 13
aggregating 13
checking aggregation: 
             hmdb_ids    y  n_obs
17637      msmls126+H  2.0      2
11041  HMDB0043318+Na  3.0      9
11111  HMDB0043597+Na  0.0      2
8530   HMDB0011489+Na  1.0     23
16595   HMDB0114932+H  0.0      4
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_00_55_066036
Printing histogram!
hist_plots/2020_04_06_15_00_55_229810
joining X 13
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_00_55_643719
splitting 13


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8414, 1024)
(5214, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 53, 'X': 508070}
modeling 13
ml :  XGBoost
train_y: (8414,)
train_w: (8414,)
train_X: (8414, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_01_46_841767
start 14
filtering 14
aggregating 14
checking aggregation: 
             hmdb_ids    y  n_obs
12375   HMDB0047937+H  0.0      8
11714   HMDB0045152+K  0.0     12
2990    HMDB0007565+K  0.0      2
12762   HMDB0049625+K  0.0      1
12340  HMDB0047822+Na  0.0      8
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_01_49_039568
Printing histogram!
hist_plots/2020_04_06_15_01_49_203930
joining X 14
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_01_49_615143
splitting 14


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8858, 1024)
(4223, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 65, 'X': 351052}
modeling 14
ml :  XGBoost
train_y: (8858,)
train_w: (8858,)
train_X: (8858, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_02_43_038031
start 15
filtering 15
aggregating 15
checking aggregation: 
             hmdb_ids     y  n_obs
851     HMDB0000913+H   0.0      1
1561    HMDB0003555+H   0.0      1
12520  HMDB0048708+Na   0.0      1
6772    HMDB0009643+H  83.0    102
13326   HMDB0051298+K   0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_02_45_240054
Printing histogram!
hist_plots/2020_04_06_15_02_45_403835
joining X 15
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_02_45_818987
splitting 15


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7959, 1024)
(4880, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 65, 'X': 395274}
modeling 15
ml :  XGBoost
train_y: (7959,)
train_w: (7959,)
train_X: (7959, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_03_34_668001
start 16
filtering 16
aggregating 16
checking aggregation: 
             hmdb_ids    y  n_obs
8021    HMDB0011214+H  0.0     12
12177  HMDB0046725+Na  0.0      1
14461   HMDB0055802+H  0.0      1
1824   HMDB0005361+Na  0.0     38
3310   HMDB0007910+Na  0.0      2
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_03_36_918398
Printing histogram!
hist_plots/2020_04_06_15_03_37_085744
joining X 16
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_03_37_681961
splitting 16


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5154, 1826)
(3432, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 71, 'X': 4}
modeling 16
ml :  XGBoost
train_y: (5154,)
train_w: (5154,)
train_X: (5154, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_04_41_891499
start 17
filtering 17
aggregating 17
checking aggregation: 
             hmdb_ids    y  n_obs
15028   HMDB0062784+K  0.0      3
9863    HMDB0029486+H  3.0      6
13787   HMDB0053463+K  0.0     12
14009  HMDB0054203+Na  1.0     69
12557  HMDB0048895+Na  6.0      6
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_04_44_130067
Printing histogram!
hist_plots/2020_04_06_15_04_44_294114
joining X 17
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_04_44_847454
splitting 17


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5631, 1826)
(2958, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 66, 'X': 0}
modeling 17
ml :  XGBoost
train_y: (5631,)
train_w: (5631,)
train_X: (5631, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_05_54_540928
start 18
filtering 18
aggregating 18
checking aggregation: 
             hmdb_ids    y  n_obs
13347   HMDB0051331+H  0.0      1
11620  HMDB0044874+Na  0.0    119
8553    HMDB0011498+H  1.0      4
4975   HMDB0008586+Na  1.0     39
7301    HMDB0010389+H  0.0      4
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_05_56_718346
Printing histogram!
hist_plots/2020_04_06_15_05_56_890906
joining X 18
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_05_57_449466
splitting 18


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5415, 1826)
(2760, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 76, 'X': 5}
modeling 18
ml :  XGBoost
train_y: (5415,)
train_w: (5415,)
train_X: (5415, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_07_04_411765
start 19
filtering 19
aggregating 19
checking aggregation: 
             hmdb_ids    y  n_obs
3164    HMDB0007861+K  0.0    163
3404    HMDB0007945+K  4.0    107
4208    HMDB0008248+H  4.0     10
2896   HMDB0007486+Na  4.0      4
15914   HMDB0112497+H  8.0     55
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_07_06_590584
Printing histogram!
hist_plots/2020_04_06_15_07_06_757227
joining X 19
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_07_07_313296
splitting 19


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5635, 1826)
(2804, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 67, 'X': 3}
modeling 19
ml :  XGBoost
train_y: (5635,)
train_w: (5635,)
train_X: (5635, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_08_14_442488
start 20
filtering 20
aggregating 20
checking aggregation: 
            hmdb_ids    y  n_obs
6125   HMDB0009225+K  5.0     34
16680  HMDB0114978+H  0.0      3
12581  HMDB0049040+H  0.0     73
12907  HMDB0050093+H  0.0     47
2323   HMDB0007108+K  2.0     27
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_08_16_672375
Printing histogram!
hist_plots/2020_04_06_15_08_16_838361
joining X 20
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_08_17_394186
splitting 20


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5713, 1826)
(2943, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 77, 'X': 4}
modeling 20
ml :  XGBoost
train_y: (5713,)
train_w: (5713,)
train_X: (5713, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_09_29_168045
start 21
filtering 21
aggregating 21
checking aggregation: 
             hmdb_ids     y  n_obs
2738   HMDB0007357+Na  60.0    105
14734   HMDB0061640+H  10.0     10
12837   HMDB0049766+H   0.0      8
9720    HMDB0028910+K   0.0      1
6250    HMDB0009276+K   2.0      3
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_09_31_360823
Printing histogram!
hist_plots/2020_04_06_15_09_31_528975
joining X 21
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_09_32_082378
splitting 21


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5830, 1826)
(2940, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 73, 'X': 7}
modeling 21
ml :  XGBoost
train_y: (5830,)
train_w: (5830,)
train_X: (5830, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_10_43_357502
start 22
filtering 22
aggregating 22
checking aggregation: 
             hmdb_ids    y  n_obs
15231   HMDB0094101+K  0.0      5
8732   HMDB0011586+Na  1.0      2
10195   HMDB0033984+H  0.0     19
17378   HMDB0115534+K  0.0      2
16714   HMDB0114997+H  2.0     13
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_10_45_545681
Printing histogram!
hist_plots/2020_04_06_15_10_45_711319
joining X 22
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_10_46_277508
splitting 22


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5613, 1826)
(2950, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 67, 'X': 2}
modeling 22
ml :  XGBoost
train_y: (5613,)
train_w: (5613,)
train_X: (5613, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_11_55_924263
start 23
filtering 23
aggregating 23
checking aggregation: 
             hmdb_ids      y  n_obs
2417   HMDB0007159+Na   30.0     33
5025    HMDB0008603+K    0.0      4
17179   HMDB0115324+K    0.0      1
14735  HMDB0061640+Na    0.0      7
4516    HMDB0008368+H  124.0    175
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_11_58_104650
Printing histogram!
hist_plots/2020_04_06_15_11_58_271310
joining X 23
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_11_58_831851
splitting 23


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5662, 1826)
(2569, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 70, 'X': 2}
modeling 23
ml :  XGBoost
train_y: (5662,)
train_w: (5662,)
train_X: (5662, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_13_11_739167
start 24
filtering 24
aggregating 24
checking aggregation: 
             hmdb_ids     y  n_obs
4198    HMDB0008242+K  16.0    163
5877    HMDB0009087+H   0.0      1
9438   HMDB0013410+Na   0.0      1
14335   HMDB0055478+K   0.0      3
3147   HMDB0007850+Na   6.0      6
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_13_14_075249
Printing histogram!
hist_plots/2020_04_06_15_13_14_252778
joining X 24
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_13_14_625568
splitting 24
X_df_shapes:
(5835, 1024)
(2847, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 67, 'X': 236929}
modeling 24
ml :  XGBoost
train_y: (5835,)
train_w: (5835,)
train_X: (5835, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_13_50_231041
start 25
filtering 25
aggregating 25
checking aggregation: 
             hmdb_ids    y  n_obs
1639    HMDB0004362+H  0.0      1
8917   HMDB0012221+Na  4.0      5
7914    HMDB0010731+K  1.0      1
17205   HMDB0115334+H  0.0     11
16213   HMDB0112712+H  0.0     34
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_13_52_435150
Printing histogram!
hist_plots/2020_04_06_15_13_52_624784
joining X 25
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_13_52_963096
splitting 25
X_df_shapes:
(5214, 1024)
(3001, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 61, 'X': 184295}
modeling 25
ml :  XGBoost
train_y: (5214,)
train_w: (5214,)
train_X: (5214, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_14_24_764980
start 26
filtering 26
aggregating 26
checking aggregation: 
             hmdb_ids    y  n_obs
16599   HMDB0114934+K  9.0    136
525    HMDB0000546+Na  0.0      3
12842  HMDB0049781+Na  0.0      8
16264   HMDB0112765+H  7.0      9
7919    HMDB0010733+K  0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_14_26_979727
Printing histogram!
hist_plots/2020_04_06_15_14_27_144382
joining X 26
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_14_27_480488
splitting 26
X_df_shapes:
(5614, 1024)
(2694, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 73, 'X': 208788}
modeling 26
ml :  XGBoost
train_y: (5614,)
train_w: (5614,)
train_X: (5614, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_15_04_281054
start 27
filtering 27
aggregating 27
checking aggregation: 
            hmdb_ids    y  n_obs
8220   HMDB0011292+H  1.0     95
1730   HMDB0004948+H  3.0      5
16806  HMDB0115051+H  1.0      2
13948  HMDB0053975+K  0.0      3
7998   HMDB0011206+K  9.0     27
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_15_06_471858
Printing histogram!
hist_plots/2020_04_06_15_15_06_641702
joining X 27
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_15_06_995827
splitting 27


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5596, 1024)
(2647, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 68, 'X': 184027}
modeling 27
ml :  XGBoost
train_y: (5596,)
train_w: (5596,)
train_X: (5596, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_15_43_527106
start 28
filtering 28
aggregating 28
checking aggregation: 
             hmdb_ids    y  n_obs
1584    HMDB0003797+K  0.0      3
786     HMDB0000848+K  0.0     44
4390    HMDB0008315+H  1.0     11
17170   HMDB0115317+H  0.0     10
12052  HMDB0046235+Na  0.0      1
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_15_45_716945
Printing histogram!
hist_plots/2020_04_06_15_15_45_882830
joining X 28
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_15_46_222248
splitting 28
X_df_shapes:
(6080, 1024)
(2761, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 71, 'X': 255330}
modeling 28
ml :  XGBoost
train_y: (6080,)
train_w: (6080,)
train_X: (6080, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_16_22_989242
start 29
filtering 29
aggregating 29
checking aggregation: 
            hmdb_ids    y  n_obs
7296   HMDB0010387+K  0.0      7
3623   HMDB0008033+K  4.0     49
17424  HMDB0115554+H  1.0      2
11787  HMDB0045391+K  0.0      1
3929   HMDB0008141+H  4.0    111
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_16_25_191625
Printing histogram!
hist_plots/2020_04_06_15_16_25_359098
joining X 29
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_16_25_691619
splitting 29
X_df_shapes:
(5686, 1024)
(2840, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 71, 'X': 245457}
modeling 29
ml :  XGBoost
train_y: (5686,)
train_w: (5686,)
train_X: (5686, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_17_01_177743
start 30
filtering 30
aggregating 30
checking aggregation: 
             hmdb_ids    y  n_obs
8041   HMDB0011220+Na  4.0     46
3896    HMDB0008130+H  5.0     72
2446   HMDB0007177+Na  4.0      4
14672  HMDB0060040+Na  0.0      1
1503   HMDB0003337+Na  0.0     20
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_17_03_497463
Printing histogram!
hist_plots/2020_04_06_15_17_03_678719
joining X 30
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_17_04_028643
splitting 30
X_df_shapes:
(5529, 1024)
(2958, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 62, 'X': 233648}
modeling 30
ml :  XGBoost
train_y: (5529,)
train_w: (5529,)
train_X: (5529, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_17_39_007664
start 31
filtering 31
aggregating 31
checking aggregation: 
             hmdb_ids    y  n_obs
12196   HMDB0046841+H  0.0      6
11299  HMDB0044150+Na  0.0      4
2086    HMDB0006317+H  1.0     53
17032  HMDB0115218+Na  4.0     19
17292   HMDB0115503+K  0.0     18
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_17_41_290000
Printing histogram!
hist_plots/2020_04_06_15_17_41_470529
joining X 31
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_17_41_828865
splitting 31
X_df_shapes:
(5729, 1024)
(2534, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 62, 'X': 199945}
modeling 31
ml :  XGBoost
train_y: (5729,)
train_w: (5729,)
train_X: (5729, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_18_16_246674
start 32
filtering 32
aggregating 32
checking aggregation: 
             hmdb_ids     y  n_obs
2294    HMDB0007082+H   0.0      1
12060  HMDB0046242+Na   0.0      2
11920   HMDB0045717+K   0.0      1
3833    HMDB0008109+H   2.0     16
5736    HMDB0009007+H  12.0    105
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_18_18_452996
Printing histogram!
hist_plots/2020_04_06_15_18_18_614987
joining X 32
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_18_19_099131
splitting 32
X_df_shapes:
(4003, 1826)
(1958, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 63, 'X': 0}
m

Printing regression actual versus predicted output!
model_plots/2020_04_06_15_26_00_216847
start 41
filtering 41
aggregating 41
checking aggregation: 
             hmdb_ids    y  n_obs
9802    HMDB0029112+H  0.0      1
17049   HMDB0115231+K  2.0      3
10241  HMDB0034422+Na  0.0      1
7029    HMDB0009859+K  0.0      3
15663   HMDB0112316+H  0.0      3
(17685, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_26_02_495997
Printing histogram!
hist_plots/2020_04_06_15_26_02_671751
joining X 41
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_26_02_987476
splitting 41
X_df_shapes:
(4225, 1024)
(1989, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 60, 'X': 135751}
modeling 41
ml :  XGBoost
train_y: (4225,)
train_w: (4225,)
train_X: (4225, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_26_31_019045
start 42
filtering

  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8808, 1826)
(3709, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 36, 'X': 6}
modeling 48
ml :  XGBoost
train_y: (8808,)
train_w: (8808,)
train_X: (8808, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_31_16_854850
start 49
filtering 49
aggregating 49
checking aggregation: 
            hmdb_ids    y  n_obs
4495   HMDB0008456+H  8.0     60
6727   HMDB0009837+K  0.0      2
105    HMDB0000098+K  0.0      1
937    HMDB0001262+K  1.0      2
4815  HMDB0008607+Na  0.0      3
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_31_19_135799
Printing histogram!
hist_plots/2020_04_06_15_31_19_318781
joining X 49
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_31_20_054875
splitting 49


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8226, 1826)
(4138, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 40, 'X': 4}
modeling 49
ml :  XGBoost
train_y: (8226,)
train_w: (8226,)
train_X: (8226, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_33_05_231770
start 50
filtering 50
aggregating 50
checking aggregation: 
             hmdb_ids    y  n_obs
3053    HMDB0007892+H  0.0     53
11628   HMDB0046121+K  0.0     75
11175   HMDB0044738+K  0.0      1
1875   HMDB0005785+Na  0.0      2
1821    HMDB0005441+H  0.0    104
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_33_07_487343
Printing histogram!
hist_plots/2020_04_06_15_33_07_660068
joining X 50
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_33_08_372633
splitting 50


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8649, 1826)
(4385, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 37, 'X': 10}
modeling 50
ml :  XGBoost
train_y: (8649,)
train_w: (8649,)
train_X: (8649, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_35_02_090206
start 51
filtering 51
aggregating 51
checking aggregation: 
             hmdb_ids    y  n_obs
9339    HMDB0028721+K  0.0     22
10070   HMDB0039819+K  0.0      1
14164  HMDB0059777+Na  0.0     14
1077    HMDB0001901+H  0.0      2
5957    HMDB0009252+K  0.0    169
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_35_04_395218
Printing histogram!
hist_plots/2020_04_06_15_35_04_570249
joining X 51
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_35_05_286976
splitting 51


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8917, 1826)
(4141, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 35, 'X': 5}
modeling 51
ml :  XGBoost
train_y: (8917,)
train_w: (8917,)
train_X: (8917, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_36_52_518969
start 52
filtering 52
aggregating 52
checking aggregation: 
             hmdb_ids     y  n_obs
824     HMDB0000995+H   0.0      1
3508    HMDB0008067+K  10.0    165
13723  HMDB0054859+Na   0.0     18
9266    HMDB0013553+H   0.0      5
16196   HMDB0115030+K   0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_36_54_784150
Printing histogram!
hist_plots/2020_04_06_15_36_54_963575
joining X 52
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_36_55_673905
splitting 52


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8556, 1826)
(4212, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 36, 'X': 5}
modeling 52
ml :  XGBoost
train_y: (8556,)
train_w: (8556,)
train_X: (8556, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_38_47_330009
start 53
filtering 53
aggregating 53
checking aggregation: 
             hmdb_ids    y  n_obs
13221   HMDB0052810+H  0.0      1
10677   HMDB0043469+K  0.0      1
4016    HMDB0008257+K  0.0      4
6503   HMDB0009596+Na  4.0      6
15585  HMDB0112642+Na  0.0     31
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_38_49_570959
Printing histogram!
hist_plots/2020_04_06_15_38_49_740804
joining X 53
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_38_50_454965
splitting 53


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9129, 1826)
(3688, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 33, 'X': 6}
modeling 53
ml :  XGBoost
train_y: (9129,)
train_w: (9129,)
train_X: (9129, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_40_44_741727
start 54
filtering 54
aggregating 54
checking aggregation: 
            hmdb_ids    y  n_obs
13696  HMDB0054791+H  0.0      1
8944   HMDB0012448+H  0.0      7
8897   HMDB0012427+K  0.0     12
8276   HMDB0011496+K  0.0     47
16035  HMDB0114940+H  0.0    113
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_40_46_993357
Printing histogram!
hist_plots/2020_04_06_15_40_47_161687
joining X 54
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_40_47_869598
splitting 54


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8315, 1826)
(4222, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 38, 'X': 7}
modeling 54
ml :  XGBoost
train_y: (8315,)
train_w: (8315,)
train_X: (8315, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_42_37_697071
start 55
filtering 55
aggregating 55
checking aggregation: 
             hmdb_ids    y  n_obs
12600   HMDB0050342+K  1.0    116
545     HMDB0000635+H  0.0      2
12540  HMDB0050231+Na  0.0     13
3876    HMDB0008199+K  0.0     85
4273    HMDB0008354+K  0.0      3
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_42_40_004767
Printing histogram!
hist_plots/2020_04_06_15_42_40_179049
joining X 55
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_42_40_912291
splitting 55


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8488, 1826)
(4767, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 43, 'X': 6}
modeling 55
ml :  XGBoost
train_y: (8488,)
train_w: (8488,)
train_X: (8488, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_44_30_114904
start 56
filtering 56
aggregating 56
checking aggregation: 
             hmdb_ids    y  n_obs
6050    HMDB0009290+H  7.0     25
16044  HMDB0114947+Na  0.0     15
13146   HMDB0052614+K  0.0      4
2663    HMDB0007450+H  0.0      1
15774   HMDB0112802+K  0.0      3
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_44_32_384439
Printing histogram!
hist_plots/2020_04_06_15_44_32_556867
joining X 56
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_44_32_992496
splitting 56


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8565, 1024)
(3995, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 42, 'X': 337265}
modeling 56
ml :  XGBoost
train_y: (8565,)
train_w: (8565,)
train_X: (8565, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_45_27_652023
start 57
filtering 57
aggregating 57
checking aggregation: 
             hmdb_ids    y  n_obs
7001   HMDB0010360+Na  0.0      3
7373    HMDB0010587+K  1.0      9
14459   HMDB0062452+H  0.0      1
182     HMDB0000176+H  0.0      1
10422   HMDB0042642+H  0.0      4
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_45_29_947584
Printing histogram!
hist_plots/2020_04_06_15_45_30_116345
joining X 57
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_45_30_542984
splitting 57


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9024, 1024)
(4166, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 33, 'X': 434397}
modeling 57
ml :  XGBoost
train_y: (9024,)
train_w: (9024,)
train_X: (9024, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_46_27_842210
start 58
filtering 58
aggregating 58
checking aggregation: 
             hmdb_ids    y  n_obs
10175  HMDB0042172+Na  0.0      4
13779  HMDB0055101+Na  0.0     18
16607  HMDB0115327+Na  7.0    144
11614  HMDB0046107+Na  0.0     94
637    HMDB0000737+Na  0.0      7
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_46_30_083936
Printing histogram!
hist_plots/2020_04_06_15_46_30_259770
joining X 58
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_46_30_687527
splitting 58


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8479, 1024)
(3939, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 42, 'X': 348202}
modeling 58
ml :  XGBoost
train_y: (8479,)
train_w: (8479,)
train_X: (8479, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_47_24_918829
start 59
filtering 59
aggregating 59
checking aggregation: 
             hmdb_ids    y  n_obs
7838    HMDB0011249+K  0.0      9
5294   HMDB0008901+Na  0.0     67
7498   HMDB0010638+Na  0.0     90
12031  HMDB0048356+Na  0.0      5
13386  HMDB0053613+Na  0.0     25
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_47_27_151021
Printing histogram!
hist_plots/2020_04_06_15_47_27_323526
joining X 59
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_47_27_749681
splitting 59


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7719, 1024)
(3972, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 34, 'X': 257330}
modeling 59
ml :  XGBoost
train_y: (7719,)
train_w: (7719,)
train_X: (7719, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_48_16_818722
start 60
filtering 60
aggregating 60
checking aggregation: 
             hmdb_ids    y  n_obs
15622   HMDB0112692+H  0.0     18
15742  HMDB0112789+Na  0.0      1
1130    HMDB0002082+K  0.0      8
1087    HMDB0001975+K  1.0      1
16365  HMDB0115147+Na  0.0     41
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_48_19_037997
Printing histogram!
hist_plots/2020_04_06_15_48_19_211816
joining X 60
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_48_19_628263
splitting 60


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8349, 1024)
(4638, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 387464}
modeling 60
ml :  XGBoost
train_y: (8349,)
train_w: (8349,)
train_X: (8349, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_49_13_660228
start 61
filtering 61
aggregating 61
checking aggregation: 
             hmdb_ids    y  n_obs
16814  HMDB0115541+Na  7.0    144
2086   HMDB0007036+Na  0.0      1
919     HMDB0001235+H  0.0     42
9185    HMDB0013437+K  2.0     10
12809   HMDB0051084+K  0.0      6
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_49_15_939779
Printing histogram!
hist_plots/2020_04_06_15_49_16_109681
joining X 61
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_49_16_525791
splitting 61


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9016, 1024)
(4552, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 34, 'X': 431194}
modeling 61
ml :  XGBoost
train_y: (9016,)
train_w: (9016,)
train_X: (9016, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_50_13_819157
start 62
filtering 62
aggregating 62
checking aggregation: 
             hmdb_ids    y  n_obs
7129    HMDB0010421+K  0.0     99
13610   HMDB0054309+K  0.0      2
1203   HMDB0002285+Na  1.0      1
13362  HMDB0053511+Na  0.0     13
1595    HMDB0004948+K  0.0      6
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_50_16_063254
Printing histogram!
hist_plots/2020_04_06_15_50_17_123770
joining X 62
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_50_17_542416
splitting 62


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8691, 1024)
(4383, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 38, 'X': 439475}
modeling 62
ml :  XGBoost
train_y: (8691,)
train_w: (8691,)
train_X: (8691, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_51_12_942158
start 63
filtering 63
aggregating 63
checking aggregation: 
             hmdb_ids     y  n_obs
4726   HMDB0008568+Na   1.0    101
10229   HMDB0042271+K   0.0     19
5913    HMDB0009231+H  37.0     79
8633   HMDB0012233+Na   0.0      2
12437   HMDB0049843+H   0.0     63
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_51_15_252776
Printing histogram!
hist_plots/2020_04_06_15_51_15_418647
joining X 63
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_51_15_829725
splitting 63


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9195, 1024)
(4447, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 38, 'X': 477094}
modeling 63
ml :  XGBoost
train_y: (9195,)
train_w: (9195,)
train_X: (9195, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_52_14_086521
start 64
filtering 64
aggregating 64
checking aggregation: 
             hmdb_ids    y  n_obs
4907    HMDB0008652+H  8.0     60
13945   HMDB0055646+H  0.0      1
14477   HMDB0062546+K  0.0      1
9377   HMDB0028906+Na  0.0      1
1729    HMDB0005383+H  0.0      2
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_52_16_331742
Printing histogram!
hist_plots/2020_04_06_15_52_16_504321
joining X 64
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_52_17_093891
splitting 64


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5594, 1826)
(3048, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 39, 'X': 3}
modeling 64
ml :  XGBoost
train_y: (5594,)
train_w: (5594,)
train_X: (5594, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_53_25_638752
start 65
filtering 65
aggregating 65
checking aggregation: 
             hmdb_ids    y  n_obs
17067      msmls192+K  2.0     19
3329   HMDB0008002+Na  0.0     67
10284  HMDB0042406+Na  0.0    128
16032   HMDB0114939+H  0.0      4
14080   HMDB0056224+K  0.0     15
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_53_28_044246
Printing histogram!
hist_plots/2020_04_06_15_53_28_217292
joining X 65
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_53_28_779265
splitting 65


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5029, 1826)
(2697, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 42, 'X': 6}
modeling 65
ml :  XGBoost
train_y: (5029,)
train_w: (5029,)
train_X: (5029, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_54_33_524999
start 66
filtering 66
aggregating 66
checking aggregation: 
             hmdb_ids    y  n_obs
10648   HMDB0043403+H  0.0      1
7993    HMDB0011311+K  0.0     34
8897    HMDB0012427+K  0.0     12
8020    HMDB0011321+K  0.0     16
9640   HMDB0031110+Na  0.0    109
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_54_35_848759
Printing histogram!
hist_plots/2020_04_06_15_54_36_036509
joining X 66
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_54_36_604563
splitting 66


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5685, 1826)
(2261, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 36, 'X': 1}
modeling 66
ml :  XGBoost
train_y: (5685,)
train_w: (5685,)
train_X: (5685, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_55_50_096797
start 67
filtering 67
aggregating 67
checking aggregation: 
            hmdb_ids    y  n_obs
13413  HMDB0053741+H  0.0     46
2780   HMDB0007537+H  0.0      1
4105   HMDB0008290+H  0.0      1
10963  HMDB0044290+H  0.0      3
2775   HMDB0007535+K  0.0     10
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_55_52_396599
Printing histogram!
hist_plots/2020_04_06_15_55_52_568119
joining X 67
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_55_53_124876
splitting 67


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5199, 1826)
(2809, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 36, 'X': 1}
modeling 67
ml :  XGBoost
train_y: (5199,)
train_w: (5199,)
train_X: (5199, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_56_56_898218
start 68
filtering 68
aggregating 68
checking aggregation: 
             hmdb_ids    y  n_obs
11607   HMDB0046099+K  0.0      8
12020   HMDB0048245+H  0.0     73
7841    HMDB0011251+K  0.0     47
12547  HMDB0050234+Na  0.0      4
10842   HMDB0044084+K  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_56_59_257732
Printing histogram!
hist_plots/2020_04_06_15_56_59_438048
joining X 68
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_57_00_012284
splitting 68


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4785, 1826)
(3147, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 35, 'X': 2}
modeling 68
ml :  XGBoost
train_y: (4785,)
train_w: (4785,)
train_X: (4785, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_58_05_557788
start 69
filtering 69
aggregating 69
checking aggregation: 
             hmdb_ids    y  n_obs
7700   HMDB0011177+Na  0.0      3
15104   HMDB0112279+H  6.0     26
7351   HMDB0010578+Na  0.0      9
16766  HMDB0115524+Na  7.0    144
10241   HMDB0042337+K  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_58_07_966224
Printing histogram!
hist_plots/2020_04_06_15_58_08_150179
joining X 69
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_58_08_731565
splitting 69


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5925, 1826)
(2581, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 36, 'X': 2}
modeling 69
ml :  XGBoost
train_y: (5925,)
train_w: (5925,)
train_X: (5925, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_15_59_23_260845
start 70
filtering 70
aggregating 70
checking aggregation: 
             hmdb_ids    y  n_obs
4096    HMDB0008286+H  0.0     49
13375  HMDB0053579+Na  0.0     13
15702  HMDB0112765+Na  3.0     19
10687  HMDB0043492+Na  0.0      6
769     HMDB0000900+H  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_15_59_25_527075
Printing histogram!
hist_plots/2020_04_06_15_59_25_705320
joining X 70
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_15_59_26_261919
splitting 70


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5435, 1826)
(2352, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 35, 'X': 2}
modeling 70
ml :  XGBoost
train_y: (5435,)
train_w: (5435,)
train_X: (5435, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_00_39_837777
start 71
filtering 71
aggregating 71
checking aggregation: 
             hmdb_ids    y  n_obs
9004   HMDB0013122+Na  0.0     47
5466    HMDB0008991+H  0.0     56
8487   HMDB0011695+Na  0.0     82
9630   HMDB0031067+Na  0.0      2
11694   HMDB0046333+K  0.0      2
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_00_42_209357
Printing histogram!
hist_plots/2020_04_06_16_00_42_399962
joining X 71
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_00_42_993843
splitting 71


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4890, 1826)
(2590, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 35, 'X': 4}
modeling 71
ml :  XGBoost
train_y: (4890,)
train_w: (4890,)
train_X: (4890, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_01_50_149835
start 72
filtering 72
aggregating 72
checking aggregation: 
             hmdb_ids    y  n_obs
141     HMDB0000134+H  0.0      1
5436    HMDB0008973+K  2.0    105
13369  HMDB0053559+Na  0.0      4
8168    HMDB0011407+K  0.0     13
5029    HMDB0008735+K  0.0      3
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_01_52_567307
Printing histogram!
hist_plots/2020_04_06_16_01_52_752353
joining X 72
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_01_53_127092
splitting 72


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5056, 1024)
(2539, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 37, 'X': 192146}
modeling 72
ml :  XGBoost
train_y: (5056,)
train_w: (5056,)
train_X: (5056, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_02_26_231227
start 73
filtering 73
aggregating 73
checking aggregation: 
             hmdb_ids     y  n_obs
996     HMDB0001429+K   0.0      1
12134  HMDB0048789+Na   0.0    116
469     HMDB0000529+K   0.0      1
12923   HMDB0051359+H   0.0     68
2351    HMDB0007221+H  30.0     31
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_02_28_541984
Printing histogram!
hist_plots/2020_04_06_16_02_28_714770
joining X 73
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_02_29_068786
splitting 73


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5775, 1024)
(2543, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 49, 'X': 268583}
modeling 73
ml :  XGBoost
train_y: (5775,)
train_w: (5775,)
train_X: (5775, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_03_06_955804
start 74
filtering 74
aggregating 74
checking aggregation: 
             hmdb_ids    y  n_obs
16585  HMDB0115314+Na  0.0     36
11187  HMDB0044753+Na  0.0    129
2012    HMDB0006737+K  0.0      1
5028    HMDB0008735+H  6.0     82
12355   HMDB0049632+H  0.0     46
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_03_09_250379
Printing histogram!
hist_plots/2020_04_06_16_03_09_437895
joining X 74
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_03_09_785834
splitting 74


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5366, 1024)
(2392, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 34, 'X': 179731}
modeling 74
ml :  XGBoost
train_y: (5366,)
train_w: (5366,)
train_X: (5366, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_03_45_071152
start 75
filtering 75
aggregating 75
checking aggregation: 
             hmdb_ids    y  n_obs
10467  HMDB0042724+Na  0.0      3
8985    HMDB0012798+H  0.0      2
4726   HMDB0008568+Na  1.0    101
6274   HMDB0009456+Na  1.0     15
13355   HMDB0053497+K  0.0      6
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_03_47_369687
Printing histogram!
hist_plots/2020_04_06_16_03_47_550616
joining X 75
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_03_47_907669
splitting 75


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4766, 1024)
(3303, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 37, 'X': 221904}
modeling 75
ml :  XGBoost
train_y: (4766,)
train_w: (4766,)
train_X: (4766, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_04_19_436291
start 76
filtering 76
aggregating 76
checking aggregation: 
             hmdb_ids    y  n_obs
10613   HMDB0043240+H  0.0      1
5133   HMDB0008823+Na  1.0     50
7020    HMDB0010379+H  5.0     34
2814    HMDB0007579+K  0.0      1
10533  HMDB0042896+Na  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_04_21_776538
Printing histogram!
hist_plots/2020_04_06_16_04_21_970282
joining X 76
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_04_22_322270
splitting 76


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5176, 1024)
(2793, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 40, 'X': 229269}
modeling 76
ml :  XGBoost
train_y: (5176,)
train_w: (5176,)
train_X: (5176, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_04_56_485637
start 77
filtering 77
aggregating 77
checking aggregation: 
            hmdb_ids    y  n_obs
6173   HMDB0009375+K  0.0     11
675    HMDB0000774+K  0.0      1
8903   HMDB0012429+K  0.0      1
6312   HMDB0009481+K  0.0     45
14491  HMDB0062633+K  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_04_58_839306
Printing histogram!
hist_plots/2020_04_06_16_04_59_024544
joining X 77
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_04_59_374568
splitting 77
X_df_shapes:
(5417, 1024)
(2420, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 34, 'X': 141711}
modeling 77
ml :  XGBoost
train_y: (5417,)
train_w: (5417,)
train_X: (5417, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_05_35_858409
start 78
filtering 78
aggregating 78
checking aggregation: 
             hmdb_ids    y  n_obs
6305   HMDB0009471+Na  6.0      9
7567   HMDB0010668+Na  0.0      1
11137   HMDB0044683+H  0.0     11
8164    HMDB0011406+H  0.0     96
9612    HMDB0031042+K  0.0      1
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_05_38_194032
Printing histogram!
hist_plots/2020_04_06_16_05_38_372818
joining X 78
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_05_38_728391
splitting 78


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5318, 1024)
(2973, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 37, 'X': 259442}
modeling 78
ml :  XGBoost
train_y: (5318,)
train_w: (5318,)
train_X: (5318, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_06_13_926456
start 79
filtering 79
aggregating 79
checking aggregation: 
             hmdb_ids    y  n_obs
148     HMDB0000148+K  0.0      3
12601  HMDB0050342+Na  0.0    115
6524    HMDB0009612+K  3.0     20
14755   HMDB0094666+H  0.0      2
4959    HMDB0008689+K  0.0     65
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_06_16_261907
Printing histogram!
hist_plots/2020_04_06_16_06_16_444138
joining X 79
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_06_16_798815
splitting 79
X_df_shapes:
(4690, 1024)
(2586, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 38, 'X': 166344}
modeling 79
ml :  XGBoost
train_y: (4690,)
train_w: (4690,)
train_X: (4690, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_06_47_682175
start 80
filtering 80
aggregating 80
checking aggregation: 
            hmdb_ids    y  n_obs
5159   HMDB0008834+H  0.0     58
6767   HMDB0009854+K  0.0     13
12948  HMDB0051399+H  0.0      1
1735   HMDB0005385+H  0.0      4
13379  HMDB0053589+K  0.0     12
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_06_50_076640
Printing histogram!
hist_plots/2020_04_06_16_06_50_253676
joining X 80
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_06_50_746300
splitting 80
X_df_shapes:
(3577, 1826)
(1673, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 1}
modeling 80
m

Printing regression actual versus predicted output!
model_plots/2020_04_06_16_14_04_444500
start 89
filtering 89
aggregating 89
checking aggregation: 
             hmdb_ids    y  n_obs
13502  HMDB0053978+Na  0.0      4
2889   HMDB0007704+Na  0.0      3
16414   HMDB0115200+H  0.0      1
11143   HMDB0044687+H  0.0    111
4124    HMDB0008296+K  2.0    166
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_14_06_785276
Printing histogram!
hist_plots/2020_04_06_16_14_06_950633
joining X 89
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_14_07_255177
splitting 89
X_df_shapes:
(3554, 1024)
(2174, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 145321}
modeling 89
ml :  XGBoost
train_y: (3554,)
train_w: (3554,)
train_X: (3554, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_14_30_842425
start 90
filtering

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_16_26_027805
start 94
filtering 94
aggregating 94
checking aggregation: 
             hmdb_ids    y  n_obs
15001  HMDB0112122+Na  0.0      2
10324   HMDB0042467+K  0.0      1
11616   HMDB0046112+K  0.0     88
10446   HMDB0042694+H  0.0     40
7597    HMDB0010682+H  0.0     53
(17095, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_16_28_391018
Printing histogram!
hist_plots/2020_04_06_16_16_28_574510
joining X 94
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_16_28_894085
splitting 94
X_df_shapes:
(4265, 1024)
(1825, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 136386}
modeling 94
ml :  XGBoost
train_y: (4265,)
train_w: (4265,)
train_X: (4265, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_16_56_978146
start 95
filt

  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8375, 1826)
(4393, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 5}
modeling 96
ml :  XGBoost
train_y: (8375,)
train_w: (8375,)
train_X: (8375, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_19_11_638736
start 97
filtering 97
aggregating 97
checking aggregation: 
             hmdb_ids    y  n_obs
3316   HMDB0008045+Na  5.0     30
14872   HMDB0112260+H  0.0      2
11773   HMDB0047914+K  0.0    110
4342    HMDB0008447+K  0.0      4
4633   HMDB0008593+Na  5.0     30
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_19_14_109752
Printing histogram!
hist_plots/2020_04_06_16_19_14_293472
joining X 97
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_19_15_021568
splitting 97


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7578, 1826)
(4297, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 30, 'X': 8}
modeling 97
ml :  XGBoost
train_y: (7578,)
train_w: (7578,)
train_X: (7578, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_20_53_881464
start 98
filtering 98
aggregating 98
checking aggregation: 
             hmdb_ids    y  n_obs
8371   HMDB0011733+Na  0.0      3
14816  HMDB0112140+Na  0.0      2
14062   HMDB0061189+H  0.0      1
8342    HMDB0011699+H  0.0     30
5229    HMDB0008940+K  0.0     40
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_20_56_291347
Printing histogram!
hist_plots/2020_04_06_16_20_56_469264
joining X 98
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_20_57_180922
splitting 98


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8724, 1826)
(3800, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 7}
modeling 98
ml :  XGBoost
train_y: (8724,)
train_w: (8724,)
train_X: (8724, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_22_50_091992
start 99
filtering 99
aggregating 99
checking aggregation: 
             hmdb_ids    y  n_obs
521     HMDB0000621+K  0.0      1
9350    HMDB0029811+H  0.0      1
14182  HMDB0062281+Na  0.0      1
6339    HMDB0009588+H  2.0     40
8248   HMDB0011555+Na  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_22_52_428164
Printing histogram!
hist_plots/2020_04_06_16_22_52_610787
joining X 99
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_22_53_328993
splitting 99


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8269, 1826)
(4105, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 4}
modeling 99
ml :  XGBoost
train_y: (8269,)
train_w: (8269,)
train_X: (8269, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_24_35_892992
start 100
filtering 100
aggregating 100
checking aggregation: 
             hmdb_ids    y  n_obs
4552   HMDB0008555+Na  0.0      5
172    HMDB0000168+Na  0.0      3
7274   HMDB0010606+Na  0.0      9
16209   HMDB0115209+H  0.0    113
11662   HMDB0047000+H  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_24_38_335502
Printing histogram!
hist_plots/2020_04_06_16_24_38_524568
joining X 100
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_24_39_259800
splitting 100


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9746, 1826)
(3706, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 7}
modeling 100
ml :  XGBoost
train_y: (9746,)
train_w: (9746,)
train_X: (9746, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_26_42_719280
start 101
filtering 101
aggregating 101
checking aggregation: 
             hmdb_ids    y  n_obs
15272  HMDB0112556+Na  1.0      8
14894   HMDB0112281+H  0.0      1
10006   HMDB0042199+K  0.0      1
15037   HMDB0112410+K  0.0      3
2838    HMDB0007865+H  0.0      2
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_26_45_185668
Printing histogram!
hist_plots/2020_04_06_16_26_45_385284
joining X 101
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_26_46_114422
splitting 101


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7715, 1826)
(4708, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 10}
modeling 101
ml :  XGBoost
train_y: (7715,)
train_w: (7715,)
train_X: (7715, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_28_27_757223
start 102
filtering 102
aggregating 102
checking aggregation: 
             hmdb_ids    y  n_obs
2400    HMDB0007311+K  0.0     10
12888   HMDB0052188+H  0.0      3
14      HMDB0000011+H  1.0      1
6625   HMDB0009854+Na  0.0     16
5531    HMDB0009097+H  0.0     65
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_28_30_293933
Printing histogram!
hist_plots/2020_04_06_16_28_30_483765
joining X 102
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_28_31_276263
splitting 102


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8469, 1826)
(4289, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 4}
modeling 102
ml :  XGBoost
train_y: (8469,)
train_w: (8469,)
train_X: (8469, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_30_23_039530
start 103
filtering 103
aggregating 103
checking aggregation: 
             hmdb_ids    y  n_obs
15738  HMDB0114878+Na  4.0    147
11162   HMDB0045271+H  0.0     18
10947   HMDB0044678+K  0.0      2
12102   HMDB0049393+K  0.0    111
13059  HMDB0052913+Na  0.0      5
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_30_25_451131
Printing histogram!
hist_plots/2020_04_06_16_30_25_646318
joining X 103
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_30_26_379432
splitting 103


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8637, 1826)
(4575, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 6}
modeling 103
ml :  XGBoost
train_y: (8637,)
train_w: (8637,)
train_X: (8637, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_32_20_040336
start 104
filtering 104
aggregating 104
checking aggregation: 
             hmdb_ids    y  n_obs
1682   HMDB0005389+Na  0.0    119
8845    HMDB0013127+K  0.0      2
2624   HMDB0007509+Na  0.0      1
14032  HMDB0060251+Na  0.0      4
5077    HMDB0008867+H  0.0     12
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_32_22_442149
Printing histogram!
hist_plots/2020_04_06_16_32_22_620873
joining X 104
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_32_23_086774
splitting 104


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8220, 1024)
(4497, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 438037}
modeling 104
ml :  XGBoost
train_y: (8220,)
train_w: (8220,)
train_X: (8220, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_33_17_288816
start 105
filtering 105
aggregating 105
checking aggregation: 
            hmdb_ids     y  n_obs
15498  HMDB0112773+K   0.0      2
11596  HMDB0046755+H   0.0      3
1459   HMDB0004645+K   0.0      5
6342   HMDB0009589+H  21.0     38
10526  HMDB0043631+H   0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_33_19_694437
Printing histogram!
hist_plots/2020_04_06_16_33_19_879710
joining X 105
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_33_20_333109
splitting 105


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8245, 1024)
(4301, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 31, 'X': 369632}
modeling 105
ml :  XGBoost
train_y: (8245,)
train_w: (8245,)
train_X: (8245, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_34_14_321529
start 106
filtering 106
aggregating 106
checking aggregation: 
             hmdb_ids     y  n_obs
15915   HMDB0114997+H   0.0     11
13208   HMDB0053641+K   0.0      6
13821   HMDB0055805+H   0.0     72
3439    HMDB0008091+H   0.0     10
3074   HMDB0007949+Na  69.0     72
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_34_16_718994
Printing histogram!
hist_plots/2020_04_06_16_34_16_907338
joining X 106
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_34_17_341731
splitting 106


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8907, 1024)
(3679, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 305785}
modeling 106
ml :  XGBoost
train_y: (8907,)
train_w: (8907,)
train_X: (8907, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_35_15_505512
start 107
filtering 107
aggregating 107
checking aggregation: 
            hmdb_ids    y  n_obs
7418   HMDB0010668+K  2.0      6
8249   HMDB0011556+H  0.0      4
4873  HMDB0008727+Na  2.0    138
6502   HMDB0009798+K  0.0      2
1581   HMDB0005048+K  0.0      3
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_35_17_925359
Printing histogram!
hist_plots/2020_04_06_16_35_18_109965
joining X 107
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_35_18_538741
splitting 107


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8458, 1024)
(4084, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 396738}
modeling 107
ml :  XGBoost
train_y: (8458,)
train_w: (8458,)
train_X: (8458, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_36_13_655234
start 108
filtering 108
aggregating 108
checking aggregation: 
             hmdb_ids     y  n_obs
1423    HMDB0004038+K   0.0      3
5817    HMDB0009253+H  19.0    120
10720  HMDB0044165+Na   0.0    128
14175  HMDB0062248+Na   0.0      6
16607   HMDB0115550+K   0.0     52
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_36_16_080211
Printing histogram!
hist_plots/2020_04_06_16_36_16_271092
joining X 108
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_36_16_711673
splitting 108


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(9163, 1024)
(3719, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 370531}
modeling 108
ml :  XGBoost
train_y: (9163,)
train_w: (9163,)
train_X: (9163, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_37_17_829644
start 109
filtering 109
aggregating 109
checking aggregation: 
            hmdb_ids    y  n_obs
5800   HMDB0009245+K  0.0      3
9967   HMDB0042092+K  0.0     38
5495   HMDB0009083+H  0.0     44
1376   HMDB0003550+H  2.0      4
8316  HMDB0011616+Na  0.0      2
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_37_20_369958
Printing histogram!
hist_plots/2020_04_06_16_37_20_551677
joining X 109
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_37_21_015606
splitting 109


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8101, 1024)
(4157, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 22, 'X': 351212}
modeling 109
ml :  XGBoost
train_y: (8101,)
train_w: (8101,)
train_X: (8101, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_38_14_558237
start 110
filtering 110
aggregating 110
checking aggregation: 
             hmdb_ids    y  n_obs
762     HMDB0000927+H  0.0      1
7309    HMDB0010622+H  0.0      3
7569   HMDB0011205+Na  0.0      9
12082   HMDB0049288+K  0.0    114
1848    HMDB0006217+H  0.0     15
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_38_16_930505
Printing histogram!
hist_plots/2020_04_06_16_38_17_117414
joining X 110
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_38_17_560225
splitting 110


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(8915, 1024)
(4197, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 425172}
modeling 110
ml :  XGBoost
train_y: (8915,)
train_w: (8915,)
train_X: (8915, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_39_15_973187
start 111
filtering 111
aggregating 111
checking aggregation: 
             hmdb_ids    y  n_obs
11686   HMDB0047196+K  0.0      1
11735  HMDB0047805+Na  0.0     91
15862   HMDB0114966+K  0.0     29
3036    HMDB0007936+H  0.0     58
7728    HMDB0011269+H  0.0     12
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_39_18_518751
Printing histogram!
hist_plots/2020_04_06_16_39_18_716051
joining X 111
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_39_19_147570
splitting 111


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7800, 1024)
(4633, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 392945}
modeling 111
ml :  XGBoost
train_y: (7800,)
train_w: (7800,)
train_X: (7800, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_40_10_927008
start 112
filtering 112
aggregating 112
checking aggregation: 
             hmdb_ids    y  n_obs
4495   HMDB0008528+Na  0.0     37
12121   HMDB0049447+K  0.0     12
11132   HMDB0045185+H  0.0     63
8143    HMDB0011506+K  0.0    104
8789    HMDB0012454+H  0.0      3
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_40_13_344387
Printing histogram!
hist_plots/2020_04_06_16_40_13_533206
joining X 112
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_40_14_151152
splitting 112


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5178, 1826)
(2346, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 3}
modeling 112
ml :  XGBoost
train_y: (5178,)
train_w: (5178,)
train_X: (5178, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_41_15_863160
start 113
filtering 113
aggregating 113
checking aggregation: 
             hmdb_ids    y  n_obs
14188  HMDB0062291+Na  0.0     18
12184   HMDB0049670+K  0.0      1
4295    HMDB0008428+K  8.0    140
10633  HMDB0044043+Na  0.0    128
8348    HMDB0011704+H  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_41_18_458572
Printing histogram!
hist_plots/2020_04_06_16_41_18_659661
joining X 113
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_41_19_336477
splitting 113


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5427, 1826)
(2511, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 30, 'X': 2}
modeling 113
ml :  XGBoost
train_y: (5427,)
train_w: (5427,)
train_X: (5427, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_42_31_070371
start 114
filtering 114
aggregating 114
checking aggregation: 
             hmdb_ids    y  n_obs
16045   HMDB0115072+K  0.0    115
2786   HMDB0007741+Na  0.0      1
3034    HMDB0007935+K  0.0     55
10668  HMDB0044094+Na  0.0    128
14678  HMDB0108556+Na  0.0     91
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_42_33_688193
Printing histogram!
hist_plots/2020_04_06_16_42_33_884384
joining X 114
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_42_34_638754
splitting 114


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5069, 1826)
(2717, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 3}
modeling 114
ml :  XGBoost
train_y: (5069,)
train_w: (5069,)
train_X: (5069, 1826)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_43_41_926163
start 115
filtering 115
aggregating 115
checking aggregation: 
             hmdb_ids    y  n_obs
13394  HMDB0054254+Na  0.0     25
2536   HMDB0007439+Na  0.0      2
4554    HMDB0008556+K  1.0      9
11822   HMDB0048219+H  0.0      1
9454    HMDB0031066+K  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_43_44_428414
Printing histogram!
hist_plots/2020_04_06_16_43_44_629864
joining X 115
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_43_45_207046
splitting 115


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5094, 1826)
(2808, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 5}
modeling 115
ml :  XGBoost
train_y: (5094,)
train_w: (5094,)
train_X: (5094, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_44_48_919099
start 116
filtering 116
aggregating 116
checking aggregation: 
             hmdb_ids    y  n_obs
1377    HMDB0003553+K  3.0      3
14920   HMDB0112300+H  1.0     19
10371  HMDB0042989+Na  0.0     19
3287    HMDB0008035+K  6.0    161
6399    HMDB0009681+H  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_44_51_409008
Printing histogram!
hist_plots/2020_04_06_16_44_51_594254
joining X 116
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_44_52_161551
splitting 116


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4936, 1826)
(2578, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 2}
modeling 116
ml :  XGBoost
train_y: (4936,)
train_w: (4936,)
train_X: (4936, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_45_57_620705
start 117
filtering 117
aggregating 117
checking aggregation: 
             hmdb_ids    y  n_obs
1437   HMDB0004158+Na  0.0      2
14214   HMDB0062318+K  0.0      4
96      HMDB0000089+K  0.0      1
12758  HMDB0051387+Na  0.0     18
10408  HMDB0043120+Na  0.0      7
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_46_00_130688
Printing histogram!
hist_plots/2020_04_06_16_46_00_313332
joining X 117
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_46_00_887008
splitting 117


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5001, 1826)
(2651, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 3}
modeling 117
ml :  XGBoost
train_y: (5001,)
train_w: (5001,)
train_X: (5001, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_47_05_054912
start 118
filtering 118
aggregating 118
checking aggregation: 
             hmdb_ids    y  n_obs
5234    HMDB0008942+H  0.0     65
16744  HMDB0116630+Na  0.0     31
14718  HMDB0110127+Na  0.0     91
13594  HMDB0055106+Na  0.0     18
12138   HMDB0049532+H  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_47_07_489616
Printing histogram!
hist_plots/2020_04_06_16_47_07_672822
joining X 118
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_47_08_235071
splitting 118


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5097, 1826)
(2554, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 4}
modeling 118
ml :  XGBoost
train_y: (5097,)
train_w: (5097,)
train_X: (5097, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_48_15_705341
start 119
filtering 119
aggregating 119
checking aggregation: 
             hmdb_ids    y  n_obs
11018  HMDB0044859+Na  0.0      7
11763  HMDB0047873+Na  0.0      1
5165    HMDB0008909+H  3.0    145
5451    HMDB0009061+K  1.0    120
8660    HMDB0012397+H  1.0      5
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_48_18_152824
Printing histogram!
hist_plots/2020_04_06_16_48_18_334052
joining X 119
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_48_18_906070
splitting 119


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5503, 1826)
(2660, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 4}
modeling 119
ml :  XGBoost
train_y: (5503,)
train_w: (5503,)
train_X: (5503, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_49_30_731978
start 120
filtering 120
aggregating 120
checking aggregation: 
             hmdb_ids    y  n_obs
3898    HMDB0008264+H  0.0     16
11548   HMDB0046533+K  0.0      1
5812    HMDB0009251+K  0.0     31
16730  HMDB0116606+Na  0.0      6
1101   HMDB0002098+Na  0.0      2
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_49_33_125976
Printing histogram!
hist_plots/2020_04_06_16_49_33_310717
joining X 120
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_49_33_682309
splitting 120
X_df_shapes:
(5816, 1024)
(2311, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 221138}
modeling 120
ml :  XGBoost
train_y: (5816,)
train_w: (5816,)
train_X: (5816, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_50_10_486544
start 121
filtering 121
aggregating 121
checking aggregation: 
             hmdb_ids    y  n_obs
1796   HMDB0005474+Na  0.0     96
9951    HMDB0041742+H  0.0      3
7554    HMDB0011188+K  0.0      1
13276   HMDB0053891+H  0.0      1
8216    HMDB0011536+K  0.0      9
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_50_12_806864
Printing histogram!
hist_plots/2020_04_06_16_50_12_983773
joining X 121
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_50_13_339256
splitting 121


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5028, 1024)
(3161, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 241722}
modeling 121
ml :  XGBoost
train_y: (5028,)
train_w: (5028,)
train_X: (5028, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_50_46_470539
start 122
filtering 122
aggregating 122
checking aggregation: 
             hmdb_ids    y  n_obs
13249   HMDB0053839+K  0.0      1
1192    HMDB0002390+K  0.0      2
13409  HMDB0054290+Na  0.0     25
11469   HMDB0046246+H  0.0      1
11036  HMDB0044885+Na  0.0      7
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_50_48_850346
Printing histogram!
hist_plots/2020_04_06_16_50_49_038952
joining X 122
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_50_49_391670
splitting 122
X_df_shapes:
(5227, 1024)
(2562, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 219004}
modeling 122
ml :  XGBoost
train_y: (5227,)
train_w: (5227,)
train_X: (5227, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_51_23_368944
start 123
filtering 123
aggregating 123
checking aggregation: 
            hmdb_ids    y  n_obs
9177   HMDB0028764+H  0.0      1
9764   HMDB0036599+K  0.0      1
6475   HMDB0009787+H  0.0      1
13099  HMDB0053264+K  0.0    114
3052   HMDB0007941+K  5.0     12
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_51_25_718003
Printing histogram!
hist_plots/2020_04_06_16_51_25_895469
joining X 123
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_51_26_248658
splitting 123
X_df_shapes:
(5077, 1024)
(1968, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 115118}
modeling 123
ml :  XGBoost
train_y: (5077,)
train_w: (5077,)
train_X: (5077, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_51_59_052141
start 124
filtering 124
aggregating 124
checking aggregation: 
             hmdb_ids     y  n_obs
3026   HMDB0007932+Na   0.0     11
4719    HMDB0008629+H  10.0     38
1698    HMDB0005403+K   0.0     71
16849      msmls303+H   0.0     10
5678    HMDB0009184+H   0.0      6
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_52_01_433316
Printing histogram!
hist_plots/2020_04_06_16_52_01_613256
joining X 124
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_52_01_964159
splitting 124
X_df_shapes:
(5017, 1024)
(2376, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 155468}
modeling 124
ml :  XGBoost
train_y: (5017,)
train_w: (5017,)
train_X: (5017, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_52_34_883714
start 125
filtering 125
aggregating 125
checking aggregation: 
             hmdb_ids    y  n_obs
4440    HMDB0008503+H  2.0      8
1419   HMDB0004029+Na  0.0      4
16861      msmls533+H  0.0      5
9403    HMDB0030979+H  0.0      8
5487    HMDB0009075+K  2.0     63
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_52_37_292215
Printing histogram!
hist_plots/2020_04_06_16_52_37_471114
joining X 125
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_52_37_825462
splitting 125


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5301, 1024)
(2513, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 202278}
modeling 125
ml :  XGBoost
train_y: (5301,)
train_w: (5301,)
train_X: (5301, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_53_12_400652
start 126
filtering 126
aggregating 126
checking aggregation: 
             hmdb_ids    y  n_obs
3750    HMDB0008203+H  2.0    143
7047    HMDB0010444+K  0.0    116
2035    HMDB0007052+H  2.0      6
10457   HMDB0043359+K  0.0      1
3537   HMDB0008124+Na  0.0     75
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_53_14_810977
Printing histogram!
hist_plots/2020_04_06_16_53_14_994533
joining X 126
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_53_15_354726
splitting 126


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4656, 1024)
(2765, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 183820}
modeling 126
ml :  XGBoost
train_y: (4656,)
train_w: (4656,)
train_X: (4656, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_53_46_124882
start 127
filtering 127
aggregating 127
checking aggregation: 
             hmdb_ids     y  n_obs
13425   HMDB0054316+K   0.0      3
11848  HMDB0048365+Na   0.0      3
9665    HMDB0034227+H   0.0      1
8196   HMDB0011524+Na  24.0     42
2989    HMDB0007918+H   0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_53_48_558727
Printing histogram!
hist_plots/2020_04_06_16_53_48_741034
joining X 127
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_53_49_094043
splitting 127


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4991, 1024)
(2945, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 254991}
modeling 127
ml :  XGBoost
train_y: (4991,)
train_w: (4991,)
train_X: (4991, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_54_22_180810
start 128
filtering 128
aggregating 128
checking aggregation: 
            hmdb_ids    y  n_obs
8341  HMDB0011698+Na  0.0     23
8272   HMDB0011574+H  0.0      1
4256  HMDB0008409+Na  2.0    138
2977  HMDB0007912+Na  1.0    174
7336   HMDB0010633+H  1.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_54_24_589217
Printing histogram!
hist_plots/2020_04_06_16_54_24_766829
joining X 128
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_54_25_259686
splitting 128
X_df_shapes:
(3899, 1826)
(1436, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_55_12_980192
start 129
filtering 129
aggregating 129
checking aggregation: 
             hmdb_ids    y  n_obs
16503   HMDB0115510+K  0.0    163
11455   HMDB0046217+H  0.0      1
7849    HMDB0011316+H  1.0     49
16684  HMDB0115589+Na  0.0     15
1447   HMDB0004259+Na  0.0      6
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_55_15_331378
Printing histogram!
hist_plots/2020_04_06_16_55_15_510976
joining X 129
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_55_15_999120
splitting 129
X_df_shapes:
(3563, 1826)
(2058, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 0}
modeling 129
ml :  XGBoost
train_y: (3563,)
train_w: (3563,)
train_X: (3563, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_56_00_837508
start 130
filtering 130
aggregating 130
checking aggregation: 
             hmdb_ids     y  n_obs
2433    HMDB0007338+K   0.0      1
5919   HMDB0009293+Na  55.0     55
823    HMDB0001060+Na   0.0     25
10283   HMDB0042746+H   0.0      1
10296   HMDB0042768+H   0.0      3
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_56_03_205577
Printing histogram!
hist_plots/2020_04_06_16_56_03_389679
joining X 130
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_56_03_875412
splitting 130
X_df_shapes:
(3613, 1826)
(1981, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 0}
modeling 130
ml :  XGBoost
train_y: (3613,)
train_w: (3613,)
train_X: (3613, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_56_49_406673
start 131
filte

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_16_57_39_295233
start 132
filtering 132
aggregating 132
checking aggregation: 
             hmdb_ids     y  n_obs
4083    HMDB0008334+H  28.0     41
7740    HMDB0011274+H   0.0    116
12380   HMDB0050270+K   0.0      1
10000   HMDB0042193+K   0.0      1
12013  HMDB0049099+Na   0.0     14
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_16_57_41_675363
Printing histogram!
hist_plots/2020_04_06_16_57_41_856394
joining X 132
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_16_57_42_338016
splitting 132
X_df_shapes:
(4072, 1826)
(1913, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 1}
modeling 132
ml :  XGBoost
train_y: (4072,)
train_w: (4072,)
train_X: (4072, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_16_58_32_984672
sta

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_17_00_12_110903
start 135
filtering 135
aggregating 135
checking aggregation: 
            hmdb_ids    y  n_obs
4394  HMDB0008473+Na  2.0    138
2314   HMDB0007251+H  0.0      5
9175  HMDB0028732+Na  0.0      2
9649  HMDB0034134+Na  0.0      1
7798   HMDB0011294+K  0.0     13
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_00_14_491529
Printing histogram!
hist_plots/2020_04_06_17_00_14_675988
joining X 135
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_00_15_168721
splitting 135
X_df_shapes:
(3855, 1826)
(1935, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 2}
modeling 135
ml :  XGBoost
train_y: (3855,)
train_w: (3855,)
train_X: (3855, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_17_01_03_147238
start 136
filtering 136
aggregating 136
checking aggregation: 
             hmdb_ids    y  n_obs
12542   HMDB0050789+H  0.0     35
10143   HMDB0042475+K  0.0      1
9617   HMDB0033721+Na  0.0      2
5663    HMDB0009171+H  0.0     23
64      HMDB0000056+K  0.0      1
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_01_05_529092
Printing histogram!
hist_plots/2020_04_06_17_01_05_705083
joining X 136
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_01_06_029331
splitting 136
X_df_shapes:
(3849, 1024)
(2015, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 22, 'X': 145166}
modeling 136
ml :  XGBoost
train_y: (3849,)
train_w: (3849,)
train_X: (3849, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_01_30_485681
start 137
filter

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_17_02_52_461208
start 140
filtering 140
aggregating 140
checking aggregation: 
             hmdb_ids    y  n_obs
1211   HMDB0002488+Na  0.0     43
6909    HMDB0010391+H  1.0     41
460    HMDB0000529+Na  0.0      4
768     HMDB0000946+H  0.0     62
15909   HMDB0114993+H  0.0    113
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_02_54_800812
Printing histogram!
hist_plots/2020_04_06_17_02_54_982233
joining X 140
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_02_55_287011
splitting 140
X_df_shapes:
(4268, 1024)
(1791, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 135903}
modeling 140
ml :  XGBoost
train_y: (4268,)
train_w: (4268,)
train_X: (4268, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_17_03_23_346016
start 141
filtering 141
aggregating 141
checking aggregation: 
             hmdb_ids    y  n_obs
1238    HMDB0002704+H  0.0      1
16066   HMDB0115082+K  0.0      4
5048    HMDB0008854+K  0.0      2
13544   HMDB0054879+K  0.0      3
6792   HMDB0009919+Na  0.0     25
(16868, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_03_25_731019
Printing histogram!
hist_plots/2020_04_06_17_03_25_910668
joining X 141
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_03_27_830653
splitting 141
X_df_shapes:
(3892, 1024)
(1672, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 111038}
modeling 141
ml :  XGBoost
train_y: (3892,)
train_w: (3892,)
train_X: (3892, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_03_53_030332
sta

  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7159, 1826)
(3495, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 45, 'X': 6}
modeling 144
ml :  XGBoost
train_y: (7159,)
train_w: (7159,)
train_X: (7159, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_06_28_889238
start 145
filtering 145
aggregating 145
checking aggregation: 
             hmdb_ids    y  n_obs
11577  HMDB0067861+Na  0.0      1
11389   HMDB0062378+K  0.0      1
8337   HMDB0042178+Na  0.0      2
5326    HMDB0009465+H  5.0     13
1330   HMDB0005461+Na  0.0      2
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_06_31_198330
Printing histogram!
hist_plots/2020_04_06_17_06_31_382289
joining X 145
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_06_32_055865
splitting 145


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7004, 1826)
(3213, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 39, 'X': 1}
modeling 145
ml :  XGBoost
train_y: (7004,)
train_w: (7004,)
train_X: (7004, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_08_07_028681
start 146
filtering 146
aggregating 146
checking aggregation: 
             hmdb_ids    y  n_obs
10864   HMDB0054237+H  0.0     23
7271   HMDB0012105+Na  0.0     70
8788    HMDB0043895+K  0.0      2
7506   HMDB0012443+Na  0.0      5
3910   HMDB0008565+Na  5.0     37
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_08_09_402494
Printing histogram!
hist_plots/2020_04_06_17_08_09_593098
joining X 146
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_08_10_246983
splitting 146


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6749, 1826)
(3800, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 44, 'X': 3}
modeling 146
ml :  XGBoost
train_y: (6749,)
train_w: (6749,)
train_X: (6749, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_09_44_120653
start 147
filtering 147
aggregating 147
checking aggregation: 
             hmdb_ids    y  n_obs
712     HMDB0001397+H  0.0     18
12319  HMDB0112636+Na  0.0      1
8998    HMDB0044312+H  0.0      5
1086    HMDB0004872+K  0.0     17
12589  HMDB0114854+Na  0.0      3
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_09_46_420963
Printing histogram!
hist_plots/2020_04_06_17_09_46_596302
joining X 147
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_09_47_252391
splitting 147


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6082, 1826)
(4000, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 44, 'X': 1}
modeling 147
ml :  XGBoost
train_y: (6082,)
train_w: (6082,)
train_X: (6082, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_11_08_967222
start 148
filtering 148
aggregating 148
checking aggregation: 
             hmdb_ids    y  n_obs
11179   HMDB0056321+K  0.0      4
3120    HMDB0008196+H  0.0     40
9903   HMDB0049104+Na  1.0      4
10782   HMDB0053862+K  0.0      6
10683   HMDB0053404+H  0.0     13
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_11_11_307786
Printing histogram!
hist_plots/2020_04_06_17_11_11_492768
joining X 148
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_11_12_156362
splitting 148


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7204, 1826)
(3384, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 46, 'X': 2}
modeling 148
ml :  XGBoost
train_y: (7204,)
train_w: (7204,)
train_X: (7204, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_12_50_744209
start 149
filtering 149
aggregating 149
checking aggregation: 
             hmdb_ids    y  n_obs
5512   HMDB0009593+Na  6.0      9
8760   HMDB0043631+Na  1.0      4
12294  HMDB0112582+Na  3.0      6
2631    HMDB0008017+H  2.0     31
8834   HMDB0043955+Na  0.0     78
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_12_53_019679
Printing histogram!
hist_plots/2020_04_06_17_12_53_204961
joining X 149
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_12_53_861261
splitting 149


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6873, 1826)
(3361, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 40, 'X': 7}
modeling 149
ml :  XGBoost
train_y: (6873,)
train_w: (6873,)
train_X: (6873, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_14_26_468923
start 150
filtering 150
aggregating 150
checking aggregation: 
             hmdb_ids    y  n_obs
2261    HMDB0007869+H  0.0    113
6501    HMDB0011208+H  0.0    145
8167   HMDB0037186+Na  0.0      1
3069   HMDB0008172+Na  1.0      6
12056  HMDB0112378+Na  2.0      3
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_14_28_820415
Printing histogram!
hist_plots/2020_04_06_17_14_28_999037
joining X 150
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_14_29_646919
splitting 150


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6805, 1826)
(3118, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 46, 'X': 1}
modeling 150
ml :  XGBoost
train_y: (6805,)
train_w: (6805,)
train_X: (6805, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_16_02_527967
start 151
filtering 151
aggregating 151
checking aggregation: 
            hmdb_ids     y  n_obs
3660   HMDB0008430+K   0.0     28
1519  HMDB0007057+Na  13.0     14
9777   HMDB0048067+H   0.0     13
4656   HMDB0009051+H   1.0     33
805   HMDB0002080+Na   0.0      2
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_16_04_767957
Printing histogram!
hist_plots/2020_04_06_17_16_04_944211
joining X 151
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_16_05_605176
splitting 151


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6892, 1826)
(3761, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 49, 'X': 2}
modeling 151
ml :  XGBoost
train_y: (6892,)
train_w: (6892,)
train_X: (6892, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_17_39_737137
start 152
filtering 152
aggregating 152
checking aggregation: 
             hmdb_ids     y  n_obs
1021    HMDB0003797+K   0.0      1
9686    HMDB0047002+H   0.0      1
3801    HMDB0008508+H  80.0    133
954    HMDB0003290+Na   0.0      1
11984  HMDB0112313+Na   1.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_17_42_065969
Printing histogram!
hist_plots/2020_04_06_17_17_42_246783
joining X 152
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_17_42_654449
splitting 152


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6415, 1024)
(3720, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 41, 'X': 248162}
modeling 152
ml :  XGBoost
train_y: (6415,)
train_w: (6415,)
train_X: (6415, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_18_24_419603
start 153
filtering 153
aggregating 153
checking aggregation: 
             hmdb_ids    y  n_obs
10060  HMDB0049781+Na  0.0      5
5004   HMDB0009249+Na  1.0     24
13579      msmls113+H  0.0    102
9848    HMDB0048698+K  0.0      2
6567    HMDB0011235+K  0.0     28
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_18_26_671724
Printing histogram!
hist_plots/2020_04_06_17_18_26_849401
joining X 153
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_18_27_243138
splitting 153


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7058, 1024)
(3623, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 50, 'X': 344566}
modeling 153
ml :  XGBoost
train_y: (7058,)
train_w: (7058,)
train_X: (7058, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_19_13_200196
start 154
filtering 154
aggregating 154
checking aggregation: 
            hmdb_ids    y  n_obs
9919  HMDB0049156+Na  0.0      5
655    HMDB0001185+H  0.0     43
8353   HMDB0042204+K  4.0    109
4517  HMDB0008963+Na  0.0      1
559    HMDB0000884+K  0.0     15
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_19_15_446922
Printing histogram!
hist_plots/2020_04_06_17_19_15_621501
joining X 154
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_19_16_018464
splitting 154


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7227, 1024)
(3238, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 40, 'X': 250247}
modeling 154
ml :  XGBoost
train_y: (7227,)
train_w: (7227,)
train_X: (7227, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_20_02_830612
start 155
filtering 155
aggregating 155
checking aggregation: 
             hmdb_ids     y  n_obs
722    HMDB0001438+Na   0.0      1
2361    HMDB0007905+H   4.0     31
13591     msmls183+Na   0.0      3
2300    HMDB0007882+K   2.0     32
7015   HMDB0011495+Na  74.0     90
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_20_05_097523
Printing histogram!
hist_plots/2020_04_06_17_20_05_274257
joining X 155
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_20_05_668099
splitting 155


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6861, 1024)
(3672, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 43, 'X': 317061}
modeling 155
ml :  XGBoost
train_y: (6861,)
train_w: (6861,)
train_X: (6861, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_20_50_805187
start 156
filtering 156
aggregating 156
checking aggregation: 
             hmdb_ids     y  n_obs
3027    HMDB0008157+K   1.0     10
6740   HMDB0011306+Na   0.0     20
12819  HMDB0115013+Na  20.0     87
13427  HMDB0115563+Na   0.0      4
6041   HMDB0010430+Na   0.0    116
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_20_53_085378
Printing histogram!
hist_plots/2020_04_06_17_20_53_256186
joining X 156
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_20_53_647351
splitting 156


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6781, 1024)
(2847, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 182108}
modeling 156
ml :  XGBoost
train_y: (6781,)
train_w: (6781,)
train_X: (6781, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_21_36_992624
start 157
filtering 157
aggregating 157
checking aggregation: 
             hmdb_ids    y  n_obs
10641  HMDB0053037+Na  0.0      3
1539    HMDB0007082+H  0.0      1
2125   HMDB0007585+Na  0.0      4
7077    HMDB0011524+K  0.0     69
10521  HMDB0052485+Na  0.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_21_39_240599
Printing histogram!
hist_plots/2020_04_06_17_21_39_416480
joining X 157
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_21_39_799370
splitting 157


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6114, 1024)
(3863, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 284277}
modeling 157
ml :  XGBoost
train_y: (6114,)
train_w: (6114,)
train_X: (6114, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_22_19_676173
start 158
filtering 158
aggregating 158
checking aggregation: 
             hmdb_ids     y  n_obs
3274   HMDB0008263+Na  34.0    168
1250    HMDB0005412+H   0.0    102
11199   HMDB0059629+K   1.0      1
9982    HMDB0049447+H   0.0     13
1261   HMDB0005424+Na   0.0     60
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_22_21_915777
Printing histogram!
hist_plots/2020_04_06_17_22_22_089589
joining X 158
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_22_22_480882
splitting 158


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6752, 1024)
(3327, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 52, 'X': 247571}
modeling 158
ml :  XGBoost
train_y: (6752,)
train_w: (6752,)
train_X: (6752, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_23_06_212769
start 159
filtering 159
aggregating 159
checking aggregation: 
            hmdb_ids    y  n_obs
9064  HMDB0044474+Na  0.0      4
709    HMDB0001392+K  0.0      1
8001   HMDB0031648+K  0.0      1
2836  HMDB0008091+Na  1.0     67
5821   HMDB0009900+H  0.0      5
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_23_08_504412
Printing histogram!
hist_plots/2020_04_06_17_23_08_682223
joining X 159
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_23_09_072252
splitting 159


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6182, 1024)
(3448, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 46, 'X': 226864}
modeling 159
ml :  XGBoost
train_y: (6182,)
train_w: (6182,)
train_X: (6182, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_23_49_444305
start 160
filtering 160
aggregating 160
checking aggregation: 
            hmdb_ids     y  n_obs
3734   HMDB0008466+H  88.0    120
3427   HMDB0008324+K  18.0     20
9749  HMDB0047892+Na   0.0    114
8302  HMDB0042061+Na   0.0      4
1894   HMDB0007352+K   0.0      4
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_23_51_732138
Printing histogram!
hist_plots/2020_04_06_17_23_51_910537
joining X 160
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_23_52_445253
splitting 160


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3886, 1826)
(1933, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 41, 'X': 2}
modeling 160
ml :  XGBoost
train_y: (3886,)
train_w: (3886,)
train_X: (3886, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_24_44_817306
start 161
filtering 161
aggregating 161
checking aggregation: 
             hmdb_ids     y  n_obs
7722    HMDB0013449+H   0.0      1
3212   HMDB0008234+Na  63.0    171
10167   HMDB0050307+H   0.0     17
13063   HMDB0115221+K   1.0      1
3492    HMDB0008348+H   3.0      4
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_24_47_109923
Printing histogram!
hist_plots/2020_04_06_17_24_47_291968
joining X 161
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_24_47_790494
splitting 161


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3672, 1826)
(2335, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 44, 'X': 0}
modeling 161
ml :  XGBoost
train_y: (3672,)
train_w: (3672,)
train_X: (3672, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_25_37_261809
start 162
filtering 162
aggregating 162
checking aggregation: 
             hmdb_ids     y  n_obs
4141    HMDB0008696+K  11.0    104
10304   HMDB0050808+H   0.0     23
6956   HMDB0011460+Na   9.0    169
13472   HMDB0115584+K   0.0      9
9963    HMDB0049389+H   0.0      3
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_25_39_524002
Printing histogram!
hist_plots/2020_04_06_17_25_39_700896
joining X 162
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_25_40_206952
splitting 162


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4033, 1826)
(2117, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 0}
modeling 162
ml :  XGBoost
train_y: (4033,)
train_w: (4033,)
train_X: (4033, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_26_35_153017
start 163
filtering 163
aggregating 163
checking aggregation: 
             hmdb_ids     y  n_obs
5636    HMDB0009796+H   0.0      2
8088   HMDB0034074+Na   0.0      1
12527  HMDB0114796+Na   0.0     12
2404   HMDB0007922+Na  38.0     55
12295   HMDB0112583+H   3.0      7
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_26_37_457619
Printing histogram!
hist_plots/2020_04_06_17_26_37_637504
joining X 163
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_26_38_139969
splitting 163


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4049, 1826)
(2183, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 40, 'X': 0}
modeling 163
ml :  XGBoost
train_y: (4049,)
train_w: (4049,)
train_X: (4049, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_27_31_405900
start 164
filtering 164
aggregating 164
checking aggregation: 
             hmdb_ids      y  n_obs
3294    HMDB0008271+H  108.0    116
2087    HMDB0007536+H    4.0      4
2523    HMDB0007978+H  101.0    172
12214  HMDB0112530+Na    3.0      6
10131   HMDB0050218+H    0.0     23
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_27_33_650654
Printing histogram!
hist_plots/2020_04_06_17_27_33_829233
joining X 164
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_27_34_329321
splitting 164


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4012, 1826)
(2084, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 45, 'X': 0}
modeling 164
ml :  XGBoost
train_y: (4012,)
train_w: (4012,)
train_X: (4012, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_28_29_471333
start 165
filtering 165
aggregating 165
checking aggregation: 
             hmdb_ids     y  n_obs
1307    HMDB0005445+H   0.0      1
6160   HMDB0010500+Na   0.0      1
7790    HMDB0015655+H   0.0      6
1492   HMDB0007031+Na  23.0     58
12168   HMDB0112504+H   5.0      6
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_28_31_714600
Printing histogram!
hist_plots/2020_04_06_17_28_31_892219
joining X 165
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_28_32_395946
splitting 165


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4328, 1826)
(1775, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 39, 'X': 2}
modeling 165
ml :  XGBoost
train_y: (4328,)
train_w: (4328,)
train_X: (4328, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_29_29_254299
start 166
filtering 166
aggregating 166
checking aggregation: 
             hmdb_ids    y  n_obs
662    HMDB0001201+Na  0.0      2
12680  HMDB0114930+Na  1.0      6
1378    HMDB0006270+H  2.0      3
4714    HMDB0009072+K  0.0     12
5523   HMDB0009601+Na  1.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_29_31_572764
Printing histogram!
hist_plots/2020_04_06_17_29_31_749144
joining X 166
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_29_32_251916
splitting 166


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4416, 1826)
(2000, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 46, 'X': 0}
modeling 166
ml :  XGBoost
train_y: (4416,)
train_w: (4416,)
train_X: (4416, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_30_32_109768
start 167
filtering 167
aggregating 167
checking aggregation: 
            hmdb_ids    y  n_obs
3988  HMDB0008603+Na  0.0      3
968    HMDB0003355+H  5.0      9
3414  HMDB0008316+Na  0.0      9
2129   HMDB0007599+H  0.0      1
1887   HMDB0007346+K  0.0     14
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_30_34_408224
Printing histogram!
hist_plots/2020_04_06_17_30_34_587985
joining X 167
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_30_35_089257
splitting 167


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3888, 1826)
(2114, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 1}
modeling 167
ml :  XGBoost
train_y: (3888,)
train_w: (3888,)
train_X: (3888, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_31_29_303276
start 168
filtering 168
aggregating 168
checking aggregation: 
             hmdb_ids     y  n_obs
10660   HMDB0053314+H   0.0     13
10272   HMDB0050754+H   0.0     23
10564  HMDB0052646+Na   0.0      1
3969    HMDB0008597+H  65.0     65
8131   HMDB0035255+Na   0.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_31_31_617024
Printing histogram!
hist_plots/2020_04_06_17_31_31_792819
joining X 168
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_31_32_127619
splitting 168
X_df_shapes:
(4001, 1024)
(2209, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 48, 'X': 154644}
modeling 168
ml :  XGBoost
train_y: (4001,)
train_w: (4001,)
train_X: (4001, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_31_58_308406
start 169
filtering 169
aggregating 169
checking aggregation: 
             hmdb_ids    y  n_obs
12052  HMDB0112375+Na  6.0      6
2487    HMDB0007965+H  0.0    113
136     HMDB0000168+H  1.0      1
7891    HMDB0029224+K  0.0      3
6730    HMDB0011302+H  0.0     10
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_32_00_580367
Printing histogram!
hist_plots/2020_04_06_17_32_00_755543
joining X 169
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_32_01_075779


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 169
X_df_shapes:
(4369, 1024)
(1746, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 39, 'X': 104354}
modeling 169
ml :  XGBoost
train_y: (4369,)
train_w: (4369,)
train_X: (4369, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_32_29_540550
start 170
filtering 170
aggregating 170
checking aggregation: 
           hmdb_ids     y  n_obs
5573  HMDB0009690+H   0.0      3
6585  HMDB0011242+K  18.0     20
7046  HMDB0011513+H   0.0     15
1456  HMDB0006851+H   1.0      1
8085  HMDB0033951+K   0.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_32_31_820047
Printing histogram!
hist_plots/2020_04_06_17_32_34_063170
joining X 170
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_32_34_371150
splitting 170
X_df_shapes:
(3821, 1024)
(2034, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 47, 'X': 121288}
modeling 170
ml :  XGBoost
train_y: (3821,)
train_w: (3821,)
train_X: (3821, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_32_59_424754
start 171
filtering 171
aggregating 171
checking aggregation: 
            hmdb_ids     y  n_obs
7254   HMDB0012100+H  29.0    107
1114   HMDB0004956+H  16.0     16
4372   HMDB0008887+H   0.0     26
9919  HMDB0049156+Na   0.0      5
8320  HMDB0042163+Na   0.0      4
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_33_01_676444
Printing histogram!
hist_plots/2020_04_06_17_33_01_844395
joining X 171
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_33_02_157467


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 171
X_df_shapes:
(4089, 1024)
(1955, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 41, 'X': 140509}
modeling 171
ml :  XGBoost
train_y: (4089,)
train_w: (4089,)
train_X: (4089, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_33_28_172684
start 172
filtering 172
aggregating 172
checking aggregation: 
             hmdb_ids    y  n_obs
8725    HMDB0043426+K  0.0      2
12205  HMDB0112524+Na  2.0      7
12944   HMDB0115096+K  0.0      4
9797    HMDB0048403+K  0.0      1
10486   HMDB0051809+K  0.0      4
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_33_30_408130
Printing histogram!
hist_plots/2020_04_06_17_33_30_581507
joining X 172
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_33_30_896432
splitting 172
X_df_shapes:
(4053, 1024)
(2203, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 46, 'X': 159618}
modeling 172
ml :  XGBoost
train_y: (4053,)
train_w: (4053,)
train_X: (4053, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_33_57_285141
start 173
filtering 173
aggregating 173
checking aggregation: 
            hmdb_ids    y  n_obs
12889  HMDB0115066+K  0.0     14
9854   HMDB0048763+H  5.0     22
8121   HMDB0034561+K  0.0      1
972    HMDB0003363+K  0.0      1
9980   HMDB0049442+K  0.0      3
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_33_59_521098
Printing histogram!
hist_plots/2020_04_06_17_33_59_700805
joining X 173
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_34_00_012899


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 173
X_df_shapes:
(4306, 1024)
(1968, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 42, 'X': 138643}
modeling 173
ml :  XGBoost
train_y: (4306,)
train_w: (4306,)
train_X: (4306, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_34_27_570610
start 174
filtering 174
aggregating 174
checking aggregation: 
             hmdb_ids     y  n_obs
10886   HMDB0054320+K   0.0      3
11461  HMDB0062738+Na   3.0      3
7312    HMDB0012331+K   0.0      2
13045   HMDB0115209+K  15.0    103
7350   HMDB0012355+Na   1.0      3
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_34_29_858813
Printing histogram!
hist_plots/2020_04_06_17_34_30_026747
joining X 174
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_34_30_336440


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 174
X_df_shapes:
(4050, 1024)
(2300, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 41, 'X': 165313}
modeling 174
ml :  XGBoost
train_y: (4050,)
train_w: (4050,)
train_X: (4050, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_34_56_561279
start 175
filtering 175
aggregating 175
checking aggregation: 
             hmdb_ids    y  n_obs
5913   HMDB0010375+Na  0.0      5
10930   HMDB0054684+H  0.0      1
7560    HMDB0013205+H  0.0      2
8746   HMDB0043546+Na  0.0     13
5354    HMDB0009485+H  6.0    151
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_34_58_779968
Printing histogram!
hist_plots/2020_04_06_17_34_58_950872
joining X 175
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_34_59_265180


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 175
X_df_shapes:
(4062, 1024)
(2005, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 49, 'X': 136587}
modeling 175
ml :  XGBoost
train_y: (4062,)
train_w: (4062,)
train_X: (4062, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_35_25_606924
start 176
filtering 176
aggregating 176
checking aggregation: 
            hmdb_ids     y  n_obs
2685   HMDB0008036+H  67.0     67
9269   HMDB0045131+H   0.0     10
2864   HMDB0008101+H  29.0    168
657    HMDB0001189+H   0.0      1
10814  HMDB0054005+H   0.0      4
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_35_27_855347
Printing histogram!
hist_plots/2020_04_06_17_35_28_024572
joining X 176
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_35_28_496950
splitting 176
X_df_shapes:
(2584, 1826)
(1768, 1826)
Testing split, n overlap train/te

Printing regression actual versus predicted output!
model_plots/2020_04_06_17_41_27_816213
start 185
filtering 185
aggregating 185
checking aggregation: 
             hmdb_ids     y  n_obs
9954    HMDB0049288+H   0.0     17
11304  HMDB0061706+Na   1.0      1
9147   HMDB0044737+Na   0.0      2
4147    HMDB0008718+H  20.0    109
12082   HMDB0112429+H   1.0      1
(13614, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_41_30_045187
Printing histogram!
hist_plots/2020_04_06_17_41_30_210824
joining X 185
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_41_30_504067
splitting 185
X_df_shapes:
(3233, 1024)
(1666, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 38, 'X': 132204}
modeling 185
ml :  XGBoost
train_y: (3233,)
train_w: (3233,)
train_X: (3233, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_41_51_655271
start 

  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6804, 1826)
(3224, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 2}
modeling 192
ml :  XGBoost
train_y: (6804,)
train_w: (6804,)
train_X: (6804, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_45_34_746578
start 193
filtering 193
aggregating 193
checking aggregation: 
            hmdb_ids      y  n_obs
5166   HMDB0009486+K    0.0      8
2359   HMDB0007981+H  105.0    145
3602   HMDB0008498+H    8.0    142
1776  HMDB0007377+Na    0.0      1
9428   HMDB0047892+K    2.0    105
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_45_36_975346
Printing histogram!
hist_plots/2020_04_06_17_45_37_150291
joining X 193
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_45_37_771331
splitting 193


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6454, 1826)
(3278, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 31, 'X': 3}
modeling 193
ml :  XGBoost
train_y: (6454,)
train_w: (6454,)
train_X: (6454, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_46_59_237685
start 194
filtering 194
aggregating 194
checking aggregation: 
             hmdb_ids    y  n_obs
579    HMDB0001127+Na  0.0      1
10169   HMDB0051815+K  0.0      3
7515    HMDB0028757+K  0.0      1
7192    HMDB0012417+K  0.0      1
1387   HMDB0007032+Na  0.0      2
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_47_01_451713
Printing histogram!
hist_plots/2020_04_06_17_47_01_620447
joining X 194
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_47_02_244617
splitting 194


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6263, 1826)
(3974, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 3}
modeling 194
ml :  XGBoost
train_y: (6263,)
train_w: (6263,)
train_X: (6263, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_48_24_572932
start 195
filtering 195
aggregating 195
checking aggregation: 
            hmdb_ids    y  n_obs
5002   HMDB0009360+H  1.0      4
4237   HMDB0008910+H  0.0     33
9734  HMDB0049756+Na  0.0     92
9974   HMDB0050790+H  0.0     23
3380  HMDB0008383+Na  0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_48_26_825709
Printing histogram!
hist_plots/2020_04_06_17_48_26_994060
joining X 195
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_48_27_618067
splitting 195


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6059, 1826)
(3806, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 22, 'X': 5}
modeling 195
ml :  XGBoost
train_y: (6059,)
train_w: (6059,)
train_X: (6059, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_49_42_703954
start 196
filtering 196
aggregating 196
checking aggregation: 
            hmdb_ids    y  n_obs
1299   HMDB0006460+H  0.0     19
849    HMDB0003141+K  0.0      1
10669  HMDB0055168+H  0.0      1
7814   HMDB0035159+H  0.0     15
639    HMDB0001348+K  0.0    154
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_49_44_906481
Printing histogram!
hist_plots/2020_04_06_17_49_45_081834
joining X 196
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_49_45_705882
splitting 196


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5905, 1826)
(3295, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 4}
modeling 196
ml :  XGBoost
train_y: (5905,)
train_w: (5905,)
train_X: (5905, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_51_04_322562
start 197
filtering 197
aggregating 197
checking aggregation: 
             hmdb_ids    y  n_obs
1743   HMDB0007338+Na  0.0      1
12423   HMDB0115038+H  0.0      7
903     HMDB0003416+H  2.0     13
6570   HMDB0011330+Na  0.0      1
12902   HMDB0115525+K  0.0      2
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_51_06_577872
Printing histogram!
hist_plots/2020_04_06_17_51_06_751105
joining X 197
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_51_07_388907
splitting 197


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6477, 1826)
(3535, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 29, 'X': 2}
modeling 197
ml :  XGBoost
train_y: (6477,)
train_w: (6477,)
train_X: (6477, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_52_29_286133
start 198
filtering 198
aggregating 198
checking aggregation: 
            hmdb_ids    y  n_obs
7202   HMDB0012423+K  1.0     30
7892  HMDB0038242+Na  0.0      2
3677   HMDB0008534+K  0.0      7
4118   HMDB0008837+H  0.0      5
9494   HMDB0048496+K  0.0    110
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_52_31_496436
Printing histogram!
hist_plots/2020_04_06_17_52_31_667695
joining X 198
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_52_32_297264
splitting 198


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(7063, 1826)
(3123, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 30, 'X': 5}
modeling 198
ml :  XGBoost
train_y: (7063,)
train_w: (7063,)
train_X: (7063, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_54_03_699236
start 199
filtering 199
aggregating 199
checking aggregation: 
             hmdb_ids    y  n_obs
10720   HMDB0055466+H  0.0     47
13122   HMDB0127846+K  0.0      1
833     HMDB0002931+K  0.0      2
8576    HMDB0044098+H  0.0      2
11725  HMDB0112450+Na  0.0      4
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_54_05_944985
Printing histogram!
hist_plots/2020_04_06_17_54_06_119989
joining X 199
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_54_06_747558
splitting 199


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6255, 1826)
(3632, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 29, 'X': 2}
modeling 199
ml :  XGBoost
train_y: (6255,)
train_w: (6255,)
train_X: (6255, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_55_29_472550
start 200
filtering 200
aggregating 200
checking aggregation: 
             hmdb_ids    y  n_obs
13154      msmls367+K  2.0     26
8238    HMDB0042641+H  0.0      1
30     HMDB0000034+Na  0.0      2
10343  HMDB0053315+Na  0.0      2
11411  HMDB0108576+Na  0.0     20
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_55_31_686755
Printing histogram!
hist_plots/2020_04_06_17_55_31_854686
joining X 200
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_55_32_237940
splitting 200


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6685, 1024)
(3323, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 302535}
modeling 200
ml :  XGBoost
train_y: (6685,)
train_w: (6685,)
train_X: (6685, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_56_14_781919
start 201
filtering 201
aggregating 201
checking aggregation: 
             hmdb_ids    y  n_obs
2834   HMDB0008149+Na  0.0     14
10910  HMDB0060038+Na  0.0      1
1853    HMDB0007453+K  0.0      1
4743    HMDB0009210+H  0.0      3
12848   HMDB0115504+K  0.0     18
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_56_17_017489
Printing histogram!
hist_plots/2020_04_06_17_56_17_192281
joining X 201
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_56_17_560363
splitting 201


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6801, 1024)
(3703, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 30, 'X': 375121}
modeling 201
ml :  XGBoost
train_y: (6801,)
train_w: (6801,)
train_X: (6801, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_57_00_680985
start 202
filtering 202
aggregating 202
checking aggregation: 
            hmdb_ids     y  n_obs
7740  HMDB0032443+Na   0.0      2
2626   HMDB0008079+K  12.0    133
3717  HMDB0008558+Na  37.0     84
3935   HMDB0008686+H   0.0     83
8863   HMDB0044871+H   0.0     10
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_57_02_890580
Printing histogram!
hist_plots/2020_04_06_17_57_05_457141
joining X 202
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_57_05_837859
splitting 202


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6650, 1024)
(3865, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 378473}
modeling 202
ml :  XGBoost
train_y: (6650,)
train_w: (6650,)
train_X: (6650, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_57_48_385555
start 203
filtering 203
aggregating 203
checking aggregation: 
            hmdb_ids     y  n_obs
4241   HMDB0008920+H   0.0      4
9513   HMDB0048634+H   0.0     10
8257   HMDB0042684+H   0.0     60
4434  HMDB0009012+Na  21.0     21
1216  HMDB0005445+Na   0.0      2
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_57_50_562784
Printing histogram!
hist_plots/2020_04_06_17_57_50_728475
joining X 203
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_57_51_096383
splitting 203


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6931, 1024)
(3296, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 249891}
modeling 203
ml :  XGBoost
train_y: (6931,)
train_w: (6931,)
train_X: (6931, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_58_35_167038
start 204
filtering 204
aggregating 204
checking aggregation: 
             hmdb_ids    y  n_obs
6371    HMDB0011248+K  0.0      2
4177    HMDB0008875+H  0.0     99
13006   HMDB0115572+K  0.0      1
6350    HMDB0011241+K  3.0      7
333    HMDB0000565+Na  0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_58_37_359682
Printing histogram!
hist_plots/2020_04_06_17_58_37_534217
joining X 204
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_58_37_905718
splitting 204


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6418, 1024)
(3272, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 227747}
modeling 204
ml :  XGBoost
train_y: (6418,)
train_w: (6418,)
train_X: (6418, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_17_59_19_169430
start 205
filtering 205
aggregating 205
checking aggregation: 
             hmdb_ids    y  n_obs
6344    HMDB0011238+K  0.0      1
12411   HMDB0115024+H  0.0      3
122    HMDB0000168+Na  0.0      2
2200    HMDB0007909+H  0.0      5
6707   HMDB0011439+Na  0.0     33
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_17_59_21_372557
Printing histogram!
hist_plots/2020_04_06_17_59_21_543076
joining X 205
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_17_59_21_916898
splitting 205


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6658, 1024)
(3285, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 30, 'X': 264073}
modeling 205
ml :  XGBoost
train_y: (6658,)
train_w: (6658,)
train_X: (6658, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_00_04_732318
start 206
filtering 206
aggregating 206
checking aggregation: 
             hmdb_ids     y  n_obs
9000   HMDB0045321+Na   0.0     78
6211   HMDB0011128+Na  40.0    146
10559  HMDB0054295+Na   0.0      4
10450   HMDB0053796+K   0.0      3
12267   HMDB0114932+H   0.0      3
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_00_06_901589
Printing histogram!
hist_plots/2020_04_06_18_00_07_069714
joining X 206
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_00_07_450211
splitting 206


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6487, 1024)
(3705, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 29, 'X': 314966}
modeling 206
ml :  XGBoost
train_y: (6487,)
train_w: (6487,)
train_X: (6487, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_00_49_358928
start 207
filtering 207
aggregating 207
checking aggregation: 
            hmdb_ids    y  n_obs
6198   HMDB0010732+H  0.0      4
7068   HMDB0012331+H  3.0     11
2596   HMDB0008068+K  1.0    157
2597  HMDB0008068+Na  3.0    168
4245  HMDB0008922+Na  0.0      5
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_00_51_596273
Printing histogram!
hist_plots/2020_04_06_18_00_51_769819
joining X 207
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_00_52_142838
splitting 207


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5877, 1024)
(4150, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 33, 'X': 292794}
modeling 207
ml :  XGBoost
train_y: (5877,)
train_w: (5877,)
train_X: (5877, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_01_30_528287
start 208
filtering 208
aggregating 208
checking aggregation: 
            hmdb_ids    y  n_obs
8271   HMDB0042720+K  0.0      2
670   HMDB0001487+Na  0.0      8
8285  HMDB0042764+Na  0.0    102
8904  HMDB0044923+Na  0.0      2
9176   HMDB0046086+H  0.0     14
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_01_32_747204
Printing histogram!
hist_plots/2020_04_06_18_01_32_919173
joining X 208
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_01_33_426172
splitting 208


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4049, 1826)
(1635, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 2}
modeling 208
ml :  XGBoost
train_y: (4049,)
train_w: (4049,)
train_X: (4049, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_02_23_572383
start 209
filtering 209
aggregating 209
checking aggregation: 
             hmdb_ids     y  n_obs
8310    HMDB0042844+K   0.0      1
9126   HMDB0045767+Na   0.0      6
4381    HMDB0008993+H  18.0    105
12614  HMDB0115209+Na   6.0    111
3978    HMDB0008721+H   0.0     71
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_02_25_791303
Printing histogram!
hist_plots/2020_04_06_18_02_25_960408
joining X 209
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_02_26_454103
splitting 209


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4014, 1826)
(2027, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 0}
modeling 209
ml :  XGBoost
train_y: (4014,)
train_w: (4014,)
train_X: (4014, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_03_16_302533
start 210
filtering 210
aggregating 210
checking aggregation: 
            hmdb_ids    y  n_obs
567    HMDB0001072+H  0.0      1
1746  HMDB0007340+Na  1.0      1
9072   HMDB0045584+K  0.0      2
1791  HMDB0007394+Na  1.0     22
7196  HMDB0012420+Na  0.0      3
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_03_18_501344
Printing histogram!
hist_plots/2020_04_06_18_03_18_678393
joining X 210
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_03_19_171361
splitting 210


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4197, 1826)
(1742, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 1}
modeling 210
ml :  XGBoost
train_y: (4197,)
train_w: (4197,)
train_X: (4197, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_04_13_250930
start 211
filtering 211
aggregating 211
checking aggregation: 
             hmdb_ids    y  n_obs
11405  HMDB0108536+Na  0.0     22
1868    HMDB0007476+H  4.0      4
1669    HMDB0007272+K  0.0      9
6451    HMDB0011283+K  0.0     14
7051    HMDB0012265+K  0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_04_15_461754
Printing histogram!
hist_plots/2020_04_06_18_04_15_635278
joining X 211
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_04_16_128494
splitting 211


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3906, 1826)
(1980, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 0}
modeling 211
ml :  XGBoost
train_y: (3906,)
train_w: (3906,)
train_X: (3906, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_05_04_104780
start 212
filtering 212
aggregating 212
checking aggregation: 
            hmdb_ids     y  n_obs
5079   HMDB0009401+K   0.0      5
7201   HMDB0012423+H   3.0     18
8180   HMDB0042531+K   0.0      1
4597  HMDB0009108+Na  21.0     21
5787   HMDB0010419+K   0.0     12
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_05_06_371310
Printing histogram!
hist_plots/2020_04_06_18_05_06_546318
joining X 212
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_05_07_038719
splitting 212


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4027, 1826)
(2079, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 29, 'X': 1}
modeling 212
ml :  XGBoost
train_y: (4027,)
train_w: (4027,)
train_X: (4027, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_06_00_096918
start 213
filtering 213
aggregating 213
checking aggregation: 
            hmdb_ids    y  n_obs
8967  HMDB0045185+Na  0.0     78
9781   HMDB0050040+K  0.0      2
5280  HMDB0009581+Na  0.0      4
8712   HMDB0044367+K  0.0      1
5795  HMDB0010423+Na  0.0    116
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_06_02_299900
Printing histogram!
hist_plots/2020_04_06_18_06_02_472262
joining X 213
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_06_02_975709
splitting 213


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4015, 1826)
(2093, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 0}
modeling 213
ml :  XGBoost
train_y: (4015,)
train_w: (4015,)
train_X: (4015, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_06_51_363014
start 214
filtering 214
aggregating 214
checking aggregation: 
             hmdb_ids    y  n_obs
9523    HMDB0048655+K  2.0    105
10046   HMDB0051168+H  0.0     17
9998    HMDB0050858+K  0.0      3
9974    HMDB0050790+H  0.0     23
10625  HMDB0054823+Na  0.0      3
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_06_53_581894
Printing histogram!
hist_plots/2020_04_06_18_06_53_764402
joining X 214
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_06_54_258844
splitting 214


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3877, 1826)
(1995, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 0}
modeling 214
ml :  XGBoost
train_y: (3877,)
train_w: (3877,)
train_X: (3877, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_07_44_868021
start 215
filtering 215
aggregating 215
checking aggregation: 
            hmdb_ids     y  n_obs
3472  HMDB0008426+Na   2.0     10
2733   HMDB0008116+H   8.0    142
6954   HMDB0011716+H   1.0     10
771    HMDB0002264+K   1.0      1
2605   HMDB0008071+K  25.0    116
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_07_47_130995
Printing histogram!
hist_plots/2020_04_06_18_07_47_309421
joining X 215
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_07_47_798224
splitting 215


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3939, 1826)
(2153, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 0}
modeling 215
ml :  XGBoost
train_y: (3939,)
train_w: (3939,)
train_X: (3939, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_08_38_905050
start 216
filtering 216
aggregating 216
checking aggregation: 
           hmdb_ids    y  n_obs
2065  HMDB0007859+K  0.0     94
4631  HMDB0009134+H  1.0      6
2194  HMDB0007906+K  0.0     13
1160  HMDB0005421+K  2.0     16
9311  HMDB0046834+H  0.0      3
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_08_41_298083
Printing histogram!
hist_plots/2020_04_06_18_08_41_480689
joining X 216
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_08_41_814423


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 216
X_df_shapes:
(4154, 1024)
(1885, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 136847}
modeling 216
ml :  XGBoost
train_y: (4154,)
train_w: (4154,)
train_X: (4154, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_09_08_782425
start 217
filtering 217
aggregating 217
checking aggregation: 
             hmdb_ids    y  n_obs
12772   HMDB0115332+K  0.0     31
2463   HMDB0008018+Na  2.0     12
5045   HMDB0009386+Na  0.0      4
5888    HMDB0010458+H  0.0      1
1872    HMDB0007478+K  0.0      4
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_09_11_131029
Printing histogram!
hist_plots/2020_04_06_18_09_11_313401
joining X 217
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_09_11_630810


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 217
X_df_shapes:
(4422, 1024)
(1869, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 139267}
modeling 217
ml :  XGBoost
train_y: (4422,)
train_w: (4422,)
train_X: (4422, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_09_40_136726
start 218
filtering 218
aggregating 218
checking aggregation: 
            hmdb_ids     y  n_obs
4263  HMDB0008929+Na   0.0      3
5009  HMDB0009362+Na  10.0     18
5887  HMDB0010457+Na   0.0      2
9463   HMDB0048183+K   0.0      1
1981   HMDB0007628+K   0.0      4
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_09_42_349453
Printing histogram!
hist_plots/2020_04_06_18_09_42_531796
joining X 218
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_09_42_834450


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 218
X_df_shapes:
(4173, 1024)
(1820, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 31, 'X': 131871}
modeling 218
ml :  XGBoost
train_y: (4173,)
train_w: (4173,)
train_X: (4173, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_10_09_981628
start 219
filtering 219
aggregating 219
checking aggregation: 
             hmdb_ids    y  n_obs
3936    HMDB0008686+K  0.0     11
11424  HMDB0109669+Na  0.0      3
10836   HMDB0056283+H  0.0      1
3088   HMDB0008260+Na  0.0     17
2233    HMDB0007923+K  0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_10_12_171517
Printing histogram!
hist_plots/2020_04_06_18_10_12_342557
joining X 219
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_10_12_647760


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 219
X_df_shapes:
(4456, 1024)
(1733, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 22, 'X': 150037}
modeling 219
ml :  XGBoost
train_y: (4456,)
train_w: (4456,)
train_X: (4456, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_10_40_799871
start 220
filtering 220
aggregating 220
checking aggregation: 
             hmdb_ids    y  n_obs
1499   HMDB0007138+Na  3.0     15
4061   HMDB0008788+Na  2.0     12
11303  HMDB0094659+Na  0.0      1
13020  HMDB0115578+Na  0.0      1
3627    HMDB0008511+H  0.0      5
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_10_43_006470
Printing histogram!
hist_plots/2020_04_06_18_10_43_177115
joining X 220
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_10_43_484855


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 220
X_df_shapes:
(3973, 1024)
(2129, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 156433}
modeling 220
ml :  XGBoost
train_y: (3973,)
train_w: (3973,)
train_X: (3973, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_11_09_068557
start 221
filtering 221
aggregating 221
checking aggregation: 
             hmdb_ids     y  n_obs
2797    HMDB0008137+K  10.0     82
3209    HMDB0008307+K   3.0     16
11436  HMDB0110037+Na   0.0      8
8349    HMDB0043062+H   0.0      1
10207  HMDB0052516+Na   0.0     78
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_11_11_268328
Printing histogram!
hist_plots/2020_04_06_18_11_11_442285
joining X 221
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_11_11_748771


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 221
X_df_shapes:
(3904, 1024)
(2106, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 21, 'X': 137952}
modeling 221
ml :  XGBoost
train_y: (3904,)
train_w: (3904,)
train_X: (3904, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_11_36_772955
start 222
filtering 222
aggregating 222
checking aggregation: 
            hmdb_ids     y  n_obs
8243  HMDB0042665+Na   0.0      1
2220   HMDB0007919+H  23.0     94
7473   HMDB0013475+K   0.0      1
4502   HMDB0009062+K   1.0     84
5448  HMDB0009815+Na   0.0     80
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_11_38_982293
Printing histogram!
hist_plots/2020_04_06_18_11_39_155246
joining X 222
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_11_39_459477


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 222
X_df_shapes:
(3730, 1024)
(2375, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 34, 'X': 160605}
modeling 222
ml :  XGBoost
train_y: (3730,)
train_w: (3730,)
train_X: (3730, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_12_03_553629
start 223
filtering 223
aggregating 223
checking aggregation: 
            hmdb_ids     y  n_obs
9288   HMDB0046667+H   0.0      7
4044   HMDB0008761+K   0.0     17
4283  HMDB0008937+Na   9.0     71
2361  HMDB0007981+Na  21.0    115
1656   HMDB0007257+H   1.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_12_05_772670
Printing histogram!
hist_plots/2020_04_06_18_12_05_950806
joining X 223
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_12_06_258299


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 223
X_df_shapes:
(4007, 1024)
(1834, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 106353}
modeling 223
ml :  XGBoost
train_y: (4007,)
train_w: (4007,)
train_X: (4007, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_12_32_189255
start 224
filtering 224
aggregating 224
checking aggregation: 
             hmdb_ids    y  n_obs
1663    HMDB0007264+H  0.0      1
11181  HMDB0065129+Na  0.0     12
6060    HMDB0010616+K  1.0      1
1861   HMDB0007461+Na  0.0      1
3709   HMDB0008554+Na  0.0      8
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_12_34_411087
Printing histogram!
hist_plots/2020_04_06_18_12_34_585491
joining X 224
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_12_35_061216
splitting 224
X_df_shapes:
(2908, 1826)
(1515, 1826)
Testing split, n overlap train/te

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_13_49_150119
start 226
filtering 226
aggregating 226
checking aggregation: 
            hmdb_ids    y  n_obs
7390   HMDB0013426+H  0.0    145
8196   HMDB0042580+K  0.0      2
12790  HMDB0115401+H  0.0      2
3409   HMDB0008399+K  8.0     29
12959  HMDB0115547+H  0.0     63
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_13_51_356711
Printing histogram!
hist_plots/2020_04_06_18_13_51_536471
joining X 226
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_13_51_995912
splitting 226
X_df_shapes:
(2544, 1826)
(1786, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 0}
modeling 226
ml :  XGBoost
train_y: (2544,)
train_w: (2544,)
train_X: (2544, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_14_26_126220
start 227
filtering 227
agg

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_15_03_032197
start 228
filtering 228
aggregating 228
checking aggregation: 
             hmdb_ids     y  n_obs
3660    HMDB0008527+H   0.0      1
3162   HMDB0008290+Na   0.0      2
9164    HMDB0045996+K   0.0      3
12976  HMDB0115557+Na   0.0      5
6824    HMDB0011517+H  82.0    102
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_15_05_271243
Printing histogram!
hist_plots/2020_04_06_18_15_05_457840
joining X 228
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_15_05_946002
splitting 228
X_df_shapes:
(3049, 1826)
(1275, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 1}
modeling 228
ml :  XGBoost
train_y: (3049,)
train_w: (3049,)
train_X: (3049, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_15_45_265289
sta

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_16_19_987341
start 230
filtering 230
aggregating 230
checking aggregation: 
             hmdb_ids    y  n_obs
503     HMDB0000884+K  0.0     15
4079    HMDB0008799+H  0.0     17
5869    HMDB0010450+K  0.0     95
11429  HMDB0109814+Na  0.0     12
732     HMDB0002074+K  0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_16_22_185209
Printing histogram!
hist_plots/2020_04_06_18_16_22_361500
joining X 230
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_16_22_811567
splitting 230
X_df_shapes:
(2973, 1826)
(1247, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 1}
modeling 230
ml :  XGBoost
train_y: (2973,)
train_w: (2973,)
train_X: (2973, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_17_01_760979
start 231
filtering 231
aggregating 231
checking aggregation: 
            hmdb_ids    y  n_obs
3537   HMDB0008457+K  0.0     19
4702   HMDB0009184+H  0.0      1
8730   HMDB0044422+K  0.0      1
749   HMDB0002122+Na  0.0     10
3879  HMDB0008637+Na  0.0      2
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_17_03_989834
Printing histogram!
hist_plots/2020_04_06_18_17_04_166922
joining X 231
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_17_04_626367
splitting 231
X_df_shapes:
(2700, 1826)
(1520, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 19, 'X': 1}
modeling 231
ml :  XGBoost
train_y: (2700,)
train_w: (2700,)
train_X: (2700, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_17_40_985511
start 

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_18_01_638711
start 233
filtering 233
aggregating 233
checking aggregation: 
             hmdb_ids     y  n_obs
13011   HMDB0115574+H   0.0     63
3916    HMDB0008660+H   8.0    142
4195   HMDB0008888+Na   0.0      7
5045   HMDB0009386+Na   0.0      4
2804   HMDB0008139+Na  45.0    103
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_18_03_902831
Printing histogram!
hist_plots/2020_04_06_18_18_04_076137
joining X 233
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_18_04_359319
splitting 233
X_df_shapes:
(2626, 1024)
(1551, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 92497}
modeling 233
ml :  XGBoost
train_y: (2626,)
train_w: (2626,)
train_X: (2626, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_18_21_436264
start 2

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_19_06_778016
start 236
filtering 236
aggregating 236
checking aggregation: 
            hmdb_ids     y  n_obs
1699   HMDB0007295+K   0.0      4
2617   HMDB0008075+K   0.0     28
6779  HMDB0011494+Na  49.0     65
7837   HMDB0036236+K   0.0      1
302    HMDB0000517+K   0.0      1
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_19_09_016743
Printing histogram!
hist_plots/2020_04_06_18_19_09_187212
joining X 236
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_19_09_479341
splitting 236
X_df_shapes:
(2700, 1024)
(1492, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 19, 'X': 101033}
modeling 236
ml :  XGBoost
train_y: (2700,)
train_w: (2700,)
train_X: (2700, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_19_27_007096
start 237
filtering 237
aggregating 237
checking aggregation: 
             hmdb_ids    y  n_obs
7262    HMDB0012838+H  0.0      1
13091  HMDB0116749+Na  0.0      5
2949    HMDB0008197+H  0.0      5
3012    HMDB0008225+H  0.0      8
3460   HMDB0008422+Na  1.0     81
(13161, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_19_29_238650
Printing histogram!
hist_plots/2020_04_06_18_19_29_411982
joining X 237
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_19_29_698357
splitting 237
X_df_shapes:
(2884, 1024)
(1716, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 121935}
modeling 237
ml :  XGBoost
train_y: (2884,)
train_w: (2884,)
train_X: (2884, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_19_51_024291
sta

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_20_36_471777
start 240
filtering 240
aggregating 240
checking aggregation: 
             hmdb_ids    y  n_obs
10480  HMDB0055341+Na  0.0      4
4392   HMDB0009086+Na  0.0      2
4167   HMDB0008961+Na  0.0      3
4716    HMDB0009269+H  1.0     25
8652    HMDB0044879+K  0.0     94
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_20_38_626556
Printing histogram!
hist_plots/2020_04_06_18_20_38_798447
joining X 240
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_20_39_441313
splitting 240


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6724, 1826)
(2551, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 3}
modeling 240
ml :  XGBoost
train_y: (6724,)
train_w: (6724,)
train_X: (6724, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_22_00_269927
start 241
filtering 241
aggregating 241
checking aggregation: 
             hmdb_ids    y  n_obs
1577   HMDB0007264+Na  0.0      1
3544   HMDB0008544+Na  0.0      1
12134   HMDB0115013+K  0.0     46
4968    HMDB0009461+H  1.0     25
2764    HMDB0008177+K  0.0    131
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_22_02_477125
Printing histogram!
hist_plots/2020_04_06_18_22_02_647124
joining X 241
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_22_03_268217
splitting 241


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6490, 1826)
(3041, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 21, 'X': 3}
modeling 241
ml :  XGBoost
train_y: (6490,)
train_w: (6490,)
train_X: (6490, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_23_24_951523
start 242
filtering 242
aggregating 242
checking aggregation: 
             hmdb_ids    y  n_obs
4428    HMDB0009104+H  1.0      4
3532   HMDB0008537+Na  0.0      5
9320    HMDB0048789+H  0.0     17
5579   HMDB0010397+Na  1.0      6
12228  HMDB0115080+Na  1.0    106
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_23_27_235973
Printing histogram!
hist_plots/2020_04_06_18_23_27_430596
joining X 242
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_23_28_046424
splitting 242


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6177, 1826)
(3760, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 2}
modeling 242
ml :  XGBoost
train_y: (6177,)
train_w: (6177,)
train_X: (6177, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_24_46_430744
start 243
filtering 243
aggregating 243
checking aggregation: 
             hmdb_ids    y  n_obs
3093   HMDB0008318+Na  0.0      1
12540  HMDB0115408+Na  0.0     47
6971   HMDB0012394+Na  0.0      1
11290  HMDB0112141+Na  0.0      1
10888   HMDB0062784+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_24_48_657878
Printing histogram!
hist_plots/2020_04_06_18_24_48_837064
joining X 243
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_24_49_450382
splitting 243


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6049, 1826)
(3321, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 4}
modeling 243
ml :  XGBoost
train_y: (6049,)
train_w: (6049,)
train_X: (6049, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_26_04_051165
start 244
filtering 244
aggregating 244
checking aggregation: 
             hmdb_ids    y  n_obs
4537   HMDB0009173+Na  7.0      8
4370    HMDB0009072+K  0.0     12
3892   HMDB0008762+Na  0.0     66
10570  HMDB0055913+Na  0.0      4
6506    HMDB0011392+K  0.0      7
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_26_06_271134
Printing histogram!
hist_plots/2020_04_06_18_26_06_448179
joining X 244
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_26_07_062900
splitting 244


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6351, 1826)
(3440, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 3}
modeling 244
ml :  XGBoost
train_y: (6351,)
train_w: (6351,)
train_X: (6351, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_27_26_510001
start 245
filtering 245
aggregating 245
checking aggregation: 
            hmdb_ids    y  n_obs
6558  HMDB0011475+Na  0.0     30
3701  HMDB0008628+Na  2.0     70
3876   HMDB0008752+H  0.0     71
3465  HMDB0008504+Na  1.0     89
3773   HMDB0008668+H  0.0     17
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_27_28_746868
Printing histogram!
hist_plots/2020_04_06_18_27_28_918852
joining X 245
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_27_29_529902
splitting 245


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6332, 1826)
(3185, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 2}
modeling 245
ml :  XGBoost
train_y: (6332,)
train_w: (6332,)
train_X: (6332, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_28_48_446324
start 246
filtering 246
aggregating 246
checking aggregation: 
             hmdb_ids    y  n_obs
12648  HMDB0115528+Na  0.0      2
7846    HMDB0042322+K  0.0     95
1020   HMDB0005357+Na  0.0     20
7150   HMDB0013404+Na  1.0     15
6633    HMDB0011511+K  0.0     10
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_28_50_689283
Printing histogram!
hist_plots/2020_04_06_18_28_50_864366
joining X 246
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_28_51_481992
splitting 246


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6635, 1826)
(3191, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 19, 'X': 3}
modeling 246
ml :  XGBoost
train_y: (6635,)
train_w: (6635,)
train_X: (6635, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_30_15_400860
start 247
filtering 247
aggregating 247
checking aggregation: 
            hmdb_ids    y  n_obs
5577   HMDB0010397+H  1.0      6
512   HMDB0000960+Na  0.0     13
7561  HMDB0033774+Na  0.0      1
9071   HMDB0046677+K  0.0      3
3657   HMDB0008605+H  0.0     17
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_30_17_634221
Printing histogram!
hist_plots/2020_04_06_18_30_17_806655
joining X 247
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_30_18_422702
splitting 247


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6489, 1826)
(3500, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 2}
modeling 247
ml :  XGBoost
train_y: (6489,)
train_w: (6489,)
train_X: (6489, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_31_40_116388
start 248
filtering 248
aggregating 248
checking aggregation: 
            hmdb_ids    y  n_obs
11785  HMDB0112785+H  0.0      2
418    HMDB0000755+H  2.0      6
315    HMDB0000562+H  0.0      7
12750  HMDB0115575+H  0.0     19
1891   HMDB0007709+K  0.0      4
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_31_42_310217
Printing histogram!
hist_plots/2020_04_06_18_31_42_488224
joining X 248
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_31_42_872783
splitting 248
X_df_shapes:
(5590, 1024)
(3989, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 281416}
modeling 248
ml :  XGBoost
train_y: (5590,)
train_w: (5590,)
train_X: (5590, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_32_18_957168
start 249
filtering 249
aggregating 249
checking aggregation: 
           hmdb_ids    y  n_obs
9995  HMDB0052543+K  0.0      6
3399  HMDB0008463+K  0.0     28
1896  HMDB0007733+H  4.0      5
811   HMDB0003193+K  0.0      3
9290  HMDB0048618+K  0.0     78
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_32_21_174691
Printing histogram!
hist_plots/2020_04_06_18_32_21_343580
joining X 249
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_32_21_717608
splitting 249


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6218, 1024)
(3252, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 19, 'X': 241678}
modeling 249
ml :  XGBoost
train_y: (6218,)
train_w: (6218,)
train_X: (6218, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_33_01_370676
start 250
filtering 250
aggregating 250
checking aggregation: 
             hmdb_ids    y  n_obs
9687   HMDB0050608+Na  0.0     56
11129  HMDB0106154+Na  0.0      2
7779   HMDB0042163+Na  0.0      4
9650    HMDB0050438+H  0.0     29
6710   HMDB0011563+Na  1.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_33_03_561748
Printing histogram!
hist_plots/2020_04_06_18_33_03_740564
joining X 250
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_33_04_123901
splitting 250


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6762, 1024)
(3032, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 222968}
modeling 250
ml :  XGBoost
train_y: (6762,)
train_w: (6762,)
train_X: (6762, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_33_47_025889
start 251
filtering 251
aggregating 251
checking aggregation: 
             hmdb_ids    y  n_obs
4248    HMDB0009002+K  0.0      2
6310   HMDB0011294+Na  0.0     12
1930   HMDB0007859+Na  0.0    161
12156   HMDB0115032+H  0.0     60
5839    HMDB0010586+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_33_49_219371
Printing histogram!
hist_plots/2020_04_06_18_33_49_393538
joining X 251
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_33_49_767227
splitting 251


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6848, 1024)
(3018, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 261833}
modeling 251
ml :  XGBoost
train_y: (6848,)
train_w: (6848,)
train_X: (6848, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_34_33_328842
start 252
filtering 252
aggregating 252
checking aggregation: 
             hmdb_ids     y  n_obs
4801   HMDB0009313+Na   0.0      3
9451    HMDB0049551+H   0.0     35
6210   HMDB0011253+Na   6.0    117
4421   HMDB0009099+Na  65.0     67
12830   HMDB0116754+H   0.0      7
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_34_35_700433
Printing histogram!
hist_plots/2020_04_06_18_34_35_874987
joining X 252
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_34_36_250581
splitting 252


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6430, 1024)
(2875, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 194899}
modeling 252
ml :  XGBoost
train_y: (6430,)
train_w: (6430,)
train_X: (6430, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_35_17_264726
start 253
filtering 253
aggregating 253
checking aggregation: 
            hmdb_ids     y  n_obs
794   HMDB0003000+Na   0.0      1
5507   HMDB0010221+H   0.0      1
3585   HMDB0008568+H  10.0     28
5616  HMDB0010415+Na   0.0     98
7234   HMDB0013442+K   0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_35_19_468576
Printing histogram!
hist_plots/2020_04_06_18_35_19_636882
joining X 253
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_35_20_026686
splitting 253


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6056, 1024)
(3214, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 234108}
modeling 253
ml :  XGBoost
train_y: (6056,)
train_w: (6056,)
train_X: (6056, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_35_58_344314
start 254
filtering 254
aggregating 254
checking aggregation: 
             hmdb_ids    y  n_obs
3230    HMDB0008383+H  0.0     17
7063   HMDB0012601+Na  0.0      1
11808  HMDB0112798+Na  0.0      3
9267   HMDB0048466+Na  0.0    114
315     HMDB0000562+H  0.0      7
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_36_00_561796
Printing histogram!
hist_plots/2020_04_06_18_36_00_737654
joining X 254
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_36_01_115040
splitting 254


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(6657, 1024)
(3044, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 21, 'X': 258761}
modeling 254
ml :  XGBoost
train_y: (6657,)
train_w: (6657,)
train_X: (6657, 1024)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_36_43_282742
start 255
filtering 255
aggregating 255
checking aggregation: 
             hmdb_ids     y  n_obs
11264  HMDB0112125+Na   0.0      4
9362   HMDB0049104+Na   0.0      3
3708    HMDB0008631+H  38.0     86
4678    HMDB0009256+H   0.0     43
7741   HMDB0040900+Na   0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_36_45_487550
Printing histogram!
hist_plots/2020_04_06_18_36_45_658044
joining X 255
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_36_46_047132
splitting 255


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5957, 1024)
(3862, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 315376}
modeling 255
ml :  XGBoost
train_y: (5957,)
train_w: (5957,)
train_X: (5957, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_37_24_502462
start 256
filtering 256
aggregating 256
checking aggregation: 
             hmdb_ids    y  n_obs
7663   HMDB0037647+Na  0.0      1
5514    HMDB0010325+K  0.0      1
8631   HMDB0044788+Na  0.0      1
8422   HMDB0044256+Na  0.0    118
12163  HMDB0115038+Na  0.0      2
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_37_26_689550
Printing histogram!
hist_plots/2020_04_06_18_37_26_865354
joining X 256
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_37_27_362169
splitting 256


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3396, 1826)
(1895, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 1}
modeling 256
ml :  XGBoost
train_y: (3396,)
train_w: (3396,)
train_X: (3396, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_38_10_417578
start 257
filtering 257
aggregating 257
checking aggregation: 
             hmdb_ids    y  n_obs
9602   HMDB0050234+Na  0.0      1
7844   HMDB0042321+Na  0.0    118
3291   HMDB0008409+Na  2.0    107
12495  HMDB0115326+Na  6.0     73
3099    HMDB0008321+K  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_38_12_638305
Printing histogram!
hist_plots/2020_04_06_18_38_12_822435
joining X 257
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_38_13_314304
splitting 257


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4041, 1826)
(1313, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 1}
modeling 257
ml :  XGBoost
train_y: (4041,)
train_w: (4041,)
train_X: (4041, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_39_02_068368
start 258
filtering 258
aggregating 258
checking aggregation: 
            hmdb_ids    y  n_obs
6098   HMDB0011209+H  0.0     78
4304   HMDB0009042+K  1.0     16
2730  HMDB0008165+Na  5.0     10
3615  HMDB0008589+Na  0.0      2
8576   HMDB0044686+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_39_04_250782
Printing histogram!
hist_plots/2020_04_06_18_39_04_422713
joining X 258
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_39_04_902484
splitting 258


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3986, 1826)
(2282, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 0}
modeling 258
ml :  XGBoost
train_y: (3986,)
train_w: (3986,)
train_X: (3986, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_39_53_967384
start 259
filtering 259
aggregating 259
checking aggregation: 
             hmdb_ids    y  n_obs
1517    HMDB0007214+K  0.0     32
10934  HMDB0064415+Na  0.0      1
1530    HMDB0007220+K  0.0     34
3447   HMDB0008496+Na  0.0      5
12463   HMDB0115306+H  0.0     63
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_39_56_147180
Printing histogram!
hist_plots/2020_04_06_18_39_56_316857
joining X 259
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_39_56_800000
splitting 259


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4117, 1826)
(1782, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 1}
modeling 259
ml :  XGBoost
train_y: (4117,)
train_w: (4117,)
train_X: (4117, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_40_44_719957
start 260
filtering 260
aggregating 260
checking aggregation: 
            hmdb_ids     y  n_obs
7821  HMDB0042220+Na   0.0      4
3243  HMDB0008393+Na   6.0    117
4604   HMDB0009223+K   0.0     37
2992   HMDB0008279+K   0.0     52
2510   HMDB0008088+H  38.0     86
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_40_46_913172
Printing histogram!
hist_plots/2020_04_06_18_40_47_081354
joining X 260
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_40_47_557908
splitting 260


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3957, 1826)
(1799, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 0}
modeling 260
ml :  XGBoost
train_y: (3957,)
train_w: (3957,)
train_X: (3957, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_41_37_394111
start 261
filtering 261
aggregating 261
checking aggregation: 
            hmdb_ids    y  n_obs
9866   HMDB0051326+H  0.0     35
3997  HMDB0008862+Na  0.0      2
6702  HMDB0011553+Na  0.0      1
6935   HMDB0012373+H  0.0      1
5922  HMDB0010630+Na  0.0     10
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_41_39_613762
Printing histogram!
hist_plots/2020_04_06_18_41_39_788717
joining X 261
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_41_40_265201
splitting 261


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3548, 1826)
(1912, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 0}
modeling 261
ml :  XGBoost
train_y: (3548,)
train_w: (3548,)
train_X: (3548, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_42_23_722147
start 262
filtering 262
aggregating 262
checking aggregation: 
            hmdb_ids    y  n_obs
342   HMDB0000626+Na  0.0      4
4921  HMDB0009401+Na  0.0      7
5616  HMDB0010415+Na  0.0     98
1760   HMDB0007482+K  0.0      1
7765  HMDB0042068+Na  0.0      5
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_42_25_866503
Printing histogram!
hist_plots/2020_04_06_18_42_26_040108
joining X 262
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_42_26_527776
splitting 262


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3957, 1826)
(1853, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 0}
modeling 262
ml :  XGBoost
train_y: (3957,)
train_w: (3957,)
train_X: (3957, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_43_14_322861
start 263
filtering 263
aggregating 263
checking aggregation: 
             hmdb_ids    y  n_obs
7147   HMDB0013403+Na  0.0      4
10836   HMDB0062452+H  0.0      1
1204   HMDB0005849+Na  0.0      3
8751    HMDB0045205+K  0.0      2
6996    HMDB0012414+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_43_16_605568
Printing histogram!
hist_plots/2020_04_06_18_43_16_779455
joining X 263
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_43_17_257389
splitting 263


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3855, 1826)
(2020, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 2}
modeling 263
ml :  XGBoost
train_y: (3855,)
train_w: (3855,)
train_X: (3855, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_44_08_100921
start 264
filtering 264
aggregating 264
checking aggregation: 
            hmdb_ids    y  n_obs
3212   HMDB0008371+K  0.0     96
4282   HMDB0009018+H  0.0     76
4794  HMDB0009306+Na  3.0      3
5563   HMDB0010392+H  1.0      4
3794  HMDB0008690+Na  0.0      5
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_44_10_291017
Printing histogram!
hist_plots/2020_04_06_18_44_10_461698
joining X 264
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_44_10_774536


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 264
X_df_shapes:
(3338, 1024)
(2510, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 19, 'X': 166607}
modeling 264
ml :  XGBoost
train_y: (3338,)
train_w: (3338,)
train_X: (3338, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_44_32_121286
start 265
filtering 265
aggregating 265
checking aggregation: 
            hmdb_ids    y  n_obs
4809   HMDB0009330+K  1.0     16
11950  HMDB0114878+H  0.0      2
11960  HMDB0114882+H  0.0      1
3543   HMDB0008544+H  0.0     17
3951   HMDB0008832+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_44_34_325394
Printing histogram!
hist_plots/2020_04_06_18_44_34_499929
joining X 265
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_44_34_804829


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 265
X_df_shapes:
(3975, 1024)
(1775, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 132606}
modeling 265
ml :  XGBoost
train_y: (3975,)
train_w: (3975,)
train_X: (3975, 1024)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_45_00_025504
start 266
filtering 266
aggregating 266
checking aggregation: 
           hmdb_ids    y  n_obs
1731  HMDB0007447+K  0.0     34
8251  HMDB0043897+K  0.0      1
2880  HMDB0008229+H  0.0      5
332   HMDB0000600+K  0.0      1
1509  HMDB0007206+K  0.0      4
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_45_02_222659
Printing histogram!
hist_plots/2020_04_06_18_45_02_398479
joining X 266
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_45_02_708028


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 266
X_df_shapes:
(3990, 1024)
(1737, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 128402}
modeling 266
ml :  XGBoost
train_y: (3990,)
train_w: (3990,)
train_X: (3990, 1024)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_45_27_942029
start 267
filtering 267
aggregating 267
checking aggregation: 
           hmdb_ids     y  n_obs
4752  HMDB0009288+K   0.0      7
983   HMDB0004975+H  19.0     21
6442  HMDB0011355+K   0.0      3
3952  HMDB0008833+H   0.0     66
39    HMDB0000050+H   2.0      6
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_45_30_174863
Printing histogram!
hist_plots/2020_04_06_18_45_30_345031
joining X 267
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_45_30_642786


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 267
X_df_shapes:
(4104, 1024)
(1785, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 125150}
modeling 267
ml :  XGBoost
train_y: (4104,)
train_w: (4104,)
train_X: (4104, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_45_56_561212
start 268
filtering 268
aggregating 268
checking aggregation: 
            hmdb_ids    y  n_obs
2129   HMDB0007938+K  1.0     63
8014  HMDB0042635+Na  0.0    102
2138   HMDB0007941+K  3.0      4
8457   HMDB0044312+H  0.0      5
3941   HMDB0008826+H  0.0      7
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_45_58_730038
Printing histogram!
hist_plots/2020_04_06_18_45_58_903900
joining X 268
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_45_59_203954


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 268
X_df_shapes:
(3737, 1024)
(1569, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 21, 'X': 70139}
modeling 268
ml :  XGBoost
train_y: (3737,)
train_w: (3737,)
train_X: (3737, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_46_23_514273
start 269
filtering 269
aggregating 269
checking aggregation: 
             hmdb_ids    y  n_obs
20      HMDB0000024+K  0.0      2
11783   HMDB0112782+K  0.0      3
4390   HMDB0009085+Na  0.0      5
9538   HMDB0049867+Na  0.0     78
2114    HMDB0007933+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_46_25_700885
Printing histogram!
hist_plots/2020_04_06_18_46_25_870672
joining X 269
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_46_26_177170


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 269
X_df_shapes:
(3482, 1024)
(2077, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 129719}
modeling 269
ml :  XGBoost
train_y: (3482,)
train_w: (3482,)
train_X: (3482, 1024)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_46_49_072228
start 270
filtering 270
aggregating 270
checking aggregation: 
             hmdb_ids    y  n_obs
12641   HMDB0115526+K  0.0      2
10815  HMDB0062378+Na  0.0      1
9046   HMDB0046460+Na  0.0      5
69     HMDB0000086+Na  0.0    124
11730   HMDB0112716+H  1.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_46_51_264720
Printing histogram!
hist_plots/2020_04_06_18_46_51_432082
joining X 270
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_46_51_734154


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 270
X_df_shapes:
(3896, 1024)
(1914, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 119970}
modeling 270
ml :  XGBoost
train_y: (3896,)
train_w: (3896,)
train_X: (3896, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_47_17_184727
start 271
filtering 271
aggregating 271
checking aggregation: 
             hmdb_ids    y  n_obs
5324   HMDB0009834+Na  0.0      3
5064    HMDB0009543+H  0.0      1
3142   HMDB0008339+Na  9.0    147
12042   HMDB0114954+K  0.0      4
5071    HMDB0009548+K  3.0      4
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_47_19_379720
Printing histogram!
hist_plots/2020_04_06_18_47_19_551375
joining X 271
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_47_19_854654


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


splitting 271
X_df_shapes:
(3720, 1024)
(2258, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 153987}
modeling 271
ml :  XGBoost
train_y: (3720,)
train_w: (3720,)
train_X: (3720, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_47_43_801110
start 272
filtering 272
aggregating 272
checking aggregation: 
            hmdb_ids     y  n_obs
8310  HMDB0044029+Na   0.0      4
8457   HMDB0044312+H   0.0      5
7500  HMDB0031679+Na   0.0      5
3688   HMDB0008624+K   0.0     28
3576   HMDB0008565+H  52.0     52
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_47_46_035651
Printing histogram!
hist_plots/2020_04_06_18_47_46_210309
joining X 272
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_47_46_686562
splitting 272
X_df_shapes:
(2812, 1826)
(1419, 1826)
Testing split, n overlap train/te

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_48_19_626204
start 273
filtering 273
aggregating 273
checking aggregation: 
             hmdb_ids    y  n_obs
9335    HMDB0048983+H  0.0     48
4615    HMDB0009227+H  0.0      7
10207   HMDB0053706+H  0.0      7
7320   HMDB0028722+Na  0.0      2
4193   HMDB0008974+Na  0.0     33
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_48_21_797225
Printing histogram!
hist_plots/2020_04_06_18_48_21_971264
joining X 273
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_48_22_419252
splitting 273
X_df_shapes:
(2839, 1826)
(1275, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 10, 'X': 0}
modeling 273
ml :  XGBoost
train_y: (2839,)
train_w: (2839,)
train_X: (2839, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_48_56_087210
start 274
filteri

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_49_30_132999
start 275
filtering 275
aggregating 275
checking aggregation: 
             hmdb_ids    y  n_obs
12478  HMDB0115313+Na  0.0     18
2615    HMDB0008124+K  0.0     17
6405    HMDB0011334+K  0.0     74
594     HMDB0001266+K  0.0      2
11259   HMDB0112123+K  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_49_32_409086
Printing histogram!
hist_plots/2020_04_06_18_49_32_584645
joining X 275
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_49_33_031541
splitting 275
X_df_shapes:
(3150, 1826)
(1337, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 0}
modeling 275
ml :  XGBoost
train_y: (3150,)
train_w: (3150,)
train_X: (3150, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_50_08_563901
start 276
filtering 276
aggregating 276
checking aggregation: 
            hmdb_ids    y  n_obs
238    HMDB0000408+K  0.0      1
4378   HMDB0009078+K  1.0     30
5647  HMDB0010428+Na  0.0    118
337   HMDB0000610+Na  0.0     27
7226  HMDB0013437+Na  0.0      2
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_50_10_807127
Printing histogram!
hist_plots/2020_04_06_18_50_10_983332
joining X 276
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_50_11_426358
splitting 276
X_df_shapes:
(2739, 1826)
(1482, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 0}
modeling 276
ml :  XGBoost
train_y: (2739,)
train_w: (2739,)
train_X: (2739, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_50_44_780840
start 277
filtering 277
aggregating 277
checking aggregation: 
             hmdb_ids    y  n_obs
10744   HMDB0061880+K  0.0      1
9500   HMDB0049743+Na  0.0    100
3759    HMDB0008659+K  0.0    131
5126    HMDB0009583+K  7.0    107
892     HMDB0003871+H  0.0      1
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_50_46_966211
Printing histogram!
hist_plots/2020_04_06_18_50_47_137559
joining X 277
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_50_47_584646
splitting 277
X_df_shapes:
(3116, 1826)
(1312, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 0}
modeling 277
ml :  XGBoost
train_y: (3116,)
train_w: (3116,)
train_X: (3116, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_51_21_000108
start 278

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_53_44_532577
start 283
filtering 283
aggregating 283
checking aggregation: 
            hmdb_ids    y  n_obs
3839  HMDB0008726+Na  0.0     14
7921  HMDB0042438+Na  0.0     94
8239   HMDB0043867+H  0.0      1
3102   HMDB0008324+K  4.0      6
7173   HMDB0013415+H  0.0     89
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_53_46_766099
Printing histogram!
hist_plots/2020_04_06_18_53_46_940948
joining X 283
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_53_47_223213
splitting 283
X_df_shapes:
(2686, 1024)
(1403, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 81234}
modeling 283
ml :  XGBoost
train_y: (2686,)
train_w: (2686,)
train_X: (2686, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_54_04_205040
start 284
filtering 284
aggregating 284
checking aggregation: 
             hmdb_ids    y  n_obs
6006    HMDB0010685+K  0.0      1
4159   HMDB0008957+Na  0.0      5
11913   HMDB0114849+H  2.0     28
8760    HMDB0045239+K  0.0    110
3206    HMDB0008369+K  0.0    131
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_54_06_391681
Printing histogram!
hist_plots/2020_04_06_18_54_06_567969
joining X 284
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_54_06_848865
splitting 284
X_df_shapes:
(2951, 1024)
(1335, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 92164}
modeling 284
ml :  XGBoost
train_y: (2951,)
train_w: (2951,)
train_X: (2951, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_54_26_109278
start 285
filtering 285
aggregating 285
checking aggregation: 
             hmdb_ids    y  n_obs
12162   HMDB0115038+H  0.0      7
3528   HMDB0008535+Na  0.0     32
6751    HMDB0011695+H  0.0     80
12517   HMDB0115337+H  0.0     53
8943    HMDB0045996+K  0.0      3
(12896, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_54_28_293909
Printing histogram!
hist_plots/2020_04_06_18_54_28_470320
joining X 285
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_54_28_755743
splitting 285
X_df_shapes:
(2973, 1024)
(1103, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 61449}
modeling 285
ml :  XGBoost
train_y: (2973,)
train_w: (2973,)
train_X: (2973, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_54_47_995863
star

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_18_55_26_342615
start 288
filtering 288
aggregating 288
checking aggregation: 
            hmdb_ids    y  n_obs
5654   HMDB0032358+H  0.0      2
4817   HMDB0011292+H  0.0     75
3238  HMDB0009002+Na  5.0      5
8142   HMDB0112515+K  0.0     21
7300   HMDB0053040+H  0.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_55_28_362516
Printing histogram!
hist_plots/2020_04_06_18_55_28_538775
joining X 288
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_55_29_075056
splitting 288


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4482, 1826)
(2277, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 2}
modeling 288
ml :  XGBoost
train_y: (4482,)
train_w: (4482,)
train_X: (4482, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_56_28_955503
start 289
filtering 289
aggregating 289
checking aggregation: 
           hmdb_ids     y  n_obs
1343  HMDB0007945+K   0.0      4
2871  HMDB0008718+K   0.0     13
4362  HMDB0010428+K   2.0     57
5552  HMDB0028901+H   1.0      1
2773  HMDB0008630+H  79.0     84
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_56_31_142881
Printing histogram!
hist_plots/2020_04_06_18_56_31_319509
joining X 289
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_56_31_840687
splitting 289


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4528, 1826)
(1984, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 25, 'X': 2}
modeling 289
ml :  XGBoost
train_y: (4528,)
train_w: (4528,)
train_X: (4528, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_57_32_990822
start 290
filtering 290
aggregating 290
checking aggregation: 
            hmdb_ids    y  n_obs
6963  HMDB0049731+Na  1.0      1
7453   HMDB0055245+H  0.0      1
178   HMDB0000476+Na  0.0     11
5455   HMDB0013413+K  0.0      3
7484   HMDB0055616+H  0.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_57_35_058161
Printing histogram!
hist_plots/2020_04_06_18_57_35_235428
joining X 290
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_57_35_758324
splitting 290


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4367, 1826)
(2402, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 4}
modeling 290
ml :  XGBoost
train_y: (4367,)
train_w: (4367,)
train_X: (4367, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_58_35_638956
start 291
filtering 291
aggregating 291
checking aggregation: 
            hmdb_ids    y  n_obs
592   HMDB0004973+Na  0.0      1
4993  HMDB0011439+Na  1.0      9
8241   HMDB0112641+H  1.0      2
8277   HMDB0112719+H  0.0     15
3653   HMDB0009281+H  0.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_58_37_721976
Printing histogram!
hist_plots/2020_04_06_18_58_37_899678
joining X 291
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_58_38_414759
splitting 291


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4955, 1826)
(2207, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 3}
modeling 291
ml :  XGBoost
train_y: (4955,)
train_w: (4955,)
train_X: (4955, 1826)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_18_59_41_625952
start 292
filtering 292
aggregating 292
checking aggregation: 
            hmdb_ids    y  n_obs
369    HMDB0001348+K  0.0    116
1969  HMDB0008194+Na  3.0     18
3889   HMDB0009492+H  3.0     11
2392   HMDB0008398+K  6.0    139
2888   HMDB0008725+K  2.0     41
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_18_59_43_647786
Printing histogram!
hist_plots/2020_04_06_18_59_43_822881
joining X 292
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_18_59_44_346125
splitting 292


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3956, 1826)
(2791, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 1}
modeling 292
ml :  XGBoost
train_y: (3956,)
train_w: (3956,)
train_X: (3956, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_00_38_709077
start 293
filtering 293
aggregating 293
checking aggregation: 
            hmdb_ids     y  n_obs
4892   HMDB0011327+H   0.0     23
6225   HMDB0044319+K   0.0      1
1399  HMDB0007977+Na  35.0     53
5123   HMDB0011537+K   0.0      1
345    HMDB0001235+H   0.0      2
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_00_40_765661
Printing histogram!
hist_plots/2020_04_06_19_00_40_942460
joining X 293
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_00_41_465512
splitting 293


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(5007, 1826)
(2251, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 22, 'X': 3}
modeling 293
ml :  XGBoost
train_y: (5007,)
train_w: (5007,)
train_X: (5007, 1826)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_01_45_687539
start 294
filtering 294
aggregating 294
checking aggregation: 
            hmdb_ids    y  n_obs
5718  HMDB0037498+Na  0.0      1
3398   HMDB0009101+H  2.0      6
4693   HMDB0011234+K  0.0      6
8340  HMDB0114783+Na  1.0      1
6205  HMDB0044278+Na  0.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_01_47_719084
Printing histogram!
hist_plots/2020_04_06_19_01_47_894473
joining X 294
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_01_48_414111
splitting 294


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4542, 1826)
(2205, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 2}
modeling 294
ml :  XGBoost
train_y: (4542,)
train_w: (4542,)
train_X: (4542, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_02_49_587394
start 295
filtering 295
aggregating 295
checking aggregation: 
            hmdb_ids     y  n_obs
8265  HMDB0112708+Na   4.0     41
6520  HMDB0045767+Na   0.0      2
1660   HMDB0008075+K   0.0      5
3752  HMDB0009363+Na   8.0      8
2037   HMDB0008227+H  12.0     80
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_02_51_632659
Printing histogram!
hist_plots/2020_04_06_19_02_51_810612
joining X 295
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_02_52_338307
splitting 295


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4555, 1826)
(1899, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 2}
modeling 295
ml :  XGBoost
train_y: (4555,)
train_w: (4555,)
train_X: (4555, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_03_53_533984
start 296
filtering 296
aggregating 296
checking aggregation: 
            hmdb_ids    y  n_obs
5641   HMDB0032164+K  0.0      1
4223  HMDB0009918+Na  0.0      7
3211   HMDB0008992+K  1.0      5
5890   HMDB0042529+K  0.0     13
6627  HMDB0046461+Na  0.0      2
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_03_55_569283
Printing histogram!
hist_plots/2020_04_06_19_03_55_744756
joining X 296
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_03_56_074151
splitting 296
X_df_shapes:
(4201, 1024)
(2507, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 24, 'X': 151509}
modeling 296
ml :  XGBoost
train_y: (4201,)
train_w: (4201,)
train_X: (4201, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_04_22_905130
start 297
filtering 297
aggregating 297
checking aggregation: 
            hmdb_ids     y  n_obs
1792   HMDB0008122+H  40.0     53
3568   HMDB0009235+H   3.0     11
5262   HMDB0012273+H   0.0      1
3923  HMDB0009543+Na   0.0      3
2224  HMDB0008313+Na   4.0     75
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_04_24_986504
Printing histogram!
hist_plots/2020_04_06_19_04_25_164609
joining X 297
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_04_25_484640
splitting 297
X_df_shapes:
(4579, 1024)
(2180, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 151812}
modeling 297
ml :  XGBoost
train_y: (4579,)
train_w: (4579,)
train_X: (4579, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_04_54_673628
start 298
filtering 298
aggregating 298
checking aggregation: 
            hmdb_ids    y  n_obs
5603   HMDB0030997+H  0.0      2
8652   HMDB0115106+H  1.0     12
4160  HMDB0009869+Na  1.0      1
5000   HMDB0011446+H  0.0      1
7406   HMDB0054214+H  0.0     18
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_04_56_688088
Printing histogram!
hist_plots/2020_04_06_19_04_56_861899
joining X 298
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_04_57_181412
splitting 298
X_df_shapes:
(4535, 1024)
(2330, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 21, 'X': 133657}
modeling 298
ml :  XGBoost
train_y: (4535,)
train_w: (4535,)
train_X: (4535, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_05_26_465573
start 299
filtering 299
aggregating 299
checking aggregation: 
            hmdb_ids    y  n_obs
7651  HMDB0063171+Na  0.0      2
8302   HMDB0112772+K  0.0      7
6877   HMDB0049222+H  0.0     12
6375  HMDB0044958+Na  0.0     49
8177   HMDB0112547+H  1.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_05_28_493163
Printing histogram!
hist_plots/2020_04_06_19_05_28_665258
joining X 299
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_05_28_984389
splitting 299
X_df_shapes:
(4592, 1024)
(1962, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 140028}
modeling 299
ml :  XGBoost
train_y: (4592,)
train_w: (4592,)
train_X: (4592, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_05_58_159919
start 300
filtering 300
aggregating 300
checking aggregation: 
            hmdb_ids    y  n_obs
6166   HMDB0044132+K  0.0      1
3328   HMDB0009063+H  0.0      1
7157  HMDB0051161+Na  0.0      1
7600   HMDB0062432+H  0.0     10
645    HMDB0005376+K  0.0     17
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_06_00_161007
Printing histogram!
hist_plots/2020_04_06_19_06_00_338848
joining X 300
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_06_00_655050
splitting 300
X_df_shapes:
(4595, 1024)
(2147, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 27, 'X': 128818}
modeling 300
ml :  XGBoost
train_y: (4595,)
train_w: (4595,)
train_X: (4595, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_06_29_840088
start 301
filtering 301
aggregating 301
checking aggregation: 
            hmdb_ids     y  n_obs
8660  HMDB0115112+Na   1.0      1
4014  HMDB0009611+Na  79.0     94
6339   HMDB0044875+H   0.0     21
5420   HMDB0013329+H   1.0      9
3509   HMDB0009198+K   0.0      6
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_06_31_883679
Printing histogram!
hist_plots/2020_04_06_19_06_32_063448
joining X 301
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_06_32_380769
splitting 301
X_df_shapes:
(4336, 1024)
(2293, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 26, 'X': 146573}
modeling 301
ml :  XGBoost
train_y: (4336,)
train_w: (4336,)
train_X: (4336, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_07_00_519379
start 302
filtering 302
aggregating 302
checking aggregation: 
           hmdb_ids    y  n_obs
5424  HMDB0013338+H  0.0     95
3096  HMDB0008925+H  8.0     30
4575  HMDB0010665+H  0.0      1
6686  HMDB0047000+H  0.0      1
1470  HMDB0008005+K  1.0    128
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_07_02_563913
Printing histogram!
hist_plots/2020_04_06_19_07_02_742362
joining X 302
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_07_03_062278
splitting 302
X_df_shapes:
(4415, 1024)
(2318, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 144216}
modeling 302
ml :  XGBoost
train_y: (4415,)
train_w: (4415,)
train_X: (4415, 1024)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_07_31_855955
start 303
filtering 303
aggregating 303
checking aggregation: 
            hmdb_ids     y  n_obs
3033   HMDB0008874+K   1.0      5
3296   HMDB0009051+K   0.0      5
2040   HMDB0008228+H  12.0     80
7656  HMDB0063486+Na   0.0     29
6670   HMDB0046875+H   0.0      4
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_07_33_934085
Printing histogram!
hist_plots/2020_04_06_19_07_34_119061
joining X 303
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_07_34_453514
splitting 303
X_df_shapes:
(4362, 1024)
(2466, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 142700}
modeling 303
ml :  XGBoost
train_y: (4362,)
train_w: (4362,)
train_X: (4362, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_08_02_544536
start 304
filtering 304
aggregating 304
checking aggregation: 
            hmdb_ids     y  n_obs
6029  HMDB0043240+Na   1.0      1
4673   HMDB0011220+H  12.0     80
3043   HMDB0008881+H   2.0     62
6488  HMDB0045673+Na   0.0     49
8998  HMDB0115595+Na   1.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_08_04_547861
Printing histogram!
hist_plots/2020_04_06_19_08_04_721551
joining X 304
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_08_05_181658
splitting 304
X_df_shapes:
(2607, 1826)
(1328, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 23, 'X': 0}
mo

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_10_33_636464
start 308
filtering 308
aggregating 308
checking aggregation: 
            hmdb_ids     y  n_obs
3297  HMDB0009051+Na   0.0     18
7663  HMDB0063700+Na   0.0      1
4148   HMDB0009859+H   0.0      2
1338  HMDB0007943+Na  17.0     49
5723  HMDB0037830+Na   1.0      1
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_10_35_611464
Printing histogram!
hist_plots/2020_04_06_19_10_35_790721
joining X 308
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_10_36_237961
splitting 308
X_df_shapes:
(2726, 1826)
(1029, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 20, 'X': 0}
modeling 308
ml :  XGBoost
train_y: (2726,)
train_w: (2726,)
train_X: (2726, 1826)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_11_12_659011
start 309


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_13_38_905964
start 314
filtering 314
aggregating 314
checking aggregation: 
            hmdb_ids     y  n_obs
3868  HMDB0009483+Na  36.0     59
245    HMDB0000719+H   1.0      1
5191   HMDB0011714+H   0.0      1
2164  HMDB0008290+Na   0.0      2
6100   HMDB0043910+K   1.0     61
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_13_40_930018
Printing histogram!
hist_plots/2020_04_06_19_13_41_110557
joining X 314
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_13_41_390360
splitting 314
X_df_shapes:
(2683, 1024)
(1136, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 28, 'X': 48828}
modeling 314
ml :  XGBoost
train_y: (2683,)
train_w: (2683,)
train_X: (2683, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_13_59_197085
start 315
filterin

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_15_38_969383
start 320
filtering 320
aggregating 320
checking aggregation: 
            hmdb_ids    y  n_obs
5890   HMDB0042529+K  0.0     13
2916  HMDB0008748+Na  0.0      3
650   HMDB0005378+Na  0.0     98
456    HMDB0002434+H  0.0      8
7752   HMDB0068866+K  1.0     16
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_15_40_981854
Printing histogram!
hist_plots/2020_04_06_19_15_41_162735
joining X 320
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_15_41_588529
splitting 320
X_df_shapes:
(1977, 1826)
(809, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 0}
modeling 320
ml :  XGBoost
train_y: (1977,)
train_w: (1977,)
train_X: (1977, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_16_07_498265
start 321
filtering 321
aggre

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_18_43_457603
start 327
filtering 327
aggregating 327
checking aggregation: 
            hmdb_ids      y  n_obs
1493   HMDB0008013+K    6.0    139
3744   HMDB0009359+K    0.0      6
5214   HMDB0012089+H  101.0    137
4544  HMDB0010631+Na    0.0      1
2808  HMDB0008654+Na    3.0      6
(9060, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_18_45_533107
Printing histogram!
hist_plots/2020_04_06_19_18_45_708888
joining X 327
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_18_46_126492
splitting 327
X_df_shapes:
(1750, 1826)
(902, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 0}
modeling 327
ml :  XGBoost
train_y: (1750,)
train_w: (1750,)
train_X: (1750, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_19_09_755

filtering 336
aggregating 336
checking aggregation: 
           hmdb_ids    y  n_obs
8327  HMDB0115080+K  5.0     53
871   HMDB0007176+H  0.0      1
1239  HMDB0007926+H  0.0      3
5306  HMDB0013444+H  0.0     50
6359  HMDB0046254+H  0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_21_11_577112
Printing histogram!
hist_plots/2020_04_06_19_21_11_758930
joining X 336
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_21_12_277500
splitting 336


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4659, 1826)
(1973, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 1}
modeling 336
ml :  XGBoost
train_y: (4659,)
train_w: (4659,)
train_X: (4659, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_22_11_765375
start 337
filtering 337
aggregating 337
checking aggregation: 
            hmdb_ids    y  n_obs
1127  HMDB0007874+Na  0.0     18
5241  HMDB0013407+Na  1.0     28
523   HMDB0004844+Na  0.0      3
2777   HMDB0008719+K  0.0      3
2836   HMDB0008763+K  0.0      2
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_22_13_809455
Printing histogram!
hist_plots/2020_04_06_19_22_13_993188
joining X 337
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_22_14_494709
splitting 337


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4249, 1826)
(2173, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 1}
modeling 337
ml :  XGBoost
train_y: (4249,)
train_w: (4249,)
train_X: (4249, 1826)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_23_07_873235
start 338
filtering 338
aggregating 338
checking aggregation: 
            hmdb_ids     y  n_obs
1941  HMDB0008211+Na  11.0     79
3962  HMDB0009822+Na   0.0      2
6912  HMDB0051151+Na   0.0      1
1929  HMDB0008206+Na   0.0      1
6707   HMDB0049688+K   0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_23_09_903438
Printing histogram!
hist_plots/2020_04_06_19_23_10_082989
joining X 338
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_23_10_603945
splitting 338


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4468, 1826)
(2283, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 5}
modeling 338
ml :  XGBoost
train_y: (4468,)
train_w: (4468,)
train_X: (4468, 1826)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_24_09_665985
start 339
filtering 339
aggregating 339
checking aggregation: 
            hmdb_ids     y  n_obs
4216   HMDB0010436+H   0.0     22
4051  HMDB0009908+Na   0.0      7
4890   HMDB0011506+H  25.0     75
2278  HMDB0008375+Na   1.0     72
4883  HMDB0011498+Na   0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_24_11_711898
Printing histogram!
hist_plots/2020_04_06_19_24_11_896865
joining X 339
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_24_12_407410
splitting 339


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4384, 1826)
(2111, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 0}
modeling 339
ml :  XGBoost
train_y: (4384,)
train_w: (4384,)
train_X: (4384, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_25_06_395215
start 340
filtering 340
aggregating 340
checking aggregation: 
            hmdb_ids     y  n_obs
7757   HMDB0112303+H   0.0      1
3721   HMDB0009468+H   1.0      7
8721  HMDB0116765+Na   0.0      1
899    HMDB0007217+K   0.0      9
2177   HMDB0008331+H  71.0     79
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_25_08_390349
Printing histogram!
hist_plots/2020_04_06_19_25_08_562021
joining X 340
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_25_09_073433
splitting 340


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4365, 1826)
(2119, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 3}
modeling 340
ml :  XGBoost
train_y: (4365,)
train_w: (4365,)
train_X: (4365, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_26_07_943325
start 341
filtering 341
aggregating 341
checking aggregation: 
           hmdb_ids     y  n_obs
4943  HMDB0011538+K   0.0     12
3037  HMDB0008952+H   0.0      1
2610  HMDB0008590+H  71.0     79
5823  HMDB0043463+H   0.0      1
4764  HMDB0011372+H   0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_26_09_955473
Printing histogram!
hist_plots/2020_04_06_19_26_10_135143
joining X 341
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_26_10_648257
splitting 341


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4313, 1826)
(2619, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 1}
modeling 341
ml :  XGBoost
train_y: (4313,)
train_w: (4313,)
train_X: (4313, 1826)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_27_06_524655
start 342
filtering 342
aggregating 342
checking aggregation: 
            hmdb_ids    y  n_obs
3845  HMDB0009586+Na  7.0      7
1224   HMDB0007919+K  7.0     85
7910  HMDB0112550+Na  1.0     17
7900  HMDB0112545+Na  3.0      3
7816  HMDB0112429+Na  1.0      2
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_27_08_553726
Printing histogram!
hist_plots/2020_04_06_19_27_08_735871
joining X 342
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_27_09_248532
splitting 342


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4385, 1826)
(1978, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 1}
modeling 342
ml :  XGBoost
train_y: (4385,)
train_w: (4385,)
train_X: (4385, 1826)
Weighting:  n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_28_07_522144
start 343
filtering 343
aggregating 343
checking aggregation: 
            hmdb_ids    y  n_obs
71     HMDB0000148+H  1.0      2
5383  HMDB0030978+Na  0.0      1
7423  HMDB0064231+Na  0.0      3
5751   HMDB0042771+K  0.0      1
7654  HMDB0110114+Na  0.0      3
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_28_09_551299
Printing histogram!
hist_plots/2020_04_06_19_28_09_729161
joining X 343
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_28_10_241574
splitting 343


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4336, 1826)
(2071, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 1}
modeling 343
ml :  XGBoost
train_y: (4336,)
train_w: (4336,)
train_X: (4336, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_29_08_068062
start 344
filtering 344
aggregating 344
checking aggregation: 
            hmdb_ids    y  n_obs
8010   HMDB0112766+H  1.0      1
5447   HMDB0033166+H  1.0      1
7977   HMDB0112695+K  0.0      1
5756  HMDB0042815+Na  0.0      1
5061  HMDB0012105+Na  0.0     42
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_29_10_157572
Printing histogram!
hist_plots/2020_04_06_19_29_10_344111
joining X 344
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_29_10_688533
splitting 344
X_df_shapes:
(4486, 1024)
(2030, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 113682}
modeling 344
ml :  XGBoost
train_y: (4486,)
train_w: (4486,)
train_X: (4486, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_29_39_547732
start 345
filtering 345
aggregating 345
checking aggregation: 
            hmdb_ids    y  n_obs
4412   HMDB0010679+H  0.0      2
2842  HMDB0008786+Na  0.0      1
2096  HMDB0008293+Na  0.0      4
5802   HMDB0043235+H  0.0      1
1178   HMDB0007895+H  0.0      3
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_29_41_570632
Printing histogram!
hist_plots/2020_04_06_19_29_41_752989
joining X 345
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_29_42_071320
splitting 345
X_df_shapes:
(4747, 1024)
(2190, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 146162}
modeling 345
ml :  XGBoost
train_y: (4747,)
train_w: (4747,)
train_X: (4747, 1024)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_30_12_544979
start 346
filtering 346
aggregating 346
checking aggregation: 
            hmdb_ids     y  n_obs
4606  HMDB0011274+Na  13.0     19
8489  HMDB0115314+Na   0.0      7
804    HMDB0007078+K   0.0      1
7153   HMDB0054132+H   0.0      1
6180   HMDB0045239+H   0.0     21
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_30_14_585960
Printing histogram!
hist_plots/2020_04_06_19_30_14_765883
joining X 346
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_30_15_080094
splitting 346
X_df_shapes:
(4474, 1024)
(2191, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 155293}
modeling 346
ml :  XGBoost
train_y: (4474,)
train_w: (4474,)
train_X: (4474, 1024)
Weighting:  n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_30_44_191231
start 347
filtering 347
aggregating 347
checking aggregation: 
            hmdb_ids    y  n_obs
7545  HMDB0101278+Na  0.0      1
2073  HMDB0008279+Na  1.0     32
6196   HMDB0045347+K  0.0     13
7065   HMDB0053314+H  0.0      1
4711   HMDB0011322+K  1.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_30_46_236409
Printing histogram!
hist_plots/2020_04_06_19_30_46_414251
joining X 347
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_30_46_741293
splitting 347
X_df_shapes:
(4059, 1024)
(2208, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 124551}
modeling 347
ml :  XGBoost
train_y: (4059,)
train_w: (4059,)
train_X: (4059, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_31_12_744809
start 348
filtering 348
aggregating 348
checking aggregation: 
            hmdb_ids    y  n_obs
6123  HMDB0044905+Na  0.0     15
7125   HMDB0053847+H  0.0      1
5563  HMDB0042164+Na  0.0      2
2336   HMDB0008407+K  0.0     67
7687  HMDB0112110+Na  0.0      4
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_31_14_781620
Printing histogram!
hist_plots/2020_04_06_19_31_14_960821
joining X 348
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_31_15_284052
splitting 348
X_df_shapes:
(4287, 1024)
(2136, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 142286}
modeling 348
ml :  XGBoost
train_y: (4287,)
train_w: (4287,)
train_X: (4287, 1024)
Weighting:  10_y_bins_W_n_obs
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_31_43_165740
start 349
filtering 349
aggregating 349
checking aggregation: 
           hmdb_ids     y  n_obs
7450  HMDB0065748+K   0.0      3
415   HMDB0002393+K   0.0      1
4269  HMDB0010460+K   0.0      1
5962  HMDB0044263+H   0.0      1
1322  HMDB0007972+H  14.0    141
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_31_45_266765
Printing histogram!
hist_plots/2020_04_06_19_31_45_445464
joining X 349
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_31_45_766783
splitting 349
X_df_shapes:
(4734, 1024)
(2304, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 136555}
modeling 349
ml :  XGBoost
train_y: (4734,)
train_w: (4734,)
train_X: (4734, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_32_16_254613
start 350
filtering 350
aggregating 350
checking aggregation: 
            hmdb_ids    y  n_obs
1095   HMDB0007862+H  0.0      1
6037  HMDB0044530+Na  0.0     50
7822   HMDB0112435+H  1.0      1
8347   HMDB0115098+K  1.0     85
8403  HMDB0115201+Na  0.0      4
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_32_18_263867
Printing histogram!
hist_plots/2020_04_06_19_32_18_444513
joining X 350
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_32_18_769188
splitting 350
X_df_shapes:
(4198, 1024)
(1889, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 108690}
modeling 350
ml :  XGBoost
train_y: (4198,)
train_w: (4198,)
train_X: (4198, 1024)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_32_46_132641
start 351
filtering 351
aggregating 351
checking aggregation: 
            hmdb_ids    y  n_obs
5422   HMDB0031786+H  0.0      8
8335  HMDB0115091+Na  0.0     77
7174   HMDB0054520+H  0.0      1
2963   HMDB0008901+K  0.0      5
8059  HMDB0114788+Na  0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_32_48_187212
Printing histogram!
hist_plots/2020_04_06_19_32_48_367584
joining X 351
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_32_48_684248
splitting 351
X_df_shapes:
(4038, 1024)
(2454, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 16, 'X': 160449}
modeling 351
ml :  XGBoost
train_y: (4038,)
train_w: (4038,)
train_X: (4038, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_33_15_065648
start 352
filtering 352
aggregating 352
checking aggregation: 
            hmdb_ids    y  n_obs
3486   HMDB0009258+H  1.0      5
8020  HMDB0112772+Na  0.0      2
1949  HMDB0008216+Na  2.0      2
658    HMDB0005433+K  0.0     22
4354  HMDB0010603+Na  4.0     53
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_33_17_135442
Printing histogram!
hist_plots/2020_04_06_19_33_17_310273
joining X 352
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_33_17_763484
splitting 352
X_df_shapes:
(2545, 1826)
(1206, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 0}
modeling 352
ml :  XGBoost
train_y: (2545,)
train_w: (2545,)
train_X: (2545, 1826)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_33_50_217726
start 353
filtering 353
aggr

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_36_41_864475
start 358
filtering 358
aggregating 358
checking aggregation: 
            hmdb_ids    y  n_obs
3415  HMDB0009222+Na  1.0      6
2966   HMDB0008904+H  0.0      1
729    HMDB0006293+K  0.0      5
3541   HMDB0009288+H  0.0     12
3650   HMDB0009387+K  8.0     42
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_36_43_873700
Printing histogram!
hist_plots/2020_04_06_19_36_44_050865
joining X 358
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_36_44_489758
splitting 358
X_df_shapes:
(2282, 1826)
(1513, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 0}
modeling 358
ml :  XGBoost
train_y: (2282,)
train_w: (2282,)
train_X: (2282, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_37_14_865724
start 359
filtering 359
aggregating 359
checking aggregation: 
            hmdb_ids    y  n_obs
3414   HMDB0009222+K  0.0     10
6814  HMDB0050389+Na  0.0      1
5863   HMDB0043910+H  0.0     22
6234   HMDB0045558+K  2.0      8
2889  HMDB0008838+Na  0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_37_16_919632
Printing histogram!
hist_plots/2020_04_06_19_37_17_097357
joining X 359
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_37_17_531282
splitting 359
X_df_shapes:
(2250, 1826)
(1227, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 0}
modeling 359
ml :  XGBoost
train_y: (2250,)
train_w: (2250,)
train_X: (2250, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_37_47_504662
start 360
filtering 360
aggregating 360
checking aggregation: 
            hmdb_ids    y  n_obs
3884   HMDB0009651+H  0.0      5
3709   HMDB0009461+H  2.0      6
7189   HMDB0054876+K  0.0      1
3410   HMDB0009221+H  0.0     41
3118  HMDB0009000+Na  1.0     19
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_37_49_571897
Printing histogram!
hist_plots/2020_04_06_19_37_49_745715
joining X 360
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_37_50_034161
splitting 360
X_df_shapes:
(1817, 1024)
(1558, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 18, 'X': 56706}
modeling 360
ml :  XGBoost
train_y: (1817,)
train_w: (1817,)
train_X: (1817, 1024)
Weighting:  False
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_38_02_228136
start 361
filtering 361


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_38_19_837743
start 362
filtering 362
aggregating 362
checking aggregation: 
            hmdb_ids    y  n_obs
2269   HMDB0008371+K  0.0     39
5142  HMDB0012402+Na  0.0      1
5218   HMDB0013336+H  7.0      8
7867   HMDB0112515+K  0.0     21
4711   HMDB0011322+K  1.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_38_21_833441
Printing histogram!
hist_plots/2020_04_06_19_38_22_011574
joining X 362
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_38_22_285486
splitting 362
X_df_shapes:
(2683, 1024)
(1371, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 87993}
modeling 362
ml :  XGBoost
train_y: (2683,)
train_w: (2683,)
train_X: (2683, 1024)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_38_40_103163
start 363
filtering 363
aggregating 363
checking aggregation: 
            hmdb_ids     y  n_obs
3735   HMDB0009483+K  12.0     50
6672   HMDB0049393+H   0.0     21
3643   HMDB0009384+H   0.0      1
5090   HMDB0012343+H   1.0      1
8701  HMDB0116586+Na   0.0      2
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_38_42_122250
Printing histogram!
hist_plots/2020_04_06_19_38_42_298224
joining X 363
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_38_42_570087
splitting 363
X_df_shapes:
(2310, 1024)
(1276, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 67326}
modeling 363
ml :  XGBoost
train_y: (2310,)
train_w: (2310,)
train_X: (2310, 1024)
Weighting:  isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_38_57_419855
start 364
filteri

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_39_15_204826
start 365
filtering 365
aggregating 365
checking aggregation: 
            hmdb_ids    y  n_obs
998    HMDB0007418+K  0.0      8
1589   HMDB0008072+K  4.0     24
8582  HMDB0115513+Na  0.0     10
3912   HMDB0009695+H  1.0      2
6002   HMDB0044346+K  0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_39_17_235036
Printing histogram!
hist_plots/2020_04_06_19_39_17_412682
joining X 365
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_39_17_691115
splitting 365
X_df_shapes:
(2561, 1024)
(1056, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 51221}
modeling 365
ml :  XGBoost
train_y: (2561,)
train_w: (2561,)
train_X: (2561, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_39_34_160814
start 366
f

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_40_56_389826
start 370
filtering 370
aggregating 370
checking aggregation: 
            hmdb_ids    y  n_obs
6684   HMDB0049428+K  0.0      1
5516   HMDB0038291+K  0.0      1
52     HMDB0000089+K  0.0      1
3176  HMDB0009050+Na  0.0     18
297    HMDB0001072+K  0.0      2
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_40_58_381300
Printing histogram!
hist_plots/2020_04_06_19_40_58_554314
joining X 370
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_40_58_962149
splitting 370
X_df_shapes:
(1860, 1826)
(743, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 6, 'X': 1}
modeling 370
ml :  XGBoost
train_y: (1860,)
train_w: (1860,)
train_X: (1860, 1826)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_41_22_840798
start 371
filtering 371
aggregating 371
checking aggregation: 
            hmdb_ids    y  n_obs
8603  HMDB0115523+Na  0.0      7
8116   HMDB0114867+H  0.0     20
409    HMDB0002354+H  0.0      1
5735   HMDB0042701+H  0.0     22
8704  HMDB0116607+Na  0.0      6
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_41_24_908789
Printing histogram!
hist_plots/2020_04_06_19_41_25_082998
joining X 371
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_41_25_493046
splitting 371
X_df_shapes:
(1752, 1826)
(752, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 0}
modeling 371
ml :  XGBoost
train_y: (1752,)
train_w: (1752,)
train_X: (1752, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_41_45_258403
start 372
filtering 372
aggregating 372
checking aggregation: 
           hmdb_ids      y  n_obs
2706  HMDB0008652+K    0.0     16
1727  HMDB0008122+H   29.0     42
4079  HMDB0010169+H  109.0    147
3772  HMDB0009514+K    0.0      4
8065  HMDB0114805+H    0.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_41_47_289794
Printing histogram!
hist_plots/2020_04_06_19_41_47_464761
joining X 372
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_41_47_879610
splitting 372
X_df_shapes:
(1504, 1826)
(747, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 0}
modeling 372
ml :  XGBoost
train_y: (1504,)
train_w: (1504,)
train_X: (1504, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_42_07_674596
start 373
filtering 373
aggregating 373
checking aggregation: 
           hmdb_ids    y  n_obs
1328  HMDB0007974+H  1.0     76
3749  HMDB0009488+K  0.0      4
5015  HMDB0011733+K  0.0      1
4468  HMDB0011208+K  2.0     21
1305  HMDB0007965+H  0.0     43
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_42_09_688072
Printing histogram!
hist_plots/2020_04_06_19_42_09_861629
joining X 373
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_42_10_275198
splitting 373
X_df_shapes:
(1445, 1826)
(1131, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 4, 'X': 0}
modeling 373
ml :  XGBoost
train_y: (1445,)
train_w: (1445,)
train_X: (1445, 1826)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_42_27_833217
start 374
filtering 374
aggregating 374
checking aggregation: 
            hmdb_ids    y  n_obs
7254  HMDB0059633+Na  1.0      1
8563  HMDB0115504+Na  0.0     25
4634   HMDB0011286+H  1.0     79
6804   HMDB0050313+K  0.0     13
2326   HMDB0008403+K  0.0     39
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_42_29_810608
Printing histogram!
hist_plots/2020_04_06_19_42_29_983085
joining X 374
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_42_30_397672
splitting 374
X_df_shapes:
(1652, 1826)
(891, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 0}
modeling 374
ml :  XGBoost
train_y: (1652,)
train_w: (1652,)
train_X: (1652, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_42_52_002717
start 375
filtering 375
aggregating 375
checking aggregation: 
            hmdb_ids    y  n_obs
6912  HMDB0051151+Na  0.0      1
1027   HMDB0007504+K  0.0      5
2550  HMDB0008535+Na  2.0      2
4677   HMDB0011308+K  0.0      2
2954   HMDB0008896+H  0.0      2
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_42_54_019668
Printing histogram!
hist_plots/2020_04_06_19_42_54_194308
joining X 375
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_42_54_602754
splitting 375
X_df_shapes:
(1305, 1826)
(1042, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 0}
modeling 375
ml :  XGBoost
train_y: (1305,)
train_w: (1305,)
train_X: (1305, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_43_11_966466
start 376
filtering 376
aggregating 376
checking aggregation: 
            hmdb_ids    y  n_obs
6832  HMDB0050521+Na  0.0      1
6565   HMDB0048637+K  0.0     59
5149  HMDB0012412+Na  0.0      1
3687   HMDB0009420+K  8.0     42
2350   HMDB0008414+H  0.0      3
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_43_13_985974
Printing histogram!
hist_plots/2020_04_06_19_43_14_164869
joining X 376
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_43_14_438418
splitting 376
X_df_shapes:
(1860, 1024)
(720, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 6, 'X': 36360}
modeling 376
ml :  XGBoost
train_y: (1860,)
train_w: (1860,)
train_X: (1860, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_43_26_527132
start 377
filtering 377
aggregating 377
checking aggregation: 
            hmdb_ids    y  n_obs
355    HMDB0001480+H  1.0      3
6502  HMDB0047915+Na  0.0     38
8469   HMDB0115299+K  1.0     85
6881   HMDB0050858+H  0.0      1
2511   HMDB0008512+H  0.0      3
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_43_28_585708
Printing histogram!
hist_plots/2020_04_06_19_43_28_759416
joining X 377
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_43_29_027791
splitting 377
X_df_shapes:
(1442, 1024)
(1031, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 41229}
modeling 377
ml :  XGBoost
train_y: (1442,)
train_w: (1442,)
train_X: (1442, 1024)
Weighting:  10_y_bins
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_43_38_580972
start 378
filtering 

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_43_52_201682
start 379
filtering 379
aggregating 379
checking aggregation: 
            hmdb_ids    y  n_obs
6024   HMDB0044482+H  0.0      2
6604   HMDB0049040+H  0.0      1
5550  HMDB0042068+Na  0.0      2
6512  HMDB0048026+Na  0.0     38
8558  HMDB0115502+Na  0.0      5
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_43_54_261338
Printing histogram!
hist_plots/2020_04_06_19_43_54_433242
joining X 379
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_43_54_698124
splitting 379
X_df_shapes:
(1902, 1024)
(951, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 66902}
modeling 379
ml :  XGBoost
train_y: (1902,)
train_w: (1902,)
train_X: (1902, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_44_06_744494
start 380
filtering 380
aggregating 380
checking aggregation: 
           hmdb_ids    y  n_obs
1613  HMDB0008081+K  4.0     95
8019  HMDB0112772+K  0.0      7
4419  HMDB0010724+K  1.0      1
3852  HMDB0009589+K  1.0      5
5262  HMDB0013418+H  0.0     73
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_44_08_758172
Printing histogram!
hist_plots/2020_04_06_19_44_08_932822
joining X 380
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_44_09_200883
splitting 380
X_df_shapes:
(1311, 1024)
(1042, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 37515}
modeling 380
ml :  XGBoost
train_y: (1311,)
train_w: (1311,)
train_X: (1311, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_44_18_203900
start 381
filtering 381
aggregating 381
checking aggregation: 
            hmdb_ids    y  n_obs
6400  HMDB0046659+Na  0.0     15
2201   HMDB0008339+H  5.0     94
138    HMDB0000387+K  1.0      1
611    HMDB0005383+K  1.0     14
7985  HMDB0112711+Na  3.0      3
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_44_20_191765
Printing histogram!
hist_plots/2020_04_06_19_44_20_365488
joining X 381
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_44_20_637402
splitting 381
X_df_shapes:
(1513, 1024)
(884, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 6, 'X': 36217}
modeling 381
ml :  XGBoost
train_y: (1513,)
train_w: (1513,)
train_X: (1513, 1024)
Weighting:  10_y_bins_W_isobar
Printing regression actual versus predicted output!
model_plots/2020_04_06_19_44_30_451845
start 382
fil

  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_44_49_206329
start 383
filtering 383
aggregating 383
checking aggregation: 
            hmdb_ids    y  n_obs
5523   HMDB0039229+H  0.0      2
5584   HMDB0042207+K  0.0      1
2565  HMDB0008547+Na  0.0      1
5662  HMDB0042517+Na  0.0      2
382   HMDB0002043+Na  1.0      1
(8750, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_44_51_196985
Printing histogram!
hist_plots/2020_04_06_19_44_51_371846
joining X 383
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_44_51_645282
splitting 383
X_df_shapes:
(1858, 1024)
(724, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 34458}
modeling 383
ml :  XGBoost
train_y: (1858,)
train_w: (1858,)
train_X: (1858, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_45_03_776878
start 384
filtering 384
aggregating 384
checking aggregation: 
            hmdb_ids     y  n_obs
7242   HMDB0062242+K   0.0      1
8399   HMDB0115331+H   0.0     10
7043  HMDB0053839+Na   0.0      1
2506   HMDB0008536+H  14.0     19
1591  HMDB0008083+Na   6.0     74
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_45_05_756600
Printing histogram!
hist_plots/2020_04_06_19_45_05_932479
joining X 384
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_45_06_446119
splitting 384


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4369, 1826)
(2215, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 2}
modeling 384
ml :  XGBoost
train_y: (4369,)
train_w: (4369,)
train_X: (4369, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_45_59_318227
start 385
filtering 385
aggregating 385
checking aggregation: 
            hmdb_ids    y  n_obs
6688  HMDB0049940+Na  0.0      2
8153   HMDB0115007+H  0.0     18
8064  HMDB0114929+Na  0.0     10
207    HMDB0000715+H  4.0      4
5923  HMDB0044362+Na  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_46_01_365373
Printing histogram!
hist_plots/2020_04_06_19_46_01_544999
joining X 385
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_46_02_047832
splitting 385


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4468, 1826)
(2040, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 3}
modeling 385
ml :  XGBoost
train_y: (4468,)
train_w: (4468,)
train_X: (4468, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_46_56_752831
start 386
filtering 386
aggregating 386
checking aggregation: 
            hmdb_ids    y  n_obs
4295   HMDB0010595+H  0.0      1
3631  HMDB0009405+Na  1.0      1
5327  HMDB0031043+Na  1.0      1
5101   HMDB0012426+K  0.0      7
8608   HMDB0116754+H  0.0      3
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_46_58_741983
Printing histogram!
hist_plots/2020_04_06_19_46_58_924007
joining X 386
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_46_59_422727
splitting 386


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4278, 1826)
(2226, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 2}
modeling 386
ml :  XGBoost
train_y: (4278,)
train_w: (4278,)
train_X: (4278, 1826)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_47_52_972793
start 387
filtering 387
aggregating 387
checking aggregation: 
            hmdb_ids    y  n_obs
989   HMDB0007475+Na  0.0      4
2986  HMDB0008946+Na  3.0     15
4328  HMDB0010635+Na  0.0      1
2317  HMDB0008422+Na  0.0     27
537    HMDB0005060+H  0.0     10
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_47_54_967674
Printing histogram!
hist_plots/2020_04_06_19_47_55_144257
joining X 387
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_47_55_643767
splitting 387


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4125, 1826)
(2518, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 17, 'X': 3}
modeling 387
ml :  XGBoost
train_y: (4125,)
train_w: (4125,)
train_X: (4125, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_48_46_885393
start 388
filtering 388
aggregating 388
checking aggregation: 
            hmdb_ids    y  n_obs
6163  HMDB0045667+Na  0.0     49
2957   HMDB0008933+K  0.0      4
3904   HMDB0009816+H  0.0      2
7270   HMDB0062437+H  0.0      2
6989   HMDB0053367+K  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_48_48_885098
Printing histogram!
hist_plots/2020_04_06_19_48_49_064620
joining X 388
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_48_49_569283
splitting 388


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4085, 1826)
(2525, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 0}
modeling 388
ml :  XGBoost
train_y: (4085,)
train_w: (4085,)
train_X: (4085, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_49_41_909861
start 389
filtering 389
aggregating 389
checking aggregation: 
            hmdb_ids    y  n_obs
5306   HMDB0029815+K  0.0      1
4109  HMDB0010407+Na  0.0      9
2029  HMDB0008275+Na  0.0      7
6144   HMDB0045532+K  0.0     55
4563   HMDB0011279+K  0.0      8
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_49_44_004515
Printing histogram!
hist_plots/2020_04_06_19_49_44_183722
joining X 389
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_49_44_680095
splitting 389


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(3953, 1826)
(2101, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 0}
modeling 389
ml :  XGBoost
train_y: (3953,)
train_w: (3953,)
train_X: (3953, 1826)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_50_31_349452
start 390
filtering 390
aggregating 390
checking aggregation: 
            hmdb_ids     y  n_obs
6622  HMDB0049632+Na   0.0      1
3545  HMDB0009349+Na   0.0      1
1928   HMDB0008225+K   0.0      1
7402  HMDB0068377+Na   0.0      5
1508  HMDB0008050+Na  29.0     73
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_50_33_331334
Printing histogram!
hist_plots/2020_04_06_19_50_33_515646
joining X 390
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_50_34_012379
splitting 390


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4642, 1826)
(2018, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 2}
modeling 390
ml :  XGBoost
train_y: (4642,)
train_w: (4642,)
train_X: (4642, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_51_29_952456
start 391
filtering 391
aggregating 391
checking aggregation: 
            hmdb_ids    y  n_obs
1462  HMDB0008034+Na  8.0     31
1073   HMDB0007863+K  0.0     44
7481  HMDB0105588+Na  0.0      2
1879   HMDB0008200+K  0.0      1
2304   HMDB0008410+K  0.0      5
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_51_32_011086
Printing histogram!
hist_plots/2020_04_06_19_51_32_189168
joining X 391
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_51_32_688543
splitting 391


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


X_df_shapes:
(4772, 1826)
(1947, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 0}
modeling 391
ml :  XGBoost
train_y: (4772,)
train_w: (4772,)
train_X: (4772, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_52_30_884438
start 392
filtering 392
aggregating 392
checking aggregation: 
            hmdb_ids    y  n_obs
6248   HMDB0046104+H  0.0     12
7936   HMDB0112793+H  1.0      1
4261  HMDB0010565+Na  1.0    144
1595   HMDB0008085+K  0.0      1
2927  HMDB0008910+Na  2.0      7
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_52_32_881896
Printing histogram!
hist_plots/2020_04_06_19_52_33_058805
joining X 392
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_52_33_382582
splitting 392
X_df_shapes:
(4154, 1024)
(2031, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 115651}
modeling 392
ml :  XGBoost
train_y: (4154,)
train_w: (4154,)
train_X: (4154, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_52_59_724586
start 393
filtering 393
aggregating 393
checking aggregation: 
            hmdb_ids    y  n_obs
8392  HMDB0115326+Na  6.0     34
6624  HMDB0049683+Na  0.0      2
7562  HMDB0110114+Na  0.0      3
3007  HMDB0008963+Na  0.0      1
7085  HMDB0054281+Na  0.0      7
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_53_01_742324
Printing histogram!
hist_plots/2020_04_06_19_53_01_920245
joining X 393
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_53_02_233441
splitting 393
X_df_shapes:
(4480, 1024)
(2279, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 15, 'X': 162005}
modeling 393
ml :  XGBoost
train_y: (4480,)
train_w: (4480,)
train_X: (4480, 1024)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_53_30_645128
start 394
filtering 394
aggregating 394
checking aggregation: 
            hmdb_ids    y  n_obs
3139   HMDB0009055+K  0.0      1
387    HMDB0002314+H  0.0      1
5699  HMDB0042960+Na  0.0      3
5563   HMDB0042436+K  0.0     13
7392  HMDB0067687+Na  0.0      7
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_53_32_700183
Printing histogram!
hist_plots/2020_04_06_19_53_32_881706
joining X 394
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_53_33_198478
splitting 394
X_df_shapes:
(4100, 1024)
(2543, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 177517}
modeling 394
ml :  XGBoost
train_y: (4100,)
train_w: (4100,)
train_X: (4100, 1024)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_53_59_449413
start 395
filtering 395
aggregating 395
checking aggregation: 
            hmdb_ids    y  n_obs
5816   HMDB0044071+K  0.0     34
6585  HMDB0049288+Na  0.0     49
3144   HMDB0009057+H  0.0     41
512   HMDB0004886+Na  0.0      1
6463   HMDB0048496+H  0.0     21
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_54_01_481997
Printing histogram!
hist_plots/2020_04_06_19_54_01_661515
joining X 395
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_54_01_975065
splitting 395
X_df_shapes:
(4042, 1024)
(1846, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 113249}
modeling 395
ml :  XGBoost
train_y: (4042,)
train_w: (4042,)
train_X: (4042, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_54_27_469592
start 396
filtering 396
aggregating 396
checking aggregation: 
            hmdb_ids    y  n_obs
991   HMDB0007476+Na  3.0     11
8308  HMDB0115209+Na  0.0     56
5576  HMDB0042480+Na  0.0      1
4022  HMDB0010167+Na  0.0      2
3101   HMDB0009018+K  0.0      4
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_54_29_457316
Printing histogram!
hist_plots/2020_04_06_19_54_29_633879
joining X 396
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_54_29_952219
splitting 396
X_df_shapes:
(4240, 1024)
(1977, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 98099}
modeling 396
ml :  XGBoost
train_y: (4240,)
train_w: (4240,)
train_X: (4240, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_54_56_849278
start 397
filtering 397
aggregating 397
checking aggregation: 
            hmdb_ids     y  n_obs
5131   HMDB0013166+H   1.0      1
1835   HMDB0008177+H   8.0     29
851   HMDB0007186+Na   2.0      6
2682  HMDB0008661+Na   1.0     72
2577  HMDB0008594+Na  14.0     42
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_54_58_854829
Printing histogram!
hist_plots/2020_04_06_19_54_59_030771
joining X 397
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_54_59_341495
splitting 397
X_df_shapes:
(4471, 1024)
(2079, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 10, 'X': 139341}
modeling 397
ml :  XGBoost
train_y: (4471,)
train_w: (4471,)
train_X: (4471, 1024)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_55_28_071110
start 398
filtering 398
aggregating 398
checking aggregation: 
            hmdb_ids    y  n_obs
2201  HMDB0008360+Na  0.0     13
3567  HMDB0009359+Na  3.0     15
1490  HMDB0008044+Na  0.0      6
1733   HMDB0008135+K  4.0     79
1206  HMDB0007922+Na  7.0     13
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_55_30_070655
Printing histogram!
hist_plots/2020_04_06_19_55_30_246876
joining X 398
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_55_30_562186
splitting 398
X_df_shapes:
(3800, 1024)
(2718, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 180278}
modeling 398
ml :  XGBoost
train_y: (3800,)
train_w: (3800,)
train_X: (3800, 1024)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_55_55_344270
start 399
filtering 399
aggregating 399
checking aggregation: 
            hmdb_ids    y  n_obs
7618   HMDB0112142+H  0.0      1
5800  HMDB0043949+Na  0.0      2
8543   HMDB0115557+H  0.0      9
7056   HMDB0053942+H  0.0      1
4801   HMDB0011486+H  0.0      5
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_55_57_364641
Printing histogram!
hist_plots/2020_04_06_19_55_57_545684
joining X 399
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_55_57_859059
splitting 399
X_df_shapes:
(4213, 1024)
(2327, 1024)


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 10, 'X': 140031}
modeling 399
ml :  XGBoost
train_y: (4213,)
train_w: (4213,)
train_X: (4213, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_56_25_161192
start 400
filtering 400
aggregating 400
checking aggregation: 
            hmdb_ids    y  n_obs
3189  HMDB0009075+Na  6.0      6
7764   HMDB0112504+H  1.0      1
14    HMDB0000026+Na  0.0      1
7085  HMDB0054281+Na  0.0      7
5326   HMDB0031014+K  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_56_27_155496
Printing histogram!
hist_plots/2020_04_06_19_56_27_333114
joining X 400
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_56_27_801906
splitting 400
X_df_shapes:
(2722, 1826)
(1050, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 14, 'X': 0}
modeling 400
ml :  XGBoost
train_y: (2722,)
train_w: (2722,)
train_X: (2722, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_56_59_444359
start 401
filtering 401
aggregating 401
checking aggregation: 
            hmdb_ids     y  n_obs
3442  HMDB0009259+Na  32.0     32
5456  HMDB0040266+Na   0.0      1
4535   HMDB0011268+K   0.0      3
3026   HMDB0008976+H   0.0      4
420   HMDB0002931+Na   2.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_57_01_467202
Printing histogram!
hist_plots/2020_04_06_19_57_01_645314
joining X 401
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_57_02_097191
splitting 401
X_df_shapes:
(2146, 1826)
(1238, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 1}
modeling 401
ml :  XGBoost
train_y: (2146,)
train_w: (2146,)
train_X: (2146, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_57_29_710835
start 402
filtering 402
aggregating 402
checking aggregation: 
            hmdb_ids    y  n_obs
3882  HMDB0009782+Na  0.0      2
2908   HMDB0008899+H  0.0    159
5192   HMDB0013415+H  0.0     68
2901  HMDB0008892+Na  0.0     52
3959  HMDB0009873+Na  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_57_31_701826
Printing histogram!
hist_plots/2020_04_06_19_57_31_880761
joining X 402
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_57_32_337721
splitting 402
X_df_shapes:
(2824, 1826)
(1010, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 0}
modeling 402
ml :  XGBoost
train_y: (2824,)
train_w: (2824,)
train_X: (2824, 1826)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_58_05_668087
start 403
filtering 403
aggregating 403
checking aggregation: 
            hmdb_ids    y  n_obs
8477   HMDB0115514+H  0.0      1
3547  HMDB0009350+Na  0.0      1
6303   HMDB0046459+K  0.0     13
5797   HMDB0043937+K  0.0     60
1531  HMDB0008061+Na  1.0      5
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_58_07_669394
Printing histogram!
hist_plots/2020_04_06_19_58_07_846412
joining X 403
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_58_08_297492
splitting 403
X_df_shapes:
(2588, 1826)
(1040, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 12, 'X': 1}
modeling 403
ml :  XGBoost
train_y: (2588,)
train_w: (2588,)
train_X: (2588, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_58_37_599516
start 404
filtering 404
aggregating 404
checking aggregation: 
            hmdb_ids    y  n_obs
6983  HMDB0053270+Na  0.0     15
968   HMDB0007414+Na  2.0      6
2691   HMDB0008685+H  0.0     78
6169  HMDB0045673+Na  0.0     49
4735  HMDB0011390+Na  0.0      3
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_58_39_602680
Printing histogram!
hist_plots/2020_04_06_19_58_39_780087
joining X 404
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_58_40_226929
splitting 404
X_df_shapes:
(2377, 1826)
(963, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 0}
modeling 404
ml :  XGBoost
train_y: (2377,)
train_w: (2377,)
train_X: (2377, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_59_11_169599
start 405
filtering 405
aggregating 405
checking aggregation: 
            hmdb_ids    y  n_obs
2627  HMDB0008628+Na  1.0     32
2678   HMDB0008660+K  0.0     39
4381  HMDB0011131+Na  0.0      2
6108   HMDB0045321+H  0.0     12
2661   HMDB0008653+K  0.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_59_13_214871
Printing histogram!
hist_plots/2020_04_06_19_59_13_393888
joining X 405
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_59_13_849190
splitting 405
X_df_shapes:
(2431, 1826)
(1375, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 0}
modeling 405
ml :  XGBoost
train_y: (2431,)
train_w: (2431,)
train_X: (2431, 1826)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_19_59_42_541334
start 406
filtering 406
aggregating 406
checking aggregation: 
            hmdb_ids    y  n_obs
4465  HMDB0011234+Na  0.0     11
4579  HMDB0011286+Na  0.0     19
7215  HMDB0061691+Na  0.0      1
6539   HMDB0049117+K  0.0     31
4138   HMDB0010424+K  0.0      4
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_19_59_44_564165
Printing histogram!
hist_plots/2020_04_06_19_59_44_754696
joining X 406
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_19_59_45_210290
splitting 406
X_df_shapes:
(2447, 1826)
(1467, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 1}
modeling 406
ml :  XGBoost
train_y: (2447,)
train_w: (2447,)
train_X: (2447, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_00_15_990771
start 407
filtering 407
aggregating 407
checking aggregation: 
            hmdb_ids     y  n_obs
6915   HMDB0052530+H   0.0      1
4690   HMDB0011352+H   0.0     17
8189  HMDB0115047+Na   0.0     68
2411   HMDB0008471+H  14.0     19
7999  HMDB0114845+Na   1.0     48
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_00_18_005593
Printing histogram!
hist_plots/2020_04_06_20_00_18_184295
joining X 407
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_00_18_627735
splitting 407
X_df_shapes:
(2332, 1826)
(1348, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 0}
modeling 407
ml :  XGBoost
train_y: (2332,)
train_w: (2332,)
train_X: (2332, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_00_48_334864
start 408
filtering 408
aggregating 408
checking aggregation: 
            hmdb_ids    y  n_obs
180    HMDB0000573+K  0.0      1
3567  HMDB0009359+Na  3.0     15
3486   HMDB0009285+K  0.0      5
5781   HMDB0043909+H  0.0      1
4264  HMDB0010566+Na  0.0     21
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_00_50_349384
Printing histogram!
hist_plots/2020_04_06_20_00_50_526054
joining X 408
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_00_50_823678
splitting 408
X_df_shapes:
(2389, 1024)
(1179, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 58672}
modeling 408
ml :  XGBoost
train_y: (2389,)
train_w: (2389,)
train_X: (2389, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_01_06_120443
start 409
filtering 409
aggregating 409
checking aggregation: 
            hmdb_ids    y  n_obs
7705   HMDB0112382+H  1.0      1
870    HMDB0007215+K  0.0      8
1932  HMDB0008226+Na  0.0     27
3261  HMDB0009130+Na  2.0      2
4228   HMDB0010481+K  0.0     11
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_01_08_102296
Printing histogram!
hist_plots/2020_04_06_20_01_08_279843
joining X 409
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_01_08_554064
splitting 409
X_df_shapes:
(2763, 1024)
(1141, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 78966}
modeling 409
ml :  XGBoost
train_y: (2763,)
train_w: (2763,)
train_X: (2763, 1024)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_01_26_372373
start 410
filtering 410
aggregating 410
checking aggregation: 
            hmdb_ids    y  n_obs
2715   HMDB0008694+K  0.0     67
6845  HMDB0051304+Na  0.0      1
7825  HMDB0112558+Na  0.0      4
8253  HMDB0115101+Na  6.0     34
2774  HMDB0008752+Na  0.0      6
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_01_28_363993
Printing histogram!
hist_plots/2020_04_06_20_01_28_541433
joining X 410
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_01_28_820902
splitting 410
X_df_shapes:
(2664, 1024)
(1062, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 49265}
modeling 410
ml :  XGBoost
train_y: (2664,)
train_w: (2664,)
train_X: (2664, 1024)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_01_45_737570
start 411
filtering 411
aggregating 411
checking aggregation: 
            hmdb_ids    y  n_obs
1155  HMDB0007896+Na  1.0     12
6420   HMDB0047915+K  0.0     34
199    HMDB0000673+K  0.0      1
7763   HMDB0112498+H  0.0      1
3763  HMDB0009557+Na  6.0      6
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_01_47_717961
Printing histogram!
hist_plots/2020_04_06_20_01_47_897600
joining X 411
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_01_48_169238
splitting 411
X_df_shapes:
(2462, 1024)
(1308, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 77511}
modeling 411
ml :  XGBoost
train_y: (2462,)
train_w: (2462,)
train_X: (2462, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_02_03_887036
start 412
filtering 412
aggregating 412
checking aggregation: 
            hmdb_ids    y  n_obs
2017  HMDB0008270+Na  4.0     12
4498   HMDB0011247+K  0.0      8
2477   HMDB0008522+H  0.0     78
8618  HMDB0116792+Na  0.0      1
5383   HMDB0033780+K  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_02_05_926564
Printing histogram!
hist_plots/2020_04_06_20_02_06_106373
joining X 412
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_02_06_380984
splitting 412
X_df_shapes:
(2254, 1024)
(1447, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 81167}
modeling 412
ml :  XGBoost
train_y: (2254,)
train_w: (2254,)
train_X: (2254, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_02_21_089301
start 413
filtering 413
aggregating 413
checking aggregation: 
            hmdb_ids    y  n_obs
2758   HMDB0008734+H  0.0      2
6047  HMDB0044923+Na  0.0      1
997    HMDB0007494+K  0.0      1
8538   HMDB0115550+H  0.0      5
703    HMDB0006270+K  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_02_23_069685
Printing histogram!
hist_plots/2020_04_06_20_02_23_249745
joining X 413
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_02_23_523014
splitting 413
X_df_shapes:
(2486, 1024)
(1237, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 13, 'X': 67149}
modeling 413
ml :  XGBoost
train_y: (2486,)
train_w: (2486,)
train_X: (2486, 1024)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_02_39_749569
start 414
filtering 414
aggregating 414
checking aggregation: 
            hmdb_ids     y  n_obs
1227   HMDB0007935+K   0.0      1
5939   HMDB0044464+H   0.0      1
33     HMDB0000061+H  13.0     19
3420   HMDB0009252+H   0.0    107
5774  HMDB0043895+Na   0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_02_41_779769
Printing histogram!
hist_plots/2020_04_06_20_02_41_960686
joining X 414
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_02_42_239087
splitting 414
X_df_shapes:
(2301, 1024)
(1313, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 11, 'X': 69058}
modeling 414
ml :  XGBoost
train_y: (2301,)
train_w: (2301,)
train_X: (2301, 1024)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_02_57_459195
start 415
filtering 415
aggregating 415
checking aggregation: 
            hmdb_ids    y  n_obs
4139  HMDB0010424+Na  0.0     17
7252  HMDB0062313+Na  0.0      4
8314   HMDB0115214+H  0.0      1
1189   HMDB0007914+K  0.0      1
7594   HMDB0112110+K  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_02_59_451759
Printing histogram!
hist_plots/2020_04_06_20_02_59_633110
joining X 415
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_02_59_924678
splitting 415
X_df_shapes:
(2561, 1024)
(1105, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 46379}
modeling 415
ml :  XGBoost
train_y: (2561,)
train_w: (2561,)
train_X: (2561, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_03_16_661528
start 416
filtering 416
aggregating 416
checking aggregation: 
            hmdb_ids    y  n_obs
1982   HMDB0008255+H  0.0      1
658   HMDB0005445+Na  0.0      1
1932  HMDB0008226+Na  0.0     27
5415  HMDB0036344+Na  1.0      1
323    HMDB0001348+K  0.0    116
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_03_18_649282
Printing histogram!
hist_plots/2020_04_06_20_03_18_828952
joining X 416
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_03_19_256255
splitting 416
X_df_shapes:
(1682, 1826)
(763, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 0}
modeling 416
ml :  XGBoost
train_y: (1682,)
train_w: (1682,)
train_X: (1682, 1826)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_03_38_651768
start 417
filtering 417
aggregating 417
checking aggregation: 
            hmdb_ids     y  n_obs
5803   HMDB0043962+K   0.0     34
1318   HMDB0007980+H  32.0     49
2824  HMDB0008826+Na   0.0      6
6316   HMDB0046634+K   0.0      3
6461   HMDB0048488+K   0.0     34
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_03_40_719343
Printing histogram!
hist_plots/2020_04_06_20_03_40_890884
joining X 417
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_03_41_297437
splitting 417
X_df_shapes:
(1476, 1826)
(936, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 1}
modeling 417
ml :  XGBoost
train_y: (1476,)
train_w: (1476,)
train_X: (1476, 1826)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_03_58_888384
start 418
filtering 418
aggregating 418
checking aggregation: 
            hmdb_ids    y  n_obs
3392  HMDB0009233+Na  1.0      4
530    HMDB0005045+H  1.0      1
3035   HMDB0008988+K  0.0      8
105    HMDB0000251+K  0.0      1
1820   HMDB0008170+K  0.0    133
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_04_00_862778
Printing histogram!
hist_plots/2020_04_06_20_04_01_039774
joining X 418
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_04_01_452089
splitting 418
X_df_shapes:
(1563, 1826)
(739, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 1}
modeling 418
ml :  XGBoost
train_y: (1563,)
train_w: (1563,)
train_X: (1563, 1826)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_04_20_774955
start 419
filtering 419
aggregating 419
checking aggregation: 
            hmdb_ids    y  n_obs
6668   HMDB0049849+H  0.0     12
1810   HMDB0008167+H  0.0     11
6030  HMDB0044888+Na  0.0     40
6061   HMDB0044997+H  0.0      2
8365   HMDB0115299+K  0.0     84
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_04_22_739488
Printing histogram!
hist_plots/2020_04_06_20_04_22_912665
joining X 419
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_04_23_318926
splitting 419
X_df_shapes:
(1521, 1826)
(744, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 0}
modeling 419
ml :  XGBoost
train_y: (1521,)
train_w: (1521,)
train_X: (1521, 1826)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_04_41_417020
start 420
filtering 420
aggregating 420
checking aggregation: 
            hmdb_ids    y  n_obs
7318   HMDB0063486+K  0.0     15
893    HMDB0007246+K  0.0      8
780   HMDB0007092+Na  0.0      2
11    HMDB0000020+Na  0.0      2
3225  HMDB0009098+Na  2.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_04_43_423129
Printing histogram!
hist_plots/2020_04_06_20_04_43_599435
joining X 420
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_04_44_007733
splitting 420
X_df_shapes:
(1779, 1826)
(891, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 4, 'X': 0}
modeling 420
ml :  XGBoost
train_y: (1779,)
train_w: (1779,)
train_X: (1779, 1826)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_05_04_954692
start 421
filtering 421
aggregating 421
checking aggregation: 
            hmdb_ids    y  n_obs
6637  HMDB0049716+Na  0.0     15
6267   HMDB0046204+H  0.0      1
5913  HMDB0044334+Na  0.0     39
7831  HMDB0112570+Na  1.0      5
3544   HMDB0009349+H  0.0      3
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_05_06_956819
Printing histogram!
hist_plots/2020_04_06_20_05_07_135064
joining X 421
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_05_07_543238
splitting 421
X_df_shapes:
(1632, 1826)
(759, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 6, 'X': 0}
modeling 421
ml :  XGBoost
train_y: (1632,)
train_w: (1632,)
train_X: (1632, 1826)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_05_25_266515
start 422
filtering 422
aggregating 422
checking aggregation: 
            hmdb_ids     y  n_obs
2232   HMDB0008373+H  26.0     26
3620   HMDB0009398+K   1.0      5
1012   HMDB0007533+K   0.0      5
5594  HMDB0042554+Na   0.0      1
5791   HMDB0043929+K   0.0     34
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_05_27_231298
Printing histogram!
hist_plots/2020_04_06_20_05_27_406110
joining X 422
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_05_27_807003
splitting 422
X_df_shapes:
(1673, 1826)
(801, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 6, 'X': 0}
modeling 422
ml :  XGBoost
train_y: (1673,)
train_w: (1673,)
train_X: (1673, 1826)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_05_48_132734
start 423
filtering 423
aggregating 423
checking aggregation: 
            hmdb_ids     y  n_obs
1494   HMDB0008046+H  16.0     20
8216   HMDB0115072+H   0.0      2
8143  HMDB0114994+Na   0.0     22
856   HMDB0007189+Na   0.0      2
5552   HMDB0042415+H   0.0     13
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_05_50_295607
Printing histogram!
hist_plots/2020_04_06_20_05_50_471681
joining X 423
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_05_50_872975
splitting 423
X_df_shapes:
(1615, 1826)
(988, 1826)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 0}
modeling 423
ml :  XGBoost
train_y: (1615,)
train_w: (1615,)
train_X: (1615, 1826)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_06_11_445718
start 424
filtering 424
aggregating 424
checking aggregation: 
            hmdb_ids    y  n_obs
3184   HMDB0009072+K  0.0      4
4604   HMDB0011301+K  0.0     14
6374  HMDB0047222+Na  0.0      2
5580  HMDB0042510+Na  0.0      1
5429  HMDB0037742+Na  0.0      1
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_06_13_450219
Printing histogram!
hist_plots/2020_04_06_20_06_13_625631
joining X 424
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_06_13_888285
splitting 424
X_df_shapes:
(1603, 1024)
(927, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 44117}
modeling 424
ml :  XGBoost
train_y: (1603,)
train_w: (1603,)
train_X: (1603, 1024)
Weighting:  False


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_06_23_997932
start 425
filtering 425
aggregating 425
checking aggregation: 
            hmdb_ids    y  n_obs
5174   HMDB0013407+H  0.0     57
7189   HMDB0060014+K  0.0      4
1817   HMDB0008169+K  0.0    133
4861   HMDB0011520+H  1.0      1
7772  HMDB0112515+Na  1.0     38
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_06_26_004951
Printing histogram!
hist_plots/2020_04_06_20_06_26_183639
joining X 425
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_06_26_438517
splitting 425
X_df_shapes:
(1428, 1024)
(1080, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 43003}
modeling 425
ml :  XGBoost
train_y: (1428,)
train_w: (1428,)
train_X: (1428, 1024)
Weighting:  10_y_bins


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_06_35_933701
start 426
filtering 426
aggregating 426
checking aggregation: 
            hmdb_ids    y  n_obs
4494   HMDB0011246+H  0.0     57
3697  HMDB0009487+Na  0.0      5
501   HMDB0004844+Na  0.0      3
4459   HMDB0011227+K  0.0     13
6402  HMDB0047822+Na  0.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_06_37_920107
Printing histogram!
hist_plots/2020_04_06_20_06_38_096947
joining X 426
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_06_38_354086
splitting 426
X_df_shapes:
(1974, 1024)
(541, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 1, 'X': 20643}
modeling 426
ml :  XGBoost
train_y: (1974,)
train_w: (1974,)
train_X: (1974, 1024)
Weighting:  n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_06_50_878014
start 427
filtering 427
aggregating 427
checking aggregation: 
            hmdb_ids    y  n_obs
1625   HMDB0008096+K  0.0      2
6730  HMDB0050342+Na  0.0     38
7051   HMDB0053875+H  0.0     18
4204  HMDB0010453+Na  0.0     56
6327  HMDB0046708+Na  0.0     15
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_06_52_856252
Printing histogram!
hist_plots/2020_04_06_20_06_53_028473
joining X 427
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_06_53_282881
splitting 427
X_df_shapes:
(1716, 1024)
(932, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 54419}
modeling 427
ml :  XGBoost
train_y: (1716,)
train_w: (1716,)
train_X: (1716, 1024)
Weighting:  isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_07_03_912409
start 428
filtering 428
aggregating 428
checking aggregation: 
            hmdb_ids    y  n_obs
1859   HMDB0008192+H  0.0      6
7505  HMDB0106373+Na  0.0      7
3759  HMDB0009555+Na  3.0      5
7133   HMDB0055325+H  0.0      1
6532  HMDB0049077+Na  0.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_07_05_914028
Printing histogram!
hist_plots/2020_04_06_20_07_06_087979
joining X 428
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_07_06_346042
splitting 428
X_df_shapes:
(1524, 1024)
(744, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 7, 'X': 25090}
modeling 428
ml :  XGBoost
train_y: (1524,)
train_w: (1524,)
train_X: (1524, 1024)
Weighting:  10_y_bins_W_n_obs


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_07_16_443494
start 429
filtering 429
aggregating 429
checking aggregation: 
            hmdb_ids     y  n_obs
8540  HMDB0115550+Na   0.0     22
442   HMDB0003265+Na  12.0     13
5166  HMDB0013402+Na   0.0      4
3979  HMDB0009889+Na   0.0     25
2864  HMDB0008861+Na   0.0      2
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_07_18_430844
Printing histogram!
hist_plots/2020_04_06_20_07_18_610822
joining X 429
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_07_18_875824
splitting 429
X_df_shapes:
(1357, 1024)
(1014, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 8, 'X': 40192}
modeling 429
ml :  XGBoost
train_y: (1357,)
train_w: (1357,)
train_X: (1357, 1024)
Weighting:  10_y_bins_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_07_28_023343
start 430
filtering 430
aggregating 430
checking aggregation: 
            hmdb_ids    y  n_obs
5973   HMDB0044662+K  0.0      1
1722   HMDB0008131+H  0.0      7
8135  HMDB0114986+Na  6.0     34
6664   HMDB0049843+K  0.0     13
1770   HMDB0008148+H  3.0     92
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_07_29_971404
Printing histogram!
hist_plots/2020_04_06_20_07_30_143730
joining X 430
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_07_30_401984
splitting 430
X_df_shapes:
(1476, 1024)
(608, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 5, 'X': 28184}
modeling 430
ml :  XGBoost
train_y: (1476,)
train_w: (1476,)
train_X: (1476, 1024)
Weighting:  n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_07_40_253309
start 431
filtering 431
aggregating 431
checking aggregation: 
            hmdb_ids    y  n_obs
1581   HMDB0008080+K  0.0     79
3741   HMDB0009547+H  0.0     34
2987   HMDB0008951+H  0.0      1
3779   HMDB0009580+K  0.0     10
5290  HMDB0029109+Na  0.0      3
(8641, 7)
Printing histogram!
n_obs_plots/2020_04_06_20_07_42_251860
Printing histogram!
hist_plots/2020_04_06_20_07_42_425997
joining X 431
['hmdb_ids', 'formula', 'intensity_avg', 'intensity_nl', 'y', 'w', 'n_obs']
Printing intensities parent and NL!
int_plots/2020_04_06_20_07_42_674818
splitting 431
X_df_shapes:
(1708, 1024)
(902, 1024)
Testing split, n overlap train/test: 
{'formula': 0, 'hmdb_ids': 0, 'y': 9, 'X': 46775}
modeling 431
ml :  XGBoost
train_y: (1708,)
train_w: (1708,)
train_X: (1708, 1024)
Weighting:  10_y_bins_W_n_obs_W_isobar


  weights = 1 / density


Printing regression actual versus predicted output!
model_plots/2020_04_06_20_07_53_818586
Elapsed time:


Executed without error

13.566563844680786


# To do:
1. Add M and M.
2. nl_03 code review.
3. Generate +/- results.
4. Discuss and predict classifier development.
5. Find reasonable result and split by instrument/lab

In [32]:
model_results.round(3)

Unnamed: 0,target,polarity,fdr,coloc,min_n_obs,model,submodel,params,one_id_only,X,...,dataset,rmse_train,rmse_test,r2_test,rmse_train_w,rmse_test_w,r2_test_w,n_test,plt_path,time
0,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.213,0.273,0.130,,,,4115,model_plots/2020_04_06_14_42_31_150253,114
1,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.271,0.325,-0.127,0.173,0.304,-0.025,4564,model_plots/2020_04_06_14_44_19_667845,108
2,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.232,0.272,0.140,0.183,0.225,0.339,4358,model_plots/2020_04_06_14_46_16_983509,117
3,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.208,0.281,0.208,0.213,0.310,0.119,4023,model_plots/2020_04_06_14_48_16_657519,119
4,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.268,0.306,-0.010,0.186,0.297,-0.094,4070,model_plots/2020_04_06_14_50_15_009077,118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,...,all_ds,0.170,0.183,-0.009,0.173,0.224,-0.066,932,model_plots/2020_04_06_20_07_03_912409,13
428,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,...,all_ds,0.203,0.303,-0.441,0.200,0.374,-0.404,744,model_plots/2020_04_06_20_07_16_443494,12
429,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,...,all_ds,0.302,0.290,-0.669,0.158,0.418,-0.396,1014,model_plots/2020_04_06_20_07_28_023343,11
430,H2O,1,0.05,0.75,10,ml,XGBoost,,False,bits,...,all_ds,0.196,0.271,0.045,0.173,0.254,-0.026,608,model_plots/2020_04_06_20_07_40_253309,12


In [40]:
model_results.sort_values(by=['r2_test_w'], ascending=False).round(3)

Unnamed: 0,target,polarity,fdr,coloc,min_n_obs,model,submodel,params,one_id_only,X,...,dataset,rmse_train,rmse_test,r2_test,rmse_train_w,rmse_test_w,r2_test_w,n_test,plt_path,time
18,H2O,1,0.20,0.00,4,ml,XGBoost,,False,mord_norm,...,all_ds,0.209,0.252,0.259,0.192,0.235,0.347,2760,model_plots/2020_04_06_15_07_04_411765,69
2,H2O,1,0.20,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.232,0.272,0.140,0.183,0.225,0.339,4358,model_plots/2020_04_06_14_46_16_983509,117
290,H2O,1,0.05,0.00,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.212,0.290,0.133,0.185,0.247,0.325,2402,model_plots/2020_04_06_18_58_35_638956,62
226,H2O,1,0.10,0.50,10,ml,XGBoost,,False,mord_norm,...,all_ds,0.165,0.175,0.301,0.157,0.205,0.297,1786,model_plots/2020_04_06_18_14_26_126220,36
338,H2O,1,0.05,0.50,1,ml,XGBoost,,False,mord_norm,...,all_ds,0.147,0.266,0.034,0.168,0.261,0.294,2283,model_plots/2020_04_06_19_24_09_665985,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,H2O,1,0.05,0.75,1,ml,XGBoost,,False,bits,...,all_ds,0.213,0.247,0.137,,,,2031,model_plots/2020_04_06_19_52_59_724586,28
400,H2O,1,0.05,0.75,4,ml,XGBoost,,False,mord_norm,...,all_ds,0.147,0.227,0.081,,,,1050,model_plots/2020_04_06_19_56_59_444359,34
408,H2O,1,0.05,0.75,4,ml,XGBoost,,False,bits,...,all_ds,0.190,0.183,-0.256,,,,1179,model_plots/2020_04_06_20_01_06_120443,17
416,H2O,1,0.05,0.75,10,ml,XGBoost,,False,mord_norm,...,all_ds,0.133,0.268,0.085,,,,763,model_plots/2020_04_06_20_03_38_651768,21


In [35]:
model_results.to_pickle('all_model_results.pickle')

In [34]:
# Regression results:
#all_model_results = pd.read_pickle('all_model_results.pickle')
#all_model_results = pd.concat([all_model_results, model_results])
all_model_results.to_pickle('all_model_results.pickle')
all_model_results.round(3)

NameError: name 'all_model_results' is not defined

In [None]:
all_model_results.sort_values(by=['rmse_test']).round(3)

In [None]:
temp = dict(all_model_results[(all_model_results.index == 90) & 
                  (all_model_results.X == 'mord_norm')].round(2))

In [None]:
temp