In [1]:
## packages

import os
import random
import numpy as np
import pandas as pd

from merf.merf import MERF

from sklearn.model_selection import GroupKFold
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr
from src.utils import get_data, create_model_features, write_performance, reload_performance


import warnings
warnings.filterwarnings('ignore')

In [2]:
## set seeds
# One shared seed: it initialises NumPy's and Python's global RNGs here and is
# read again (as the module-level name `seed`) when the estimators are built.
seed = 5555
np.random.seed(seed)
random.seed(seed)

In [3]:
def merf_core(model_type, X_train_val, X_test, y_train_val, y_test, y_median_train_val, y_median_test, features_gene, features_guide, performance, n_folds_inner, fold, plot_fold, group_ids_train_val, group_ids_test):
    """Fit one MERF model per inner grouped-CV split and score the averaged predictions.

    For every GroupKFold split of the train/validation data a fresh regressor
    is wrapped in MERF and fitted on the training part, with the validation
    part handed to ``fit`` as validation data. Each fitted model predicts the
    complete train/validation set and the test set; the predictions are
    averaged over the inner folds and scored with MSE and Spearman correlation.

    Parameters
    ----------
    model_type : str
        'RF' (RandomForestRegressor) or 'GBM' (GradientBoostingRegressor).
    X_train_val, X_test : pandas.DataFrame
        Feature tables containing the columns named in ``features_gene`` and
        ``features_guide``. They are not modified.
    y_train_val, y_test : pandas object
        Targets aligned row by row with the feature tables. Not modified.
    y_median_train_val, y_median_test
        Accepted for interface compatibility; not used here.
    features_gene : list of str
        Columns passed to MERF as its second design matrix (the random-effects
        covariates ``Z`` in MERF's API).
    features_guide : list of str
        Columns passed to MERF as its first design matrix (the fixed-effects
        covariates ``X`` in MERF's API).
    performance : pandas.DataFrame
        Result table; row ``fold`` is overwritten in place with
        [mse_train, mse_test, spearmanR_train, spearmanR_test].
    n_folds_inner : int
        Number of inner GroupKFold splits.
    fold : int
        Row position of ``performance`` to fill.
    plot_fold
        Accepted for interface compatibility; not used here.
    group_ids_train_val, group_ids_test : sequence
        Cluster ids, one per row; ``group_ids_train_val`` is indexed by position.

    Returns
    -------
    pandas.DataFrame
        The same ``performance`` object that was passed in.

    Raises
    ------
    ValueError
        If ``model_type`` is neither 'RF' nor 'GBM'.
    """
    # Fail fast: otherwise `model` would be unbound inside the loop and the
    # error would only surface after the CV setup, as a confusing NameError.
    if model_type not in ('RF', 'GBM'):
        raise ValueError("Unknown model_type '{}': expected 'RF' or 'GBM'".format(model_type))

    # Rebind to re-indexed copies instead of resetting in place, so the
    # caller's frames/series are left untouched.
    X_train_val = X_train_val.reset_index(drop=True)
    X_test = X_test.reset_index(drop=True)
    y_train_val = y_train_val.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    # Split the columns into the two design matrices handed to MERF
    X_train_val_guide = X_train_val[features_guide]
    X_train_val_gene = X_train_val[features_gene]
    X_test_guide = X_test[features_guide]
    X_test_gene = X_test[features_gene]

    # One prediction column per inner fold
    predictions_train = np.zeros((X_train_val.shape[0], n_folds_inner))
    predictions_test = np.zeros((X_test.shape[0], n_folds_inner))

    gkf_inner = GroupKFold(n_splits=n_folds_inner)
    inner_splits = gkf_inner.split(X_train_val, y_train_val, groups=group_ids_train_val)

    for fold_inner, (index_train, index_val) in enumerate(inner_splits):

        X_train_guide = X_train_val_guide.iloc[index_train].reset_index(drop=True)
        X_val_guide = X_train_val_guide.iloc[index_val].reset_index(drop=True)
        X_train_gene = X_train_val_gene.iloc[index_train].reset_index(drop=True)
        X_val_gene = X_train_val_gene.iloc[index_val].reset_index(drop=True)

        y_train = y_train_val.iloc[index_train].reset_index(drop=True)
        y_val = y_train_val.iloc[index_val].reset_index(drop=True)

        group_ids_train = [group_ids_train_val[i] for i in index_train]
        group_ids_val = [group_ids_train_val[i] for i in index_val]

        ## Train MERF model with chosen nonlinear model
        # A new estimator per split so that no fitted state is carried over.
        if model_type == 'RF':
            model = RandomForestRegressor(n_estimators = 2000, max_features = 'log2', max_depth = 10, min_samples_split = 2, min_samples_leaf = 5, bootstrap = True, max_samples = 0.8, random_state = seed)
        else:  # 'GBM' (validated above)
            model = GradientBoostingRegressor(n_estimators = 2000, max_depth = 3, learning_rate = 0.01, random_state = seed)

        merf = MERF(model, max_iterations = 10)
        merf.fit(X_train_guide, X_train_gene, pd.Series(group_ids_train), y_train, X_val_guide, X_val_gene, pd.Series(group_ids_val), y_val)

        # Predict on ALL train/val rows (this split's validation rows included)
        # and on the test rows.
        predictions_train[:, fold_inner] = merf.predict(X_train_val_guide, X_train_val_gene, pd.Series(group_ids_train_val))
        predictions_test[:, fold_inner] = merf.predict(X_test_guide, X_test_gene, pd.Series(group_ids_test))

    # Ensemble: average the inner-fold models
    y_hat_train = predictions_train.mean(axis=1)
    y_hat_test = predictions_test.mean(axis=1)

    performance.iloc[fold,:] =  [round(mean_squared_error(y_train_val,y_hat_train),2), round(mean_squared_error(y_test,y_hat_test),2),
                                 round(spearmanr(y_train_val,y_hat_train)[0],4), round(spearmanr(y_test,y_hat_test)[0],4)]

    return performance

In [4]:
def train_merf(model_type, X, Y, Y_median, features_gene, features_guide, gkf, group_ids, n_folds_outer, n_folds_inner, X_add = None, Y_add = None, Y_median_add = None, group_ids_add = None):
    """Run the outer grouped cross-validation and collect one metrics row per fold.

    Each outer split yields a train/validation part and a test part. When
    additional data are supplied, their rows are appended to the
    train/validation part after removing every row whose group id also occurs
    in the current test part. The fold is then evaluated by ``merf_core``.

    Parameters
    ----------
    model_type : str
        Forwarded to ``merf_core``.
    X, Y, Y_median : pandas objects
        Features, targets and median-based targets of the main dataset,
        indexed by position with ``.iloc``.
    features_gene, features_guide : list of str
        Column names forwarded to ``merf_core``.
    gkf : object with ``split(X, y, groups=...)``
        Outer splitter yielding (train/val positions, test positions).
    group_ids : sequence
        Group id per row of ``X``; indexed with the positions from ``gkf``.
    n_folds_outer : int
        Number of rows allocated in the result table.
    n_folds_inner : int
        Forwarded to ``merf_core``.
    X_add, Y_add, Y_median_add, group_ids_add : optional
        Additional training data. Used only when both ``X_add`` and ``Y_add``
        are given; the other two are then required as well.

    Returns
    -------
    pandas.DataFrame
        Columns mse_train, mse_test, spearmanR_train, spearmanR_test with one
        row per outer fold. A summary and the table are also printed.

    Raises
    ------
    ValueError
        If ``X_add`` and ``Y_add`` are given without ``Y_median_add`` or
        ``group_ids_add``.
    """
    use_additional = (X_add is not None) and (Y_add is not None)
    if use_additional and (Y_median_add is None or group_ids_add is None):
        raise ValueError("Y_median_add and group_ids_add are required when X_add and Y_add are given")

    plot_fold = []
    performance = pd.DataFrame({'mse_train': np.zeros(n_folds_outer), 'mse_test': np.zeros(n_folds_outer),
                                'spearmanR_train': np.zeros(n_folds_outer), 'spearmanR_test': np.zeros(n_folds_outer)})

    for fold, (index_train_val, index_test) in enumerate(gkf.split(X, Y, groups=group_ids)):

        X_train_val, X_test = X.iloc[index_train_val], X.iloc[index_test]
        y_train_val, y_test = Y.iloc[index_train_val], Y.iloc[index_test]
        y_median_train_val, y_median_test = Y_median.iloc[index_train_val], Y_median.iloc[index_test]
        group_ids_train_val = [group_ids[i] for i in index_train_val]
        group_ids_test = [group_ids[i] for i in index_test]

        if use_additional:
            # Keep only additional rows whose group is NOT in the test part,
            # so held-out groups never reach training. A set keeps the
            # membership test O(1) per row.
            held_out_groups = set(group_ids_test)
            kept = [(pos, gid) for pos, gid in enumerate(group_ids_add) if gid not in held_out_groups]
            keep_positions = [pos for pos, _ in kept]
            group_ids_add_filtered = [gid for _, gid in kept]

            # Select by POSITION: the positions come from enumerate(), so they
            # must not be interpreted as index labels.
            X_add_filtered = X_add.iloc[keep_positions].reset_index(drop=True)
            Y_add_filtered = Y_add.iloc[keep_positions].reset_index(drop=True)
            Y_median_add_filtered = Y_median_add.iloc[keep_positions].reset_index(drop=True)

            X_train_val = pd.concat([X_train_val,X_add_filtered],axis=0)
            y_train_val = pd.concat([y_train_val,Y_add_filtered],axis=0)
            y_median_train_val = pd.concat([y_median_train_val,Y_median_add_filtered],axis=0)

            group_ids_train_val = group_ids_train_val + group_ids_add_filtered

        performance = merf_core(model_type, X_train_val, X_test, y_train_val, y_test, y_median_train_val, y_median_test, features_gene, features_guide, performance, n_folds_inner, fold, plot_fold, group_ids_train_val, group_ids_test)

    print("---")
    print("Training with {}-fold grouped CV".format(n_folds_outer))
    performance_mean = pd.DataFrame({'mean':performance.mean(axis=0).round(3), 'variance':performance.var(axis=0).round(3)}).transpose()
    print(performance_mean)
    print(performance)

    return performance

In [5]:
# Location handed to reload_performance / write_performance.
output_performance = "../reports/performance_training/merf"

# Input pickles, three per screen: data, one-hot encoding, k-mer file.
(file_data_wang,
 file_one_hot_encoding_wang,
 file_kmer_wang) = ('../datasets/data_wang.pickle',
                    '../datasets/one_hot_encoding_ML_wang.pickle',
                    '../datasets/kmer_wang.pickle')

(file_data_rousset_E18,
 file_one_hot_encoding_rousset_E18,
 file_kmer_rousset_E18) = ('../datasets/data_rousset_E18.pickle',
                           '../datasets/one_hot_encoding_ML_rousset_E18.pickle',
                           '../datasets/kmer_rousset_E18.pickle')

(file_data_rousset_E75,
 file_one_hot_encoding_rousset_E75,
 file_kmer_rousset_E75) = ('../datasets/data_rousset_E75.pickle',
                           '../datasets/one_hot_encoding_ML_rousset_E75.pickle',
                           '../datasets/kmer_rousset_E75.pickle')

In [6]:
## setup parameters

# Outer / inner grouped cross-validation fold counts
n_folds_inner = 5
n_folds_outer = 5

# Gene-level columns
features_gene = [
    "gene_length", "gene_GC_content", "distance_operon", "operon_downstream_genes",
    "ess_gene_operon", "gene_expression_min", "gene_expression_max",
]

# Guide-level columns
features_guide = [
    "guide_GC_content", "distance_start_codon", "homopolymers",
    "MFE_hybrid_full", "MFE_hybrid_seed", "MFE_homodimer_guide", "MFE_monomer_guide",
    "off_target_90_100", "off_target_80_90", "off_target_70_80", "off_target_60_70",
]

# Combined list: gene columns first, guide columns second
features_gene_guide = features_gene + features_guide

# Row labels of the performance tables: six source combinations, each with
# three target variants and two feature-set variants.
datasets = [
    "wang_orig_guide-genes", "wang_orig_guide",
    "wang_median-sub_guide-genes", "wang_median-sub_guide",
    "wang_rank_guide-genes", "wang_rank_guide",

    "rousset_E18_orig_guide-genes", "rousset_E18_orig_guide",
    "rousset_E18_median-sub_guide-genes", "rousset_E18_median-sub_guide",
    "rousset_E18_rank_guide-genes", "rousset_E18_rank_guide",

    "rousset_E75_orig_guide-genes", "rousset_E75_orig_guide",
    "rousset_E75_median-sub_guide-genes", "rousset_E75_median-sub_guide",
    "rousset_E75_rank_guide-genes", "rousset_E75_rank_guide",

    "wang_rousset_E18_orig_guide-genes", "wang_rousset_E18_orig_guide",
    "wang_rousset_E18_median-sub_guide-genes", "wang_rousset_E18_median-sub_guide",
    "wang_rousset_E18_rank_guide-genes", "wang_rousset_E18_rank_guide",

    "wang_rousset_E75_orig_guide-genes", "wang_rousset_E75_orig_guide",
    "wang_rousset_E75_median-sub_guide-genes", "wang_rousset_E75_median-sub_guide",
    "wang_rousset_E75_rank_guide-genes", "wang_rousset_E75_rank_guide",

    "wang_rousset_E18_rousset_E75_orig_guide-genes", "wang_rousset_E18_rousset_E75_orig_guide",
    "wang_rousset_E18_rousset_E75_median-sub_guide-genes", "wang_rousset_E18_rousset_E75_median-sub_guide",
    "wang_rousset_E18_rousset_E75_rank_guide-genes", "wang_rousset_E18_rousset_E75_rank_guide",
]

# Column labels of the performance tables
models = ["RF", "GBM"]

In [8]:
## setup performance tables
# Empty template: one row per dataset, one column per model
perf = pd.DataFrame(columns = models, index = datasets)

# One table per metric, in the order: mse_train, mse_test, spearmanR_train, spearmanR_test.
# Each entry is an independent copy; repeating `perf` itself would make all
# four entries the same object, so a write to one metric would show up in all.
performance_table = [perf.copy() for _ in range(4)]
# The fresh tables above are replaced by whatever reload_performance returns.
performance_table = reload_performance(output_performance)

In [9]:
## load data
# Per screen the files are read in the order: data, one-hot encoding, k-mer.

data_wang, one_hot_encoding_wang, kmer_wang = [
    get_data(path)
    for path in (file_data_wang, file_one_hot_encoding_wang, file_kmer_wang)
]

data_rousset_E18, one_hot_encoding_rousset_E18, kmer_rousset_E18 = [
    get_data(path)
    for path in (file_data_rousset_E18, file_one_hot_encoding_rousset_E18, file_kmer_rousset_E18)
]

data_rousset_E75, one_hot_encoding_rousset_E75, kmer_rousset_E75 = [
    get_data(path)
    for path in (file_data_rousset_E75, file_one_hot_encoding_rousset_E75, file_kmer_rousset_E75)
]

In [10]:
## Create X and Y variables
# Same call for every screen (wang, rousset_E18, rousset_E75, in that order);
# only the three loaded inputs differ.

model_features_wang, model_features_rousset_E18, model_features_rousset_E75 = [
    create_model_features(data, one_hot_encoding, kmer, features_gene_guide, features_guide)
    for data, one_hot_encoding, kmer in (
        (data_wang, one_hot_encoding_wang, kmer_wang),
        (data_rousset_E18, one_hot_encoding_rousset_E18, kmer_rousset_E18),
        (data_rousset_E75, one_hot_encoding_rousset_E75, kmer_rousset_E75),
    )
]

In [None]:
%matplotlib
models_skip = ["RF"]

for model in models:
    
    if model in models_skip:
        print("skipped: " + model)
        continue
    else:
        print("---")
        print(model)


        for dataset in datasets:

            # get correct dataset
            if "wang_rousset_E18" in dataset:
                print(dataset)
                modl_features = model_features_wang
                model_features_add = model_features_rousset_E18
                X_add = model_features_add["X_gene_guide"].copy()
                Y_add = model_features_add["Y_orig"].copy()
                Y_median_add = model_features_add["Y_median_sub"].copy()
                group_ids_add = model_features_add["geneid"]

            elif "wang" in dataset:
                print(dataset)
                model_features = model_features_wang
                X_add = None
                Y_add = None
                Y_median_add = None
                group_ids_add = None

            elif "rousset_E18" in dataset:
                print(dataset)
                model_features = model_features_rousset_E18
                X_add = None
                Y_add = None
                Y_median_add = None
                group_ids_add = None

            elif "rousset_E75" in dataset:
                print(dataset)
                model_features = model_features_rousset_E75
                X_add = None
                Y_add = None
                Y_median_add = None
                group_ids_add = None


            performance = train_merf(model, model_features["X_gene_guide"].copy(), model_features["Y_orig"].copy(), model_features["Y_median_sub"].copy(), 
                                     features_gene, features_guide, GroupKFold(n_splits=n_folds_outer), model_features["geneid"], n_folds_outer, n_folds_inner,
                                     X_add = X_add, Y_add = Y_add, Y_median_add = Y_median_add, group_ids_add = group_ids_add)


            performance_table = write_performance(performance_table, performance, model, dataset, outfiles = output_performance)


Using matplotlib backend: agg
skipped: RF
---
GBM
wang_orig_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_orig_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_median-sub_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_median-sub_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rank_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rank_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
rousset_E18_orig_guide-genes


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E18_orig_guide


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E18_median-sub_guide-genes


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E18_median-sub_guide


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E18_rank_guide-genes


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E18_rank_guide


INFO     [merf.py:307] Training GLL is 1134.458841558929 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 5.949643169736601 at iteration 1.
INFO     [merf.py:307] Training GLL is 893.0877114724403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 5.635631537750689 at iteration 2.
INFO     [merf.py:307] Training GLL is 709.4279874213573 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 5.866607595863462 at iteration 3.
INFO     [merf.py:307] Training GLL is 559.8642756156587 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 5.929393374922684 at iteration 4.
INFO     [merf.py:307] Training GLL is 418.85369903171954 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 5.980631530199781 at iteration 5.
INFO     [merf.py:307] Training GLL is 291.30048143913564 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 5.902925089479467 at iteration 6.
INFO     [merf.py:307] Training GLL is 161.89105545373332 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          1.820     5.902            0.854           0.057
variance      0.007     1.011            0.000           0.006
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.71      6.30           0.8604          0.0932
1       1.91      7.07           0.8372          0.0430
2       1.86      4.32           0.8636          0.1149
3       1.86      5.81           0.8511          0.1073
4       1.76      6.01           0.8588         -0.0751
rousset_E75_orig_guide-genes


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
rousset_E75_orig_guide


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
rousset_E75_median-sub_guide-genes


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
rousset_E75_median-sub_guide


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
rousset_E75_rank_guide-genes


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
rousset_E75_rank_guide


INFO     [merf.py:307] Training GLL is 1134.1047352059352 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.821173142923705 at iteration 1.
INFO     [merf.py:307] Training GLL is 979.4049110712215 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.872072389378954 at iteration 2.
INFO     [merf.py:307] Training GLL is 798.4919544213133 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.805400769408874 at iteration 3.
INFO     [merf.py:307] Training GLL is 692.6008301585981 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.7516244275781805 at iteration 4.
INFO     [merf.py:307] Training GLL is 573.6150761252288 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.632889660631143 at iteration 5.
INFO     [merf.py:307] Training GLL is 439.2363493822372 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.565105960734729 at iteration 6.
INFO     [merf.py:307] Training GLL is 327.8124117327506 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.054     6.544            0.864           0.112
variance      0.010     1.472            0.000           0.005
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       1.94      7.72           0.8657          0.0698
1       1.98      7.68           0.8689          0.0504
2       2.17      4.81           0.8649          0.2139
3       2.03      6.11           0.8675          0.0713
4       2.15      6.40           0.8533          0.1532
wang_rousset_E18_orig_guide-genes


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E18_orig_guide


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E18_median-sub_guide-genes


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E18_median-sub_guide


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E18_rank_guide-genes


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E18_rank_guide


INFO     [merf.py:307] Training GLL is 2742.3098112422445 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 6.368116940404753 at iteration 1.
INFO     [merf.py:307] Training GLL is 2479.31150564583 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.243671636888498 at iteration 2.
INFO     [merf.py:307] Training GLL is 2340.9799658861 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.281385861979417 at iteration 3.
INFO     [merf.py:307] Training GLL is 2174.5378545121685 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.2705083183378365 at iteration 4.
INFO     [merf.py:307] Training GLL is 2047.2490409152272 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.22789090981109 at iteration 5.
INFO     [merf.py:307] Training GLL is 1905.8986793527536 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.173272565355697 at iteration 6.
INFO     [merf.py:307] Training GLL is 1748.7983592602664 at iteration 7.
INFO   

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.208     6.500            0.828           0.107
variance      0.006     1.289            0.000           0.003
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.13      7.58           0.8270          0.0879
1       2.14      7.58           0.8294          0.0589
2       2.27      4.88           0.8349          0.1804
3       2.20      6.07           0.8325          0.0715
4       2.30      6.39           0.8169          0.1355
wang_rousset_E75_orig_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E75_orig_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E75_median-sub_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E75_median-sub_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E75_rank_guide-genes


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E75_rank_guide


INFO     [merf.py:307] Training GLL is 3877.56301795128 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.122134649094318 at iteration 1.
INFO     [merf.py:307] Training GLL is 3592.4942981304403 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 6.831437258602628 at iteration 2.
INFO     [merf.py:307] Training GLL is 3372.6004853985137 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 6.862252857558604 at iteration 3.
INFO     [merf.py:307] Training GLL is 3158.048925398552 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 6.827856554442818 at iteration 4.
INFO     [merf.py:307] Training GLL is 2947.4908222886556 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 6.854831164527326 at iteration 5.
INFO     [merf.py:307] Training GLL is 2743.127962102622 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 6.821543295343552 at iteration 6.
INFO     [merf.py:307] Training GLL is 2518.6663429918954 at iteration 7.
INFO  

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.798     6.442            0.743           0.135
variance      0.007     0.712            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.68      6.88           0.7549          0.1621
1       2.86      5.83           0.7432          0.1296
2       2.75      6.97           0.7320          0.0916
3       2.89      5.28           0.7527          0.1174
4       2.81      7.25           0.7301          0.1739
wang_rousset_E18_rousset_E75_orig_guide-genes


INFO     [merf.py:307] Training GLL is 5701.098188135601 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.285874809326848 at iteration 1.
INFO     [merf.py:307] Training GLL is 5254.587645995076 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 7.182457609216132 at iteration 2.
INFO     [merf.py:307] Training GLL is 4967.933916754606 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 7.153430877630719 at iteration 3.
INFO     [merf.py:307] Training GLL is 4699.046537669024 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 7.126878544626028 at iteration 4.
INFO     [merf.py:307] Training GLL is 4439.900801093334 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 7.113920287283835 at iteration 5.
INFO     [merf.py:307] Training GLL is 4180.416448242112 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 7.119139299073636 at iteration 6.
INFO     [merf.py:307] Training GLL is 3928.1182006534764 at iteration 7.
INFO    

---
Training with 5-fold grouped CV
          mse_train  mse_test  spearmanR_train  spearmanR_test
mean          2.954     6.406            0.739           0.132
variance      0.003     0.676            0.000           0.001
   mse_train  mse_test  spearmanR_train  spearmanR_test
0       2.86      6.88           0.7478          0.1459
1       2.97      5.81           0.7419          0.1326
2       2.96      6.89           0.7311          0.0863
3       3.00      5.27           0.7450          0.1180
4       2.98      7.18           0.7313          0.1749
wang_rousset_E18_rousset_E75_orig_guide


INFO     [merf.py:307] Training GLL is 5701.098188135601 at iteration 1.
INFO     [merf.py:328] Validation MSE Loss is 7.285874809326848 at iteration 1.
INFO     [merf.py:307] Training GLL is 5254.587645995076 at iteration 2.
INFO     [merf.py:328] Validation MSE Loss is 7.182457609216132 at iteration 2.
INFO     [merf.py:307] Training GLL is 4967.933916754606 at iteration 3.
INFO     [merf.py:328] Validation MSE Loss is 7.153430877630719 at iteration 3.
INFO     [merf.py:307] Training GLL is 4699.046537669024 at iteration 4.
INFO     [merf.py:328] Validation MSE Loss is 7.126878544626028 at iteration 4.
INFO     [merf.py:307] Training GLL is 4439.900801093334 at iteration 5.
INFO     [merf.py:328] Validation MSE Loss is 7.113920287283835 at iteration 5.
INFO     [merf.py:307] Training GLL is 4180.416448242112 at iteration 6.
INFO     [merf.py:328] Validation MSE Loss is 7.119139299073636 at iteration 6.
INFO     [merf.py:307] Training GLL is 3928.1182006534764 at iteration 7.
INFO    