# Experiment 1: Embedded variable selection algorithms testing

### (1) Importing and defining all required functions

In [1]:
"""
Importing required libraries and defining key functions
"""
import numpy as np
from functools import partial
import time
from IPython.display import clear_output
import inspect
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Algorithm functions
import os
os.chdir('C:/Users/hughw/Documents/MSC project/GP algorithms/Master function files')
from GP_funcs_ZTMFSS import kernel_funcs
from GP_funcs_ZTMFSS import draw_GP
from GP_funcs_ZTMFSS import simulations
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
os.chdir('C:/Users/hughw/Documents/MSC project/Simulation results')

### (2) Setting simulation parameters and models

In [2]:
"""
Importing in rpy2
"""

os.environ['R_USER'] = 'D:\Anaconda3\Lib\site-packages\rpy2'
import rpy2
print(rpy2.__version__)
import rpy2.robjects as robjects

from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')
base.R_home()
# import R's "utils" package
utils = importr('utils')

# import rpy2's package module
import rpy2.robjects.packages as rpackages

# import R's utility package
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list

3.4.5


<rpy2.rinterface_lib.sexp.NULLType object at 0x00000251D2DE47C0> [RTYPES.NILSXP]

In [3]:
"""
Simulation controls
"""
# Simulation settings
n=300
ntest=100
p=100
q=5
corr=0.5
r2=0.9
sigma_X=1
strue=1
lsmean=0.25 # toggle between 0.25 and 1
ltrue =np.repeat(lsmean/q**0.5,p)
kern = kernel_funcs.gaussian
nruns = 100
m = 9


# Storage objects
Runtime=np.zeros((nruns, m))
MSE_F=np.zeros((nruns, m))
MSE_Y=np.zeros((nruns,m))
TPR=np.zeros((nruns,m))
TNR=np.zeros((nruns,m))
PPV=np.zeros((nruns,m))
NPV=np.zeros((nruns,m))
MCC=np.zeros((nruns,m))
RF_oob = np.zeros((nruns,3))

### (3) Running algorithms

In [5]:
np.random.seed(8750)
runlist = np.random.choice(1000,100,False) # Choose 100 random trials
othermodels=True
SBGAM=False

for run in range(len(runlist)):
    
    """
    Generating data and scaling data
    """
    lselect=[]
    np.random.seed(runlist[run]) # Fixing trial seed
    t=time.time()
    Y,F,X,e,lselect,strue,sigma,select=draw_GP.draw_GP_ARD_lm(n,ntest,p,q,sigma_X,corr,strue,ltrue,plot_YX=True,kern=kern,cop=False,r2=r2)
    
    Y = Y.reshape(n+ntest,1)
    F = F.reshape(n+ntest,1)
    
    Y = (Y-Y.mean())/Y.var()**0.5
    F = (F-F.mean())/F.var()**0.5
    X = (X-X.mean(0))/X.var(0)**0.5

    # Getting training and test set
    ytest=Y[n:]
    Xtest=X[n:]
    ftest=F[n:]
    y=Y[:n]
    X=X[:n]
    f=F[:n]
    print("data generated")
    print("Noise variance is: ",sigma**2)
    print("Average data variance is: ", np.mean(np.var(X,0)))
    print(time.time()-t)
    
    if othermodels:
    
        """
        Running Random forest with VS+CV
        """
        
        # Setting up storage objects
        folds=5
        thresholds = ["1*mean", "2*mean", "4*mean", "8*mean", "16*mean"]
        errcv = np.zeros(len(thresholds))
        Selected_features = []
        
        t = time.time()
        
        # Shuffling the data and splitting into folds
        shuffled_indexes = np.random.choice(n,n,False)
        y_shuffle = y[shuffled_indexes]
        X_shuffle = X[shuffled_indexes]
        n_per_fold = int(n/folds)
        
        # Doing CV over thresholds
        for i in range(len(thresholds)):
            
            # Getting selected features          
            embedded_rf_selector = SelectFromModel(RandomForestRegressor(n_estimators=100), threshold = thresholds[i])
            embedded_rf_selector.fit(X, y.reshape(n,))
            embedded_rf_support = embedded_rf_selector.get_support()
            print(str(np.sum(embedded_rf_support)), 'selected features')
            selected_features = np.where(embedded_rf_support)[0]
            if len(selected_features)==0:
                selected_features = np.random.choice(p,1,False)
            Selected_features.append(selected_features)

            for f in range(folds):

                # Getting CVtraining and CVtest set
                y_cvtest = y_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]
                X_cvtest = X_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]

                if f>0:
                    y_cvtrain = y_shuffle[:(f*n_per_fold)]
                    X_cvtrain = X_shuffle[:(f*n_per_fold)]
                if f<(folds-1):
                    if f>0:
                        y_cvtrain = np.append(y_cvtrain,y_shuffle[(f+1)*n_per_fold:],0)
                        X_cvtrain = np.append(X_cvtrain,X_shuffle[(f+1)*n_per_fold:],0)
                    else:
                        y_cvtrain = y_shuffle[(f+1)*n_per_fold:]
                        X_cvtrain = X_shuffle[(f+1)*n_per_fold:]

                # Running training and predicting with RF
                model = RandomForestRegressor(n_estimators=100).fit(X_cvtrain[:,selected_features],y_cvtrain.reshape(len(y_cvtrain),))
                preds = model.predict(X_cvtest[:,selected_features])
                errcv[i] += np.mean((y_cvtest - preds)**2)
        
        # Getting best threshold and running final model
        best_threshold =np.where(errcv==np.min(errcv))[0][0]
        model = RandomForestRegressor(n_estimators=100).fit(X[:,Selected_features[best_threshold]],y.reshape(n,))
        preds = model.predict(Xtest[:,Selected_features[best_threshold]])
        Runtime[run,0] = time.time()-t
        
        # MSE
        MSE_Y[run,0] = simulations.MSE_pc(preds.reshape(ntest,1),ytest)
        MSE_F[run,0] = simulations.MSE_pc(preds.reshape(ntest,1),ftest)

        # VS accuracy
        forest_select = np.zeros(p)
        forest_select[Selected_features[best_threshold]]=1
        PPV[run,0]=np.mean(select[forest_select>0])
        NPV[run,0]=np.mean((1-select[forest_select==0]))
        TPR[run,0]=np.mean(forest_select[select>0])
        TNR[run,0]=np.mean((1-forest_select[select==0]))

        TP = np.sum(forest_select[select>0])
        TN = np.sum(1-forest_select[select==0])
        FP = np.sum(1-select[forest_select>0])
        FN = np.sum(select[forest_select==0])

        MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))

        if (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)==0:
            MCC[run,0]=0


        """
        Running Gboost with VS-CV
        """

        # Setting up storage objects
        folds=5
        thresholds = ["0.00625*mean", "0.125*mean", "0.25*mean", "0.5*mean", "1*mean", "2*mean", "4*mean", "8*mean", "16*mean"]
        errcv = np.zeros(len(thresholds))
        Selected_features = []
        
        t = time.time()
        
        # Shuffling the data and splitting into folds
        shuffled_indexes = np.random.choice(n,n,False)
        y_shuffle = y[shuffled_indexes]
        X_shuffle = X[shuffled_indexes]
        n_per_fold = int(n/folds)
        
        # Doing CV over thresholds
        for i in range(len(thresholds)):
            
            # Getting selected features          
            embedded_rf_selector = SelectFromModel(GradientBoostingRegressor(n_estimators=100,learning_rate=0.01), threshold = thresholds[i])
            embedded_rf_selector.fit(X, y.reshape(n,))
            embedded_rf_support = embedded_rf_selector.get_support()
            print(str(np.sum(embedded_rf_support)), 'selected features')
            selected_features = np.where(embedded_rf_support)[0]
            if len(selected_features)==0:
                selected_features = np.random.choice(p,1,False)
            Selected_features.append(selected_features)

            for f in range(folds):

                # Getting CVtraining and CVtest set
                y_cvtest = y_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]
                X_cvtest = X_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]

                if f>0:
                    y_cvtrain = y_shuffle[:(f*n_per_fold)]
                    X_cvtrain = X_shuffle[:(f*n_per_fold)]
                if f<(folds-1):
                    if f>0:
                        y_cvtrain = np.append(y_cvtrain,y_shuffle[(f+1)*n_per_fold:],0)
                        X_cvtrain = np.append(X_cvtrain,X_shuffle[(f+1)*n_per_fold:],0)
                    else:
                        y_cvtrain = y_shuffle[(f+1)*n_per_fold:]
                        X_cvtrain = X_shuffle[(f+1)*n_per_fold:]

                # Running training and predicting with RF
                model = GradientBoostingRegressor(n_estimators=100,learning_rate=0.01).fit(X_cvtrain[:,selected_features],y_cvtrain.reshape(len(y_cvtrain),))
                preds = model.predict(X_cvtest[:,selected_features])
                errcv[i] += np.mean((y_cvtest - preds)**2)
        
        # Getting best threshold and running final model
        best_threshold =np.where(errcv==np.min(errcv))[0][0]
        model = GradientBoostingRegressor(n_estimators=100,learning_rate=0.01).fit(X[:,Selected_features[best_threshold]],y.reshape(n,))
        preds = model.predict(Xtest[:,Selected_features[best_threshold]])
        Runtime[run,1] = time.time()-t
        
        # MSE
        MSE_Y[run,1] = simulations.MSE_pc(preds.reshape(ntest,1),ytest)
        MSE_F[run,1] = simulations.MSE_pc(preds.reshape(ntest,1),ftest)

        # VS accuracy
        forest_select = np.zeros(p)
        forest_select[Selected_features[best_threshold]]=1
        PPV[run,1]=np.mean(select[forest_select>0])
        NPV[run,1]=np.mean((1-select[forest_select==0]))
        TPR[run,1]=np.mean(forest_select[select>0])
        TNR[run,1]=np.mean((1-forest_select[select==0]))

        TP = np.sum(forest_select[select>0])
        TN = np.sum(1-forest_select[select==0])
        FP = np.sum(1-select[forest_select>0])
        FN = np.sum(select[forest_select==0])

        MCC[run,1]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))

        if (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)==0:
            MCC[run,1]=0


    """
    Printing out current results
    """
    print("RUN {0}".format(run))
    print("Runtime mean is:", Runtime[:run+1].mean(0))
    print("TPR mean is:", TPR[:run+1].mean(0))
    print("PPV mean is:", PPV[:run+1].mean(0))
    print("MCC mean is:", MCC[:run+1].mean(0))
    print("MSE_F mean is:", MSE_F[:run+1].mean(0))
    print("MSE_Y mean is:", MSE_Y[:run+1].mean(0), "\n")

R2= 0.8993636862772934
data generated
Noise variance is:  0.006067570867112703
Average data variance is:  0.9972629072424785
0.03487586975097656
100 selected features
97 selected features
42 selected features
11 selected features
4 selected features
3 selected features
2 selected features
2 selected features
2 selected features
21 selected features
11 selected features
9 selected features
6 selected features
5 selected features
4 selected features
3 selected features
2 selected features
2 selected features
RUN 0
Runtime mean is: [16.53687048  5.64980245  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [1.  0.6 0.  0.  0.  0.  0.  0.  0. ]
PPV mean is: [0.05154639 1.         0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.04034577 0.76656954 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.27604646 0.4031223  0.         0.         0.         0.
 0.         0.         

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


27 selected features
13 selected features
7 selected features
3 selected features
2 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 1
Runtime mean is: [16.09138775  5.73203564  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [1.  0.6 0.  0.  0.  0.  0.  0.  0. ]
PPV mean is: [0.0507732  0.71428571 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02017288 0.62155983 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.21078181 0.35349458 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31391444 0.41780773 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9080835121498346
data generated
Noise variance is:  0.0054187767092858845
Average data variance is:  1.012614884699872
0.03271150588989258
100 selected features
99 selected features
57 selected feature

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
17 selected features
11 selected features
7 selected features
3 selected features
3 selected features
2 selected features
2 selected features
2 selected features
RUN 4
Runtime mean is: [16.33247108  5.76737924  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.96 0.6  0.   0.   0.   0.   0.   0.   0.  ]
PPV mean is: [0.05331351 0.64047619 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02969582 0.55879555 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.21034144 0.37708304 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31114326 0.43524984 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9014392609465416
data generated
Noise variance is:  0.008110831557296446
Average data variance is:  1.0078797080723603
0.033643484115600586
100 selected features
96 selected features
35 selec

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


16 selected features
11 selected features
7 selected features
6 selected features
3 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 6
Runtime mean is: [16.32078617  5.70990477  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.94285714 0.62857143 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05117632 0.54081633 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [-0.00554829  0.51475955  0.          0.          0.          0.
  0.          0.          0.        ]
MSE_F mean is: [0.20511007 0.37229594 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29727211 0.42936262 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9079214417775396
data generated
Noise variance is:  0.0021687632745124607
Average data variance is:  0.9541907076797911
0.03189063072204

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


27 selected features
14 selected features
12 selected features
8 selected features
6 selected features
4 selected features
3 selected features
3 selected features
2 selected features
RUN 8
Runtime mean is: [16.57469463  5.82187276  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.95555556 0.68888889 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05108674 0.45554797 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [1.67523682e-04 4.69289342e-01 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00]
MSE_F mean is: [0.21596996 0.38137303 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29560806 0.43098745 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8979958758988987
data generated
Noise variance is:  0.007785898722104241
Average data variance is: 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


27 selected features
17 selected features
13 selected features
9 selected features
6 selected features
5 selected features
4 selected features
3 selected features
2 selected features
RUN 11
Runtime mean is: [16.57066572  5.8264402   0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.96666667 0.68333333 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05238561 0.45485171 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.01635515 0.46450053 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.20456108 0.37114201 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.28259895 0.42164702 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9068948324007206
data generated
Noise variance is:  0.0054431382742102475
Average data variance is:  0.9863259559898612
0.032631635665893555
10

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


22 selected features
11 selected features
9 selected features
7 selected features
4 selected features
3 selected features
3 selected features
3 selected features
3 selected features
RUN 12
Runtime mean is: [16.65362807  5.83159643  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.96923077 0.70769231 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0522021  0.43734563 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.01509706 0.46199861 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.21599585 0.37966288 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29426904 0.43132783 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.902512231221621
data generated
Noise variance is:  0.0019365726581941085
Average data variance is:  0.9842049167013058
0.033682823181152344
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
17 selected features
12 selected features
9 selected features
5 selected features
4 selected features
3 selected features
3 selected features
3 selected features
RUN 15
Runtime mean is: [16.7816792   5.93517159  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.975  0.7625 0.     0.     0.     0.     0.     0.     0.    ]
PPV mean is: [0.05240445 0.38714662 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02033809 0.44089418 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22171879 0.38641858 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29869387 0.43611449 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9014179569481476
data generated
Noise variance is:  0.005582173804730154
Average data variance is:  1.0286954832376727
0.033116817474365234
100 selected features
98 selecte

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


34 selected features
25 selected features
13 selected features
10 selected features
6 selected features
3 selected features
2 selected features
2 selected features
1 selected features
RUN 19
Runtime mean is: [16.81605982  5.93262196  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98 0.77 0.   0.   0.   0.   0.   0.   0.  ]
PPV mean is: [0.05218348 0.35106974 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02255802 0.42049055 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22195493 0.38570883 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29753717 0.43402069 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8959112372422909
data generated
Noise variance is:  0.010693509801023179
Average data variance is:  0.997937410517146
0.03096461296081543
100 selected features
100 selected features
92 sele

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


20 selected features
10 selected features
8 selected features
5 selected features
3 selected features
1 selected features
1 selected features
1 selected features
1 selected features
RUN 27
Runtime mean is: [17.11294133  6.02106136  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98571429 0.82857143 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05401363 0.36048737 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03566663 0.45683422 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23385202 0.39842133 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30496469 0.44287687 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8999651054654823
data generated
Noise variance is:  0.004061397595216192
Average data variance is:  0.9930693123364678
0.03251481056213379
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


29 selected features
12 selected features
10 selected features
7 selected features
4 selected features
4 selected features
4 selected features
4 selected features
3 selected features
RUN 28
Runtime mean is: [17.1609256   6.03761238  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9862069  0.82758621 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05387523 0.35955102 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03443675 0.45763528 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.24037896 0.40510964 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31166557 0.44969176 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8924689073291786
data generated
Noise variance is:  0.007201556428210239
Average data variance is:  1.004123790558249
0.037897586822509766
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


18 selected features
9 selected features
7 selected features
4 selected features
3 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 30
Runtime mean is: [17.15655991  6.02600189  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98709677 0.82580645 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05364151 0.37936493 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0329588  0.47355682 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.24090977 0.40371469 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31186596 0.44780436 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9019097049076382
data generated
Noise variance is:  0.012850229561027092
Average data variance is:  1.0322939432198759
0.03289937973022461
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


32 selected features
18 selected features
12 selected features
6 selected features
4 selected features
3 selected features
2 selected features
2 selected features
2 selected features
RUN 31
Runtime mean is: [17.12317016  6.033932    0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9875  0.83125 0.      0.      0.      0.      0.      0.      0.     ]
PPV mean is: [0.05352771 0.37239259 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03192884 0.46920904 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23801157 0.4009835  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31015448 0.44559132 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9100690015201579
data generated
Noise variance is:  0.023191447209473623
Average data variance is:  0.9620751845797189
0.031076908111572266
100 selected features
7

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


28 selected features
16 selected features
12 selected features
10 selected features
7 selected features
4 selected features
3 selected features
1 selected features
1 selected features
RUN 32
Runtime mean is: [17.07825838  6.04004062  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98787879 0.83030303 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05342081 0.36543697 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0309613  0.46304191 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23434209 0.39776044 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30660047 0.44235632 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8968884861128648
data generated
Noise variance is:  0.006805348355749112
Average data variance is:  0.9828351413426459
0.03173947334289551
100

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


54 selected features
24 selected features
16 selected features
8 selected features
7 selected features
6 selected features
4 selected features
2 selected features
2 selected features
RUN 33
Runtime mean is: [17.09624885  6.07089203  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98823529 0.83529412 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0533202  0.36081628 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03005067 0.46143033 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2311597  0.39521437 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30458875 0.44066501 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.903127707316271
data generated
Noise variance is:  0.004376420105171569
Average data variance is:  0.9888273589178876
0.03246712684631348
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


20 selected features
12 selected features
9 selected features
6 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 36
Runtime mean is: [17.11635163  6.07281332  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98918919 0.82162162 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05325981 0.38700096 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03088439 0.4712177  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2352662  0.40533066 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31062679 0.45113361 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8999721091994395
data generated
Noise variance is:  0.0058596167314240045
Average data variance is:  0.9875898503933194
0.029551029205322266
100

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


17 selected features
8 selected features
7 selected features
6 selected features
4 selected features
3 selected features
3 selected features
3 selected features
3 selected features
RUN 37
Runtime mean is: [17.13420805  6.06410526  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98947368 0.82105263 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05317403 0.39185432 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03007164 0.4760904  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23676391 0.40755211 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.31174405 0.45350994 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8934778923249928
data generated
Noise variance is:  0.0071332686367954175
Average data variance is:  1.0276968274607974
0.03328657150268555
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


22 selected features
13 selected features
6 selected features
3 selected features
2 selected features
1 selected features
1 selected features
1 selected features
1 selected features
RUN 38
Runtime mean is: [17.07557928  6.05068256  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98974359 0.82564103 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05309264 0.3916687  0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02930057 0.47910057 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23251952 0.40303461 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30759651 0.44924566 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8841087494497937
data generated
Noise variance is:  0.004288710109263468
Average data variance is:  0.9945737933609891
0.030272722244262695
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


36 selected features
16 selected features
9 selected features
6 selected features
3 selected features
3 selected features
2 selected features
2 selected features
2 selected features
RUN 39
Runtime mean is: [17.05359788  6.05207906  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99  0.825 0.    0.    0.    0.    0.    0.    0.   ]
PPV mean is: [0.05301532 0.38465476 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02856806 0.4723805  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2293537  0.39941824 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30632613 0.44696447 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8915281978791828
data generated
Noise variance is:  0.006724157214255451
Average data variance is:  0.9896134339176236
0.03401517868041992
100 selected features
93 selected features


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


40 selected features
21 selected features
15 selected features
9 selected features
6 selected features
4 selected features
3 selected features
1 selected features
1 selected features
RUN 40
Runtime mean is: [17.06842078  6.06228177  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9902439  0.82926829 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05294178 0.38108014 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02787128 0.47171186 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22871107 0.39890835 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30578323 0.44669894 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8824170242301309
data generated
Noise variance is:  0.0027590732233232396
Average data variance is:  1.01439490814788
0.03277850151062012
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


19 selected features
7 selected features
5 selected features
4 selected features
2 selected features
2 selected features
2 selected features
1 selected features
1 selected features
RUN 42
Runtime mean is: [17.04959335  6.05032784  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98604651 0.82325581 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0527906  0.38262459 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02643894 0.47330325 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.23005398 0.39998154 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30644629 0.44759259 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9000828965363512
data generated
Noise variance is:  0.008241382299646103
Average data variance is:  1.000059731567623
0.030719757080078125
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


22 selected features
12 selected features
8 selected features
5 selected features
4 selected features
3 selected features
2 selected features
2 selected features
2 selected features
RUN 43
Runtime mean is: [17.0308794   6.04199473  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98636364 0.82272727 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05272717 0.39211039 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02583805 0.48048894 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22831999 0.39813415 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30398952 0.4451326  0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.896669883497315
data generated
Noise variance is:  0.009247065565051525
Average data variance is:  1.0028504453122076
0.03291177749633789
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
16 selected features
15 selected features
9 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 44
Runtime mean is: [17.04888173  6.04269483  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98666667 0.82222222 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05266657 0.38726156 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02526387 0.47671661 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22802603 0.39908905 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30325444 0.44568647 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8918649344747405
data generated
Noise variance is:  0.013249980882835995
Average data variance is:  0.9835232421909359
0.0359041690826416
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


31 selected features
13 selected features
9 selected features
6 selected features
4 selected features
2 selected features
2 selected features
2 selected features
1 selected features
RUN 46
Runtime mean is: [17.00663607  6.03929026  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98723404 0.81702128 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05290771 0.37748738 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02700697 0.46763194 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2229678  0.39440534 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29893122 0.44121785 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9004310513358629
data generated
Noise variance is:  0.0025055877021633362
Average data variance is:  1.0123550226773308
0.03248715400695801
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


33 selected features
10 selected features
8 selected features
6 selected features
5 selected features
3 selected features
3 selected features
2 selected features
2 selected features
RUN 48
Runtime mean is: [17.04956614  6.05101198  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9877551 0.8244898 0.        0.        0.        0.        0.
 0.        0.       ]
PPV mean is: [0.05299803 0.37537596 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02802355 0.46926204 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22484101 0.39748992 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.2999402  0.44368477 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9140911683895284
data generated
Noise variance is:  0.002128524518729549
Average data variance is:  1.002786104780445
0.032048940658569336
100 selected 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


22 selected features
15 selected features
12 selected features
9 selected features
4 selected features
4 selected features
3 selected features
2 selected features
1 selected features
RUN 49
Runtime mean is: [17.04400605  6.04811487  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.988 0.824 0.    0.    0.    0.    0.    0.    0.   ]
PPV mean is: [0.05293807 0.38786844 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02746308 0.47767193 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22582141 0.39685489 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30028903 0.44298525 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8929928626911633
data generated
Noise variance is:  0.016330365854337902
Average data variance is:  1.019269024193722
0.03265500068664551
100 selected features
77 selected features


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


30 selected features
20 selected features
12 selected features
8 selected features
7 selected features
4 selected features
3 selected features
2 selected features
2 selected features
RUN 50
Runtime mean is: [17.01838397  6.05436794  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98823529 0.82352941 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05288046 0.38287755 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02692459 0.47321391 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22337399 0.39408241 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29872968 0.44088793 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8871011950555595
data generated
Noise variance is:  0.0021915362348025698
Average data variance is:  0.9800035530590767
0.03296160697937012
100

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


24 selected features
13 selected features
7 selected features
7 selected features
5 selected features
4 selected features
4 selected features
2 selected features
2 selected features
RUN 53
Runtime mean is: [17.05419924  6.05788711  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98888889 0.81481481 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05280775 0.39045273 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02727065 0.47199778 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22292159 0.39541315 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29921235 0.44238472 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.905601883173271
data generated
Noise variance is:  0.006176650250846322
Average data variance is:  1.0118385990361953
0.03490567207336426
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


24 selected features
13 selected features
9 selected features
5 selected features
4 selected features
4 selected features
4 selected features
3 selected features
1 selected features
RUN 54
Runtime mean is: [17.06721992  6.05440978  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98909091 0.81454545 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0527567 0.388948  0.        0.        0.        0.        0.
 0.        0.       ]
MCC mean is: [0.02677482 0.47172606 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22428375 0.39687458 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30062786 0.44404007 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8859695993766232
data generated
Noise variance is:  0.0040238066012819795
Average data variance is:  1.0222399385813707
0.034973859786987305
100 selecte

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


24 selected features
18 selected features
12 selected features
9 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 55
Runtime mean is: [17.06945918  6.05667078  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98928571 0.81785714 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05270747 0.38572273 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0262967  0.47059253 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22551112 0.3971446  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30195285 0.44410799 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9064069144989667
data generated
Noise variance is:  0.012075470137771603
Average data variance is:  1.0258968630060326
0.0359039306640625
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


24 selected features
12 selected features
10 selected features
7 selected features
3 selected features
2 selected features
2 selected features
2 selected features
1 selected features
RUN 58
Runtime mean is: [17.03757208  6.04639863  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98983051 0.81016949 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05282627 0.40969325 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02779584 0.48216352 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22543647 0.40095385 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30197763 0.44774206 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9021444363956652
data generated
Noise variance is:  0.0020912558229771844
Average data variance is:  1.018478553570807
0.04020380973815918
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


24 selected features
12 selected features
9 selected features
5 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 60
Runtime mean is: [17.00483305  6.04203705  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99016393 0.81311475 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05284538 0.40757997 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02827331 0.48354615 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22225472 0.39836249 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29925216 0.44533957 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9121653634037267
data generated
Noise variance is:  0.002239957725942645
Average data variance is:  1.0013321518636078
0.03366446495056152
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


25 selected features
16 selected features
11 selected features
8 selected features
4 selected features
3 selected features
3 selected features
3 selected features
1 selected features
RUN 63
Runtime mean is: [17.00957164  6.03806723  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.990625 0.809375 0.       0.       0.       0.       0.       0.
 0.      ]
PPV mean is: [0.05282643 0.40966606 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02863197 0.47915919 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22372866 0.40023596 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30130435 0.44705136 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8940127699620057
data generated
Noise variance is:  0.0031055544086292276
Average data variance is:  1.0171849953376697
0.033113956451416016
100 selected featu

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


16 selected features
7 selected features
6 selected features
5 selected features
5 selected features
4 selected features
4 selected features
2 selected features
2 selected features
RUN 65
Runtime mean is: [16.99907278  6.02917181  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99090909 0.81212121 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05275624 0.40462183 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02826091 0.47731182 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2244806  0.39991524 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30107565 0.44647011 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8954730171146331
data generated
Noise variance is:  0.004362475467654147
Average data variance is:  1.0029009412732122
0.03390955924987793
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


26 selected features
15 selected features
10 selected features
8 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 66
Runtime mean is: [16.9949967   6.02963971  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99104478 0.80597015 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0527151  0.41350807 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02783911 0.47948179 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22402132 0.39942803 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30160585 0.44667454 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8991434935066773
data generated
Noise variance is:  0.002868752121056666
Average data variance is:  0.967036651590988
0.0359044075012207
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


19 selected features
10 selected features
8 selected features
4 selected features
2 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 68
Runtime mean is: [17.03669991  6.03565034  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99130435 0.80869565 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05274468 0.41693042 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02831742 0.4838904  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22621757 0.40125309 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30313783 0.44811034 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8871595688314826
data generated
Noise variance is:  0.010404476094237941
Average data variance is:  1.0296061675373505
0.03336381912231445
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
15 selected features
11 selected features
9 selected features
6 selected features
5 selected features
4 selected features
3 selected features
1 selected features
RUN 70
Runtime mean is: [17.06437518  6.04524082  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9915493  0.81126761 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05267448 0.42430059 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0278445  0.49079995 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22779319 0.40218733 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30508369 0.44928776 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9062227314374209
data generated
Noise variance is:  0.005705471464142282
Average data variance is:  0.9965585203587716
0.03173542022705078
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


21 selected features
13 selected features
10 selected features
5 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 71
Runtime mean is: [17.06076775  6.04361115  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99166667 0.80833333 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05263733 0.42882419 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02745777 0.49308898 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22737525 0.40196438 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30437944 0.44898806 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9168692809901399
data generated
Noise variance is:  0.004710548217863849
Average data variance is:  0.9611605553474449
0.034493446350097656
100

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


20 selected features
9 selected features
8 selected features
6 selected features
5 selected features
4 selected features
4 selected features
4 selected features
2 selected features
RUN 74
Runtime mean is: [17.06118525  6.03733083  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98933333 0.808      0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05271916 0.42357598 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02810716 0.48998106 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2260151  0.40068073 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30242696 0.44710556 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9045521133976235
data generated
Noise variance is:  0.001197886333089588
Average data variance is:  1.0173837345840624
0.03241777420043945
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


20 selected features
9 selected features
8 selected features
4 selected features
4 selected features
3 selected features
3 selected features
2 selected features
2 selected features
RUN 75
Runtime mean is: [17.04992384  6.03590743  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98947368 0.81052632 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05268339 0.42129209 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02773733 0.48957119 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22586535 0.40063689 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30191086 0.44672928 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8922634111567298
data generated
Noise variance is:  0.014322001076579379
Average data variance is:  0.9952097036187412
0.04288792610168457
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


21 selected features
10 selected features
6 selected features
6 selected features
3 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 76
Runtime mean is: [17.03299823  6.03346049  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98961039 0.80779221 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05264854 0.41767605 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0273771  0.48606594 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22461245 0.39908192 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30047526 0.44489709 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.908931794030566
data generated
Noise variance is:  0.0017452561047888154
Average data variance is:  0.9823210663841239
0.032557010650634766
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


22 selected features
12 selected features
9 selected features
7 selected features
5 selected features
4 selected features
3 selected features
3 selected features
3 selected features
RUN 80
Runtime mean is: [17.03006356  6.0414246   0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99012346 0.80740741 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0532297  0.40880786 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03053605 0.48020695 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22296623 0.39568596 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29874566 0.4418321  0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8980380814167195
data generated
Noise variance is:  0.005203041292692965
Average data variance is:  1.003805163833143
0.033425331115722656
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
15 selected features
12 selected features
5 selected features
2 selected features
2 selected features
2 selected features
2 selected features
1 selected features
RUN 81
Runtime mean is: [17.03397419  6.03990502  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9902439  0.80487805 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05319032 0.40626143 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03016366 0.47787664 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22146003 0.39418634 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29748416 0.44043479 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8955345946902755
data generated
Noise variance is:  0.0008548391763005245
Average data variance is:  0.95512769273552
0.0307769775390625
100 se

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
13 selected features
10 selected features
7 selected features
4 selected features
4 selected features
3 selected features
3 selected features
3 selected features
RUN 82
Runtime mean is: [17.05920673  6.04222453  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99036145 0.80481928 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05315188 0.40618599 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02980024 0.47856853 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22152345 0.39400844 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29773717 0.44042407 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8965803448785251
data generated
Noise variance is:  0.0036909269294253906
Average data variance is:  1.0229586906947452
0.03314089775085449
100

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


16 selected features
8 selected features
5 selected features
5 selected features
3 selected features
2 selected features
2 selected features
2 selected features
2 selected features
RUN 84
Runtime mean is: [17.0765335   6.04745274  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99058824 0.80235294 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05366595 0.4098281  0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03179807 0.47787168 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2218292 0.3936409 0.        0.        0.        0.        0.
 0.        0.       ]
MSE_Y mean is: [0.29860506 0.44038764 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8982735484211057
data generated
Noise variance is:  0.001662938839478932
Average data variance is:  1.0102664827468886
0.03457450866699219
100 selected f

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


34 selected features
22 selected features
16 selected features
9 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
RUN 86
Runtime mean is: [17.08259234  6.05731032  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9908046 0.8       0.        0.        0.        0.        0.
 0.        0.       ]
PPV mean is: [0.05358748 0.40308529 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0313321  0.47149078 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22241738 0.3942831  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.2999077  0.44141891 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9095580667095777
data generated
Noise variance is:  0.00373053151127046
Average data variance is:  0.99162552240675
0.03389167785644531
100 selected fe

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


18 selected features
6 selected features
4 selected features
2 selected features
2 selected features
2 selected features
2 selected features
2 selected features
1 selected features
RUN 87
Runtime mean is: [17.06739687  6.05289908  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99090909 0.79318182 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05354672 0.40986841 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03097605 0.47111117 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22159656 0.3935553  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29924419 0.44101483 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8937279369110388
data generated
Noise variance is:  0.004384409531259157
Average data variance is:  1.0235548981292915
0.032862186431884766
100 s

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


32 selected features
18 selected features
10 selected features
6 selected features
3 selected features
3 selected features
3 selected features
3 selected features
3 selected features
RUN 88
Runtime mean is: [17.08530365  6.0562885   0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99101124 0.79101124 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05350687 0.40863393 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.03062801 0.47011396 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2223224  0.39438968 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.30013291 0.44196596 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9054940587641246
data generated
Noise variance is:  0.0050691659923975405
Average data variance is:  1.0192768442228635
0.032495737075805664
10

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


23 selected features
6 selected features
5 selected features
4 selected features
3 selected features
3 selected features
3 selected features
2 selected features
2 selected features
RUN 89
Runtime mean is: [17.08745403  6.0533872   0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99111111 0.78888889 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.0534679  0.41520466 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.0302877  0.47340792 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.2222541 0.394271  0.        0.        0.        0.        0.
 0.        0.       ]
MSE_Y mean is: [0.29929916 0.44141634 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8942435494775831
data generated
Noise variance is:  0.013512819621190858
Average data variance is:  1.0051205219128057
0.0359034538269043
100 selected fe

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


18 selected features
14 selected features
10 selected features
8 selected features
6 selected features
5 selected features
4 selected features
3 selected features
3 selected features
RUN 90
Runtime mean is: [17.09737164  6.05386523  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99120879 0.79120879 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05342979 0.41369448 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02995486 0.4735865  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22203973 0.39434579 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29854211 0.44101217 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8925141486043392
data generated
Noise variance is:  0.004294297225524785
Average data variance is:  1.0088226049962938
0.01990199089050293
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


28 selected features
19 selected features
15 selected features
12 selected features
7 selected features
4 selected features
4 selected features
4 selected features
2 selected features
RUN 91
Runtime mean is: [17.11244188  6.06124378  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.99130435 0.79347826 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05339251 0.41282099 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02962927 0.47437489 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22265442 0.39511006 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29864017 0.44136853 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9019360252922963
data generated
Noise variance is:  0.011078161757092688
Average data variance is:  0.978109682059105
0.03291177749633789
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


21 selected features
12 selected features
8 selected features
7 selected features
4 selected features
4 selected features
4 selected features
4 selected features
2 selected features
RUN 96
Runtime mean is: [17.12142628  6.0712757   0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98762887 0.79381443 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05330454 0.40670459 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02919062 0.47091839 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22007489 0.3924549  0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29585951 0.4386573  0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.8980860028775616
data generated
Noise variance is:  0.004503056471917673
Average data variance is:  0.9984008731786534
0.031722068786621094
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


52 selected features
15 selected features
12 selected features
10 selected features
6 selected features
5 selected features
4 selected features
3 selected features
3 selected features
RUN 97
Runtime mean is: [17.14262597  6.08402557  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.9877551  0.79591837 0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05327082 0.4035357  0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02889275 0.46836224 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22181713 0.39419951 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.2975877  0.44028974 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.9060210673725113
data generated
Noise variance is:  0.022898177536325882
Average data variance is:  0.9815890054080494
0.0362391471862793
100 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))


34 selected features
16 selected features
10 selected features
5 selected features
3 selected features
3 selected features
3 selected features
2 selected features
1 selected features
RUN 98
Runtime mean is: [17.13658993  6.08622668  0.          0.          0.          0.
  0.          0.          0.        ]
TPR mean is: [0.98787879 0.7959596  0.         0.         0.         0.
 0.         0.         0.        ]
PPV mean is: [0.05323779 0.40064794 0.         0.         0.         0.
 0.         0.         0.        ]
MCC mean is: [0.02860091 0.46588158 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_F mean is: [0.22095597 0.39333322 0.         0.         0.         0.
 0.         0.         0.        ]
MSE_Y mean is: [0.29670551 0.43937832 0.         0.         0.         0.
 0.         0.         0.        ] 

R2= 0.900112364354712
data generated
Noise variance is:  0.004758340715958492
Average data variance is:  1.010378106428452
0.03362727165222168
100 se

In [10]:
np.random.seed(8750)
runlist = np.random.choice(1000,100,False) # Choose 100 random trials
othermodels=False
SBGAM=True

for run in range(len(runlist)):
    
    """
    Generating data and scaling data
    """
    lselect=[]
    np.random.seed(runlist[run]) # Fixing trial seed
    t=time.time()
    Y,F,X,e,lselect,strue,sigma,select=draw_GP.draw_GP_ARD_lm(n,ntest,p,q,sigma_X,corr,strue,ltrue,plot_YX=True,kern=kern,cop=False,r2=r2)
    
    Y = Y.reshape(n+ntest,1)
    F = F.reshape(n+ntest,1)
    
    Y = (Y-Y.mean())/Y.var()**0.5
    F = (F-F.mean())/F.var()**0.5
    X = (X-X.mean(0))/X.var(0)**0.5

    # Getting training and test set
    ytest=Y[n:]
    Xtest=X[n:]
    ftest=F[n:]
    y=Y[:n]
    X=X[:n]
    f=F[:n]
    print("data generated")
    print("Noise variance is: ",sigma**2)
    print("Average data variance is: ", np.mean(np.var(X,0)))
    print(time.time()-t)
    
    """
    Exporting data to enable running of R scripts
    """
    os.chdir('C:/Users/hughw/Documents/MSC project/R scripts')
    np.save("y", y)
    np.save("f", f)
    np.save("X", X)
    np.save("ytest", ytest)
    np.save("ftest", ftest)
    np.save("Xtest", Xtest)
    np.save("select", select.astype(float))   
    
    if othermodels:
    
        """
        Running Random forest with VS+CV
        """
        
        # Setting up storage objects
        folds=5
        thresholds = ["1*mean", "2*mean", "4*mean", "8*mean", "16*mean"]
        errcv = np.zeros(len(thresholds))
        Selected_features = []
        
        t = time.time()
        
        # Shuffling the data and splitting into folds
        shuffled_indexes = np.random.choice(n,n,False)
        y_shuffle = y[shuffled_indexes]
        X_shuffle = X[shuffled_indexes]
        n_per_fold = int(n/folds)
        
        # Doing CV over thresholds
        for i in range(len(thresholds)):
            
            # Getting selected features          
            embedded_rf_selector = SelectFromModel(RandomForestRegressor(n_estimators=100), threshold = thresholds[i])
            embedded_rf_selector.fit(X, y.reshape(n,))
            embedded_rf_support = embedded_rf_selector.get_support()
            print(str(np.sum(embedded_rf_support)), 'selected features')
            selected_features = np.where(embedded_rf_support)[0]
            if len(selected_features)==0:
                selected_features = np.random.choice(p,1,False)
            Selected_features.append(selected_features)

            for f in range(folds):

                # Getting CVtraining and CVtest set
                y_cvtest = y_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]
                X_cvtest = X_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]

                if f>0:
                    y_cvtrain = y_shuffle[:(f*n_per_fold)]
                    X_cvtrain = X_shuffle[:(f*n_per_fold)]
                if f<(folds-1):
                    if f>0:
                        y_cvtrain = np.append(y_cvtrain,y_shuffle[(f+1)*n_per_fold:],0)
                        X_cvtrain = np.append(X_cvtrain,X_shuffle[(f+1)*n_per_fold:],0)
                    else:
                        y_cvtrain = y_shuffle[(f+1)*n_per_fold:]
                        X_cvtrain = X_shuffle[(f+1)*n_per_fold:]

                # Running training and predicting with RF
                model = RandomForestRegressor(n_estimators=100).fit(X_cvtrain[:,selected_features],y_cvtrain.reshape(len(y_cvtrain),))
                preds = model.predict(X_cvtest[:,selected_features])
                errcv[i] += np.mean((y_cvtest - preds)**2)
        
        # Getting best threshold and running final model
        best_threshold =np.where(errcv==np.min(errcv))[0][0]
        model = RandomForestRegressor(n_estimators=100).fit(X[:,Selected_features[best_threshold]],y.reshape(n,))
        preds = model.predict(Xtest[:,Selected_features[best_threshold]])
        Runtime[run,0] = time.time()-t
        
        # MSE
        MSE_Y[run,0] = simulations.MSE_pc(preds.reshape(ntest,1),ytest)
        MSE_F[run,0] = simulations.MSE_pc(preds.reshape(ntest,1),ftest)

        # VS accuracy
        forest_select = np.zeros(p)
        forest_select[Selected_features[best_threshold]]=1
        PPV[run,0]=np.mean(select[forest_select>0])
        NPV[run,0]=np.mean((1-select[forest_select==0]))
        TPR[run,0]=np.mean(forest_select[select>0])
        TNR[run,0]=np.mean((1-forest_select[select==0]))

        TP = np.sum(forest_select[select>0])
        TN = np.sum(1-forest_select[select==0])
        FP = np.sum(1-select[forest_select>0])
        FN = np.sum(select[forest_select==0])

        MCC[run,0]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))

        if (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)==0:
            MCC[run,0]=0


        """
        Running Gboost with VS-CV
        """

        # Setting up storage objects
        folds=5
        thresholds = ["1*mean", "2*mean", "4*mean", "8*mean", "16*mean"]
        errcv = np.zeros(len(thresholds))
        Selected_features = []
        
        t = time.time()
        
        # Shuffling the data and splitting into folds
        shuffled_indexes = np.random.choice(n,n,False)
        y_shuffle = y[shuffled_indexes]
        X_shuffle = X[shuffled_indexes]
        n_per_fold = int(n/folds)
        
        # Doing CV over thresholds
        for i in range(len(thresholds)):
            
            # Getting selected features          
            embedded_rf_selector = SelectFromModel(GradientBoostingRegressor(n_estimators=100,learning_rate=0.01), threshold = thresholds[i])
            embedded_rf_selector.fit(X, y.reshape(n,))
            embedded_rf_support = embedded_rf_selector.get_support()
            print(str(np.sum(embedded_rf_support)), 'selected features')
            selected_features = np.where(embedded_rf_support)[0]
            if len(selected_features)==0:
                selected_features = np.random.choice(p,1,False)
            Selected_features.append(selected_features)

            for f in range(folds):

                # Getting CVtraining and CVtest set
                y_cvtest = y_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]
                X_cvtest = X_shuffle[(f*n_per_fold):min((f+1)*n_per_fold, n)]

                if f>0:
                    y_cvtrain = y_shuffle[:(f*n_per_fold)]
                    X_cvtrain = X_shuffle[:(f*n_per_fold)]
                if f<(folds-1):
                    if f>0:
                        y_cvtrain = np.append(y_cvtrain,y_shuffle[(f+1)*n_per_fold:],0)
                        X_cvtrain = np.append(X_cvtrain,X_shuffle[(f+1)*n_per_fold:],0)
                    else:
                        y_cvtrain = y_shuffle[(f+1)*n_per_fold:]
                        X_cvtrain = X_shuffle[(f+1)*n_per_fold:]

                # Running training and predicting with RF
                model = GradientBoostingRegressor(n_estimators=100,learning_rate=0.01).fit(X_cvtrain[:,selected_features],y_cvtrain.reshape(len(y_cvtrain),))
                preds = model.predict(X_cvtest[:,selected_features])
                errcv[i] += np.mean((y_cvtest - preds)**2)
        
        # Getting best threshold and running final model
        best_threshold =np.where(errcv==np.min(errcv))[0][0]
        model = GradientBoostingRegressor(n_estimators=100,learning_rate=0.01).fit(X[:,Selected_features[best_threshold]],y.reshape(n,))
        preds = model.predict(Xtest[:,Selected_features[best_threshold]])
        Runtime[run,1] = time.time()-t
        
        # MSE
        MSE_Y[run,1] = simulations.MSE_pc(preds.reshape(ntest,1),ytest)
        MSE_F[run,1] = simulations.MSE_pc(preds.reshape(ntest,1),ftest)

        # VS accuracy
        forest_select = np.zeros(p)
        forest_select[Selected_features[best_threshold]]=1
        PPV[run,1]=np.mean(select[forest_select>0])
        NPV[run,1]=np.mean((1-select[forest_select==0]))
        TPR[run,1]=np.mean(forest_select[select>0])
        TNR[run,1]=np.mean((1-forest_select[select==0]))

        TP = np.sum(forest_select[select>0])
        TN = np.sum(1-forest_select[select==0])
        FP = np.sum(1-select[forest_select>0])
        FN = np.sum(select[forest_select==0])

        MCC[run,1]=(TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))

        if (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)==0:
            MCC[run,1]=0
            
        """
        Running ncvreg
        """
        r=robjects.r
        output = r.source("ncvreg_code.R")
        Runtime[run,2:5]=output[0][0]
        MSE_F[run,2:5]=output[0][1]
        MSE_Y[run,2:5]=output[0][2]
        TPR[run,2:5]=output[0][3]
        TNR[run,2:5]=output[0][4]
        PPV[run,2:5]=output[0][5]
        NPV[run,2:5]=output[0][6]
        MCC[run,2:5]=output[0][7]


        """
        Running sfgam
        """
        r=robjects.r
        output = r.source("sparseGAM_code.R")
        Runtime[run,5:8]=output[0][0]
        MSE_F[run,5:8]=output[0][1]
        MSE_Y[run,5:8]=output[0][2]
        TPR[run,5:8]=output[0][3]
        TNR[run,5:8]=output[0][4]
        PPV[run,5:8]=output[0][5]
        NPV[run,5:8]=output[0][6]
        MCC[run,5:8]=output[0][7]

    """
    Runnig sbgam
    """
    if SBGAM:
        r=robjects.r
        output = r.source("sparseBayesGAM_code.R")
        Runtime[run,8]=np.array(output[0][0])
        MSE_F[run,8]=np.array(output[0][1])
        MSE_Y[run,8]=np.array(output[0][2])
        TPR[run,8]=np.array(output[0][3])
        TNR[run,8]=np.array(output[0][4])
        PPV[run,8]=np.array(output[0][5])
        NPV[run,8]=np.array(output[0][6])
        MCC[run,8]=np.array(output[0][7])
    
    """
    Removing files from directory
    """
    os.remove("y.npy")
    os.remove("f.npy")
    os.remove("X.npy")
    os.remove("ytest.npy")
    os.remove("ftest.npy")
    os.remove("Xtest.npy")
    os.remove("select.npy")

    """
    Printing out current results
    """
    print("RUN {0}".format(run))
    print("Runtime mean is:", Runtime[:run+1].mean(0))
    print("TPR mean is:", TPR[:run+1].mean(0))
    print("PPV mean is:", PPV[:run+1].mean(0))
    print("MCC mean is:", MCC[:run+1].mean(0))
    print("MSE_F mean is:", MSE_F[:run+1].mean(0))
    print("MSE_Y mean is:", MSE_Y[:run+1].mean(0), "\n")
    

R2= 0.8993636862772934
data generated
Noise variance is:  0.006067570867112703
Average data variance is:  0.9972629072424785
0.07080984115600586
Fold number 1 
Fold number 2 
Fold number 3 
lambda0 =  60 
RUN 0
Runtime mean is: [0.         0.         0.         0.         0.         0.
 0.         0.         1.05049987]
TPR mean is: [0.  0.  0.  0.  0.  0.  0.  0.  0.6]
PPV mean is: [0. 0. 0. 0. 0. 0. 0. 0. 1.]
MCC mean is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.76656954]
MSE_F mean is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.23702995]
MSE_Y mean is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.36967159] 

R2= 0.9021920644185265
data generated
Noise variance is:  0.0056107843331200755
Average data variance is:  1.0170416340411146
0.059836626052856445
Fold number 1 
Fold number 2 
Fold number 3 
lambda0 =  60 
RUN 1
Runtime mean is: [0.         0.         0.   

In [12]:
namelist = ["Runtime", "MSE_F", "MSE_Y", "TPR", "TNR", "PPV", "NPV", "MCC"]
objlist = [Runtime, MSE_F, MSE_Y, TPR, TNR, PPV, NPV, MCC]
#iters = np.random.choice(1000,100,False)
iters = np.linspace(0,99,100).astype(int)

for i in range(len(objlist)):
    print("Mean {0} is:".format(namelist[i]), np.mean(objlist[i][iters],0))

print("\n")
for i in range(len(objlist)):
    print("Median {0} is:".format(namelist[i]), np.median(objlist[i][iters],0))

print("\n")
quant = 0.25
for i in range(len(objlist)):
    if namelist[i] in ["Runtime", "MSE_F", "MSE_Y"]:
        print("{1} quantile {0} is:".format(namelist[i], quant), np.quantile(objlist[i][iters],1-quant,0))
    else:
        print("{1} quantile {0} is:".format(namelist[i], quant), np.quantile(objlist[i][iters],quant,0))

Mean Runtime is: [ 0.          0.          0.          0.          0.          0.
  0.          0.         36.37381007]
Mean MSE_F is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.24034256]
Mean MSE_Y is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.34837011]
Mean TPR is: [0.    0.    0.    0.    0.    0.    0.    0.    0.606]
Mean TNR is: [0. 0. 0. 0. 0. 0. 0. 0. 1.]
Mean PPV is: [0. 0. 0. 0. 0. 0. 0. 0. 1.]
Mean NPV is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.97978341]
Mean MCC is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.76041299]


Median Runtime is: [ 0.          0.          0.          0.          0.          0.
  0.          0.         39.97057593]
Median MSE_F is: [0.         0.         0.         0.         0.         0.
 0.         0.         0.19172483]
Median MSE_Y is: [0.         0.         0.         0.       

# REMEMBER TO MULTIPLY RUNTIME BY 60

In [13]:
Output = {"Runtime" : Runtime, "MSE_F" : MSE_F
        , "MSE_Y" : MSE_Y, "TPR" :TPR, "TNR" : TNR, "PPV" : PPV, "NPV" : NPV, "MCC" : MCC}
String = "Stage1_GP_lsmean={0}_p={1}_embedded_sslasso".format(lsmean,p)
np.save(String, Output) # saving