In [2]:
from time import perf_counter
import numpy as np,os
import pandas as pd
from collections import defaultdict
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from imblearn.ensemble import BalancedBaggingClassifier, RUSBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, EditedNearestNeighbours, TomekLinks
from imblearn.pipeline import Pipeline

from sklearn.metrics import matthews_corrcoef, precision_recall_curve, auc, accuracy_score, precision_score, recall_score
import seaborn as sns,matplotlib.pyplot as plt

from library.configs import IMBS, CLFS, ENSEMBLES, CV, SCORERS
from library.utils import evaluate, read_data
from library.cleaners import kDN, ih_prob,FilteringEstimator

In [3]:
DATASETS = ['groovy-1_5_7.csv','jruby-1.4.0.csv','lucene-2.9.0.csv','jruby-1.7.0.preview1.csv','groovy-1_6_BETA_1.csv',
        'derby-10.2.1.6.csv','wicket-1.5.3.csv','camel-2.9.0.csv','camel-1.4.0.csv','activemq-5.8.0.csv']
DATASETS = [f for f in os.listdir("JIRA/") if 'csv' in f]
len(DATASETS)

32

In [4]:
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin, clone
class CLNI(BaseEstimator, ClassifierMixin):
    def __init__(self, estimator, detector, K=5, threshold=.999, random_state=None):
        self.estimator = estimator
        self.detector = detector
        self.threshold = threshold
        self.K = K
        self.random_state = random_state
        
    def clean(self,X,Y, sample_weight):
        N,alpha = 5,.60
        Xt,Yt = X.copy(),Y.copy()
        while True:
            ne = self.detector(Xt,Yt,K=self.K)
            cidx = ne<=alpha
            #print(cidx.sum(),len(Xt),cidx.sum()/len(Xt))
            N = len(Xt)
            Xt,Yt = Xt[cidx],Yt[cidx]
            try:
                sample_weight = sample_weight[cidx]
            except:
                pass
            if cidx.sum()/N>=.99:
                break
        return Xt,Yt,sample_weight

    def fit(self, X, Y,sample_weight=None):
        Xf,Yf,sample_weight = self.clean(X, Y, sample_weight)
#         a,b = np.unique(Y,return_counts=True)[1],np.unique(Yf,return_counts=True)[1]
#         print(a.max()/a.min(),b.max()/b.min())
        assert len(np.unique(Yf))==2,"Pos class completely filtered out"
        try:
            self.estimator = self.estimator.fit(Xf, Yf,sample_weight=sample_weight)
        except TypeError as e:
            self.estimator = self.estimator.fit(Xf, Yf)
        return self

    @property
    def classes_(self):
        return self.estimator.classes_

    def predict(self, X):
        return self.estimator.predict(X)
    
    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

In [5]:
models = {}
for im,samp in IMBS.items():
    for c,clf in CLFS.items():
        models[(im,c)] = Pipeline([('samp',samp),('clf',CLNI(clf,kDN))])
    
models.keys(),len(models)

(dict_keys([('smote', 'dt'), ('smote', 'lr'), ('smote', 'nb'), ('smote', 'svm'), ('smote', 'knn'), ('smote', 'rf'), ('rus', 'dt'), ('rus', 'lr'), ('rus', 'nb'), ('rus', 'svm'), ('rus', 'knn'), ('rus', 'rf'), ('wilson', 'dt'), ('wilson', 'lr'), ('wilson', 'nb'), ('wilson', 'svm'), ('wilson', 'knn'), ('wilson', 'rf'), ('tomek', 'dt'), ('tomek', 'lr'), ('tomek', 'nb'), ('tomek', 'svm'), ('tomek', 'knn'), ('tomek', 'rf'), ('None', 'dt'), ('None', 'lr'), ('None', 'nb'), ('None', 'svm'), ('None', 'knn'), ('None', 'rf')]),
 30)

In [6]:
path = "CLNI_consensus.csv"
cols = pd.MultiIndex.from_product([IMBS.keys(),CLFS.keys(),[f.__name__ for f in SCORERS]],names=['imb','clf','metric'])
#df = pd.DataFrame(index=DATASETS,columns=cols)
df = pd.read_csv(path,header=[0,1,2],index_col=0)

In [7]:
for it,d in enumerate(DATASETS):
    print(it)
    X,y_noisy,y_real = read_data(d,stats=True)
    if df.loc[d,:].isna().sum()==0:
        print(f"Skipping {d}")
        continue
    for k in models:
        print(k)
        sd = perf_counter()
        r = evaluate(models[k],X,y_noisy,y_real,CV,SCORERS)
        for f in r:
            df.loc[d,(k[0],k[1],f)] = np.nanmean(r[f])
        print(round(perf_counter()-sd,2),[round(r[f].mean(),3) for f in r])
    print()
    df.to_csv(path)

0
activemq-5.8.0.csv noise:0.058, imb:15.847,203,3217, Shape:(3420, 65)
Skipping activemq-5.8.0.csv
1
groovy-1_6_BETA_1.csv noise:0.128, imb:6.017,117,704, Shape:(821, 65)
Skipping groovy-1_6_BETA_1.csv
2
activemq-5.3.0.csv noise:0.094, imb:15.669,142,2225, Shape:(2367, 65)
Skipping activemq-5.3.0.csv
3
wicket-1.3.0-incubating-beta-1.csv noise:0.164, imb:4.806,288,1384, Shape:(1672, 65)
Skipping wicket-1.3.0-incubating-beta-1.csv
4
jruby-1.1.csv noise:0.175, imb:3.540,161,570, Shape:(731, 65)
Skipping jruby-1.1.csv
5
jruby-1.4.0.csv noise:0.190, imb:3.890,200,778, Shape:(978, 65)
Skipping jruby-1.4.0.csv
6
lucene-2.3.0.csv noise:0.204, imb:4.031,160,645, Shape:(805, 65)
Skipping lucene-2.3.0.csv
7
hbase-0.95.2.csv noise:0.260, imb:15.088,114,1720, Shape:(1834, 65)
('smote', 'dt')
11.93 [0.173, 0.45]
('smote', 'lr')
14.31 [0.193, 0.425]
('smote', 'nb')
12.35 [0.318, 0.522]
('smote', 'svm')
45.81 [0.184, 0.42]
('smote', 'knn')
11.81 [0.187, 0.457]
('smote', 'rf')
16.96 [0.245, 0.476]
('r

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.94 [0.158, 0.451]
('None', 'dt')
7.73 [0.142, 0.534]
('None', 'lr')
6.93 [0.111, 0.448]
('None', 'nb')
9.1 [0.239, 0.491]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.37 [0.138, 0.289]
('None', 'knn')
8.06 [0.102, 0.497]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.4 [0.14, 0.449]

8
lucene-3.0.0.csv noise:0.185, imb:6.037,190,1147, Shape:(1337, 65)
('smote', 'dt')
14.95 [0.236, 0.425]
('smote', 'lr')
13.32 [0.319, 0.347]
('smote', 'nb')
17.88 [0.307, 0.334]
('smote', 'svm')
24.79 [0.329, 0.339]
('smote', 'knn')
15.05 [0.245, 0.454]
('smote', 'rf')
15.0 [0.337, 0.295]
('rus', 'dt')
10.24 [0.27, 0.497]
('rus', 'lr')
12.43 [0.328, 0.34]
('rus', 'nb')
10.67 [0.29, 0.359]
('rus', 'svm')
10.39 [0.32, 0.32]
('rus', 'knn')
10.13 [0.257, 0.396]
('rus', 'rf')
11.41 [0.348, 0.316]
('wilson', 'dt')
8.92 [0.304, 0.46]
('wilson', 'lr')
10.96 [0.33, 0.337]
('wilson', 'nb')
8.88 [0.305, 0.358]
('wilson', 'svm')
10.43 [0.3, 0.312]
('wilson', 'knn')
9.11 [0.31, 0.359]
('wilson', 'rf')
10.6 [0.321, 0.313]
('tomek', 'dt')
12.14 [0.173, 0.305]
('tomek', 'lr')
13.92 [0.196, 0.301]
('tomek', 'nb')
12.23 [0.238, 0.348]
('tomek', 'svm')
13.74 [0.118, 0.251]
('tomek', 'knn')
12.55 [0.136, 0.233]
('tomek', 'rf')
13.9 [0.107, 0.275]
('None', 'dt')
10.7 [0.162, 0.294]
('N

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.93 [0.069, 0.266]

9
camel-2.9.0.csv noise:0.044, imb:34.600,200,6920, Shape:(7120, 65)
('smote', 'dt')
65.44 [0.185, 0.211]
('smote', 'lr')
77.27 [0.176, 0.202]
('smote', 'nb')
62.5 [0.221, 0.37]
('smote', 'svm')
739.79 [0.177, 0.164]
('smote', 'knn')
68.28 [0.187, 0.305]
('smote', 'rf')
84.05 [0.27, 0.199]
('rus', 'dt')
10.99 [0.177, 0.381]
('rus', 'lr')
13.65 [0.205, 0.196]
('rus', 'nb')
10.91 [0.23, 0.372]
('rus', 'svm')
11.64 [0.22, 0.191]
('rus', 'knn')
11.95 [0.217, 0.335]
('rus', 'rf')
12.66 [0.22, 0.272]
('wilson', 'dt')
53.75 [0.253, 0.286]
('wilson', 'lr')
52.61 [0.296, 0.268]
('wilson', 'nb')
50.49 [0.277, 0.329]
('wilson', 'svm')
56.64 [0.27, 0.224]
('wilson', 'knn')
60.06 [0.276, 0.275]
('wilson', 'rf')
56.45 [0.25, 0.245]
('tomek', 'dt')
51.78 [0.178, 0.28]
('tomek', 'lr')
54.62 [0.212, 0.261]
('tomek', 'nb')
50.21 [0.285, 0.294]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


56.95 [0.178, 0.168]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


60.55 [0.18, 0.256]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


54.0 [0.16, 0.241]
('None', 'dt')
28.8 [0.159, 0.263]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


30.19 [0.172, 0.263]
('None', 'nb')
28.32 [0.283, 0.283]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


33.79 [0.138, 0.156]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


36.27 [0.144, 0.255]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


31.23 [0.128, 0.239]

10
wicket-1.5.3.csv noise:0.064, imb:26.720,93,2485, Shape:(2578, 65)
('smote', 'dt')
21.05 [0.117, 0.153]
('smote', 'lr')
22.07 [0.16, 0.145]
('smote', 'nb')
17.85 [0.209, 0.345]
('smote', 'svm')
81.23 [0.163, 0.131]
('smote', 'knn')
20.09 [0.154, 0.25]
('smote', 'rf')
25.44 [0.144, 0.138]
('rus', 'dt')
8.82 [0.199, 0.431]
('rus', 'lr')
9.78 [0.205, 0.178]
('rus', 'nb')
9.03 [0.216, 0.389]
('rus', 'svm')
9.3 [0.217, 0.154]
('rus', 'knn')
9.88 [0.195, 0.338]
('rus', 'rf')
11.29 [0.213, 0.252]
('wilson', 'dt')
11.07 [0.156, 0.212]
('wilson', 'lr')
13.34 [0.126, 0.196]
('wilson', 'nb')
10.74 [0.224, 0.291]
('wilson', 'svm')
13.85 [0.133, 0.144]
('wilson', 'knn')
11.73 [0.102, 0.15]
('wilson', 'rf')
12.94 [0.112, 0.186]
('tomek', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.06 [0.096, 0.229]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.26 [0.084, 0.177]
('tomek', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.06 [0.162, 0.269]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.97 [0.099, 0.121]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.56 [0.094, 0.136]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.86 [0.105, 0.156]
('None', 'dt')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
7.75 [0.078, 0.349]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.3 [0.063, 0.171]
('None', 'nb')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
8.19 [0.13, 0.291]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.94 [0.039, 0.107]
('None', 'knn')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
8.03 [0.033, 0.153]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.14 [0.054, 0.149]

11
lucene-3.1.csv noise:0.120, imb:7.477,331,2475, Shape:(2806, 65)
('smote', 'dt')
19.92 [0.148, 0.312]
('smote', 'lr')
24.55 [0.153, 0.232]
('smote', 'nb')
17.8 [0.179, 0.274]
('smote', 'svm')
70.69 [0.142, 0.129]
('smote', 'knn')
19.97 [0.142, 0.325]
('smote', 'rf')
27.53 [0.176, 0.158]
('rus', 'dt')
9.63 [0.147, 0.356]
('rus', 'lr')
12.17 [0.163, 0.204]
('rus', 'nb')
10.32 [0.191, 0.29]
('rus', 'svm')
10.9 [0.157, 0.137]
('rus', 'knn')
10.65 [0.163, 0.314]
('rus', 'rf')
11.12 [0.162, 0.196]
('wilson', 'dt')
11.46 [0.168, 0.296]
('wilson', 'lr')
15.06 [0.202, 0.227]
('wilson', 'nb')
11.32 [0.19, 0.273]
('wilson', 'svm')
17.3 [0.195, 0.167]
('wilson', 'knn')
12.38 [0.161, 0.229]
('wilson', 'rf')
13.86 [0.189, 0.169]
('tomek', 'dt')
11.71 [0.173, 0.247]
('tomek', 'lr')
13.69 [0.166, 0.183]
('tomek', 'nb')
11.38 [0.18, 0.251]
('tomek', 'svm')
18.9 [0.159, 0.171]
('tomek', 'knn')
12.87 [0.168, 0.152]
('tomek', 'rf')
14.7 [0.172, 0.151]
('None', 'dt')
7.64 [0.17, 0.2

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.35 [0.283, 0.336]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.95 [0.253, 0.481]
('None', 'rf')
8.64 [0.247, 0.442]

14
groovy-1_5_7.csv noise:0.085, imb:8.463,80,677, Shape:(757, 65)
('smote', 'dt')
10.67 [0.205, 0.383]
('smote', 'lr')
12.46 [0.183, 0.374]
('smote', 'nb')
9.69 [0.225, 0.452]
('smote', 'svm')
15.28 [0.172, 0.208]
('smote', 'knn')
10.33 [0.151, 0.399]
('smote', 'rf')
12.35 [0.285, 0.415]
('rus', 'dt')
10.1 [0.202, 0.414]
('rus', 'lr')
11.06 [0.22, 0.399]
('rus', 'nb')
9.44 [0.236, 0.448]
('rus', 'svm')
9.64 [0.235, 0.339]
('rus', 'knn')
10.46 [0.228, 0.429]
('rus', 'rf')
11.3 [0.217, 0.411]
('wilson', 'dt')
8.37 [0.311, 0.416]
('wilson', 'lr')
9.57 [0.365, 0.454]
('wilson', 'nb')
8.34 [0.26, 0.454]
('wilson', 'svm')
8.72 [0.396, 0.451]
('wilson', 'knn')
8.5 [0.399, 0.466]
('wilson', 'rf')
9.74 [0.386, 0.418]
('tomek', 'dt')
7.43 [0.308, 0.362]
('tomek', 'lr')
7.84 [0.36, 0.462]
('tomek', 'nb')
7.35 [0.293, 0.441]
('tomek', 'svm')
7.81 [0.413, 0.495]
('tomek', 'knn')
7.47 [0.426, 0.487]
('tomek', 'rf')
8.82 [0.395, 0.43]
('None', 

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.93 [0.121, 0.467]
('tomek', 'nb')
8.24 [0.207, 0.412]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.57 [0.147, 0.323]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.45 [0.127, 0.545]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.53 [0.134, 0.437]
('None', 'dt')
7.04 [0.123, 0.495]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.53 [0.11, 0.452]
('None', 'nb')
11.04 [0.196, 0.41]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.93 [0.119, 0.298]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.37 [0.105, 0.531]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.14 [0.113, 0.418]

16
camel-2.11.0.csv noise:0.024, imb:43.230,200,8646, Shape:(8846, 65)
('smote', 'dt')
78.3 [0.208, 0.181]
('smote', 'lr')
90.45 [0.165, 0.142]
('smote', 'nb')
66.49 [0.171, 0.293]
('smote', 'svm')
990.93 [0.168, 0.119]
('smote', 'knn')
81.47 [0.18, 0.281]
('smote', 'rf')
97.18 [0.31, 0.26]
('rus', 'dt')
11.02 [0.159, 0.408]
('rus', 'lr')
12.48 [0.177, 0.142]
('rus', 'nb')
10.43 [0.161, 0.314]
('rus', 'svm')
11.26 [0.175, 0.141]
('rus', 'knn')
11.79 [0.168, 0.353]
('rus', 'rf')
11.73 [0.192, 0.23]
('wilson', 'dt')
75.83 [0.264, 0.293]
('wilson', 'lr')
78.23 [0.211, 0.19]
('wilson', 'nb')
75.99 [0.199, 0.275]
('wilson', 'svm')
87.22 [0.199, 0.175]
('wilson', 'knn')
85.52 [0.229, 0.207]
('wilson', 'rf')
79.11 [0.21, 0.248]
('tomek', 'dt')
69.37 [0.169, 0.26]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


70.15 [0.124, 0.179]
('tomek', 'nb')
68.89 [0.119, 0.183]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


76.93 [0.126, 0.143]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


80.08 [0.166, 0.194]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


71.12 [0.086, 0.191]
('None', 'dt')
38.77 [0.146, 0.265]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


40.64 [0.086, 0.172]
('None', 'nb')
37.09 [0.109, 0.174]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


44.16 [0.119, 0.135]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


43.89 [0.174, 0.209]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


40.24 [0.093, 0.185]

17
jruby-1.7.0.preview1.csv noise:0.099, imb:8.902,163,1451, Shape:(1614, 65)
('smote', 'dt')
12.45 [0.237, 0.39]
('smote', 'lr')
13.83 [0.236, 0.327]
('smote', 'nb')
15.13 [0.291, 0.458]
('smote', 'svm')
31.96 [0.258, 0.24]
('smote', 'knn')
12.73 [0.208, 0.412]
('smote', 'rf')
15.12 [0.292, 0.32]
('rus', 'dt')
9.53 [0.268, 0.457]
('rus', 'lr')
11.51 [0.291, 0.317]
('rus', 'nb')
9.95 [0.304, 0.456]
('rus', 'svm')
9.29 [0.297, 0.283]
('rus', 'knn')
10.04 [0.277, 0.422]
('rus', 'rf')
10.87 [0.284, 0.368]
('wilson', 'dt')
8.49 [0.32, 0.418]
('wilson', 'lr')
10.43 [0.299, 0.334]
('wilson', 'nb')
8.35 [0.302, 0.44]
('wilson', 'svm')
9.46 [0.31, 0.302]
('wilson', 'knn')
8.75 [0.295, 0.356]
('wilson', 'rf')
10.06 [0.325, 0.348]
('tomek', 'dt')
8.57 [0.24, 0.302]
('tomek', 'lr')
10.74 [0.257, 0.274]
('tomek', 'nb')
8.54 [0.309, 0.391]
('tomek', 'svm')
9.7 [0.285, 0.27]
('tomek', 'knn')
8.74 [0.255, 0.277]
('tomek', 'rf')
10.23 [0.294, 0.334]
('None', 'dt')
7.71 [0.226, 0.

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.9 [0.097, 0.455]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.47 [0.104, 0.39]
('tomek', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.86 [0.133, 0.401]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.11 [0.127, 0.139]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.04 [0.088, 0.405]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.91 [0.08, 0.335]
('None', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.51 [0.077, 0.452]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.45 [0.078, 0.393]
('None', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.72 [0.061, 0.388]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.41 [0.068, 0.14]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.42 [0.071, 0.457]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.77 [0.037, 0.329]

19
camel-2.10.0.csv noise:0.053, imb:24.447,311,7603, Shape:(7914, 65)
('smote', 'dt')
80.57 [0.186, 0.26]
('smote', 'lr')
91.72 [0.176, 0.205]
('smote', 'nb')
73.36 [0.235, 0.39]
('smote', 'svm')
808.6 [0.165, 0.159]
('smote', 'knn')
84.71 [0.175, 0.31]
('smote', 'rf')
102.92 [0.253, 0.223]
('rus', 'dt')
11.51 [0.178, 0.406]
('rus', 'lr')
14.14 [0.209, 0.205]
('rus', 'nb')
11.31 [0.234, 0.396]
('rus', 'svm')
13.25 [0.22, 0.178]
('rus', 'knn')
12.1 [0.195, 0.332]
('rus', 'rf')
13.15 [0.216, 0.273]
('wilson', 'dt')
60.8 [0.237, 0.273]
('wilson', 'lr')
63.66 [0.279, 0.227]
('wilson', 'nb')
62.06 [0.277, 0.369]
('wilson', 'svm')
68.05 [0.275, 0.209]
('wilson', 'knn')
69.2 [0.265, 0.265]
('wilson', 'rf')
65.61 [0.261, 0.221]
('tomek', 'dt')
58.32 [0.167, 0.215]
('tomek', 'lr')
60.41 [0.191, 0.232]
('tomek', 'nb')
56.06 [0.281, 0.344]
('tomek', 'svm')
68.5 [0.187, 0.154]
('tomek', 'knn')
66.48 [0.161, 0.202]
('tomek', 'rf')
63.61 [0.143, 0.191]
('None', 'dt')
32.68 [0.1

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


34.73 [0.156, 0.227]
('None', 'nb')
31.09 [0.28, 0.343]
('None', 'svm')
40.11 [0.173, 0.138]
('None', 'knn')
38.4 [0.153, 0.197]
('None', 'rf')
34.49 [0.123, 0.181]

20
derby-10.2.1.6.csv noise:0.290, imb:9.906,180,1783, Shape:(1963, 65)
('smote', 'dt')
12.17 [0.272, 0.582]
('smote', 'lr')
17.64 [0.352, 0.624]
('smote', 'nb')
13.4 [0.444, 0.682]
('smote', 'svm')
46.16 [0.4, 0.627]
('smote', 'knn')
12.31 [0.321, 0.603]
('smote', 'rf')
16.52 [0.387, 0.669]
('rus', 'dt')
10.49 [0.381, 0.659]
('rus', 'lr')
13.17 [0.415, 0.666]
('rus', 'nb')
10.66 [0.441, 0.682]
('rus', 'svm')
11.22 [0.424, 0.687]
('rus', 'knn')
10.75 [0.408, 0.663]
('rus', 'rf')
12.39 [0.425, 0.671]
('wilson', 'dt')
9.63 [0.317, 0.63]
('wilson', 'lr')
12.71 [0.326, 0.687]
('wilson', 'nb')
9.53 [0.433, 0.679]
('wilson', 'svm')
11.84 [0.321, 0.666]
('wilson', 'knn')
10.05 [0.322, 0.647]
('wilson', 'rf')
11.65 [0.304, 0.689]
('tomek', 'dt')
11.7 [0.199, 0.634]
('tomek', 'lr')
14.1 [0.173, 0.638]
('tomek', 'nb')
11.5 [0.419, 0

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.18 [0.121, 0.524]
('None', 'knn')
12.79 [0.123, 0.614]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.0 [0.102, 0.645]

21
jruby-1.5.0.csv noise:0.218, imb:3.098,276,855, Shape:(1131, 65)
('smote', 'dt')
11.17 [0.27, 0.506]
('smote', 'lr')
12.88 [0.289, 0.428]
('smote', 'nb')
10.07 [0.329, 0.531]
('smote', 'svm')
18.03 [0.297, 0.318]
('smote', 'knn')
11.31 [0.254, 0.503]
('smote', 'rf')
13.87 [0.33, 0.474]
('rus', 'dt')
11.14 [0.293, 0.526]
('rus', 'lr')
13.76 [0.321, 0.447]
('rus', 'nb')
10.98 [0.337, 0.521]
('rus', 'svm')
12.22 [0.334, 0.369]
('rus', 'knn')
11.51 [0.297, 0.519]
('rus', 'rf')
12.49 [0.319, 0.477]
('wilson', 'dt')
10.56 [0.295, 0.522]
('wilson', 'lr')
12.75 [0.316, 0.449]
('wilson', 'nb')
10.54 [0.322, 0.537]
('wilson', 'svm')
11.44 [0.337, 0.38]
('wilson', 'knn')
10.7 [0.307, 0.516]
('wilson', 'rf')
12.02 [0.329, 0.481]
('tomek', 'dt')
10.89 [0.361, 0.491]
('tomek', 'lr')
13.11 [0.399, 0.464]
('tomek', 'nb')
10.85 [0.371, 0.512]
('tomek', 'svm')
12.14 [0.43, 0.436]
('tomek', 'knn')
11.08 [0.406, 0.523]
('tomek', 'rf')
12.41 [0.403, 0.491]
('None', 'dt')
10.06 [0.36

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.79 [0.109, 0.529]
('tomek', 'nb')
9.94 [0.367, 0.609]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


14.51 [0.125, 0.394]
('tomek', 'knn')
10.8 [0.08, 0.477]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.94 [0.044, 0.523]
('None', 'dt')
6.89 [0.117, 0.527]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.12 [0.105, 0.508]
('None', 'nb')
9.49 [0.345, 0.586]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.98 [0.101, 0.386]
('None', 'knn')
8.28 [0.059, 0.466]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.76 [0.021, 0.517]

23
lucene-2.9.0.csv noise:0.226, imb:3.921,278,1090, Shape:(1368, 65)
('smote', 'dt')
13.69 [0.285, 0.518]
('smote', 'lr')
14.11 [0.341, 0.464]
('smote', 'nb')
14.53 [0.319, 0.493]
('smote', 'svm')
22.67 [0.36, 0.461]
('smote', 'knn')
15.15 [0.281, 0.525]
('smote', 'rf')
16.89 [0.362, 0.431]
('rus', 'dt')
10.42 [0.319, 0.563]
('rus', 'lr')
13.24 [0.356, 0.477]
('rus', 'nb')
11.22 [0.298, 0.482]
('rus', 'svm')
11.7 [0.354, 0.451]
('rus', 'knn')
10.74 [0.299, 0.509]
('rus', 'rf')
12.49 [0.347, 0.455]
('wilson', 'dt')
8.51 [0.33, 0.55]
('wilson', 'lr')
10.42 [0.375, 0.484]
('wilson', 'nb')
8.4 [0.314, 0.502]
('wilson', 'svm')
10.26 [0.363, 0.462]
('wilson', 'knn')
8.69 [0.323, 0.499]
('wilson', 'rf')
10.23 [0.379, 0.445]
('tomek', 'dt')
11.31 [0.263, 0.463]
('tomek', 'lr')
13.75 [0.273, 0.457]
('tomek', 'nb')
11.2 [0.316, 0.467]
('tomek', 'svm')
14.04 [0.27, 0.44]
('tomek', 'knn')
11.42 [0.26, 0.416]
('tomek', 'rf')
12.7 [0.285, 0.451]
('None', 'dt')
10.15 [0.259, 0.4

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.12 [0.346, 0.402]
('None', 'dt')
7.37 [0.296, 0.411]
('None', 'lr')
7.21 [0.296, 0.388]
('None', 'nb')
7.66 [0.33, 0.416]
('None', 'svm')
8.11 [0.332, 0.305]
('None', 'knn')
7.58 [0.309, 0.395]
('None', 'rf')
8.48 [0.311, 0.395]

27
activemq-5.0.0.csv noise:0.139, imb:21.976,82,1802, Shape:(1884, 65)
('smote', 'dt')
11.95 [0.372, 0.496]
('smote', 'lr')
13.45 [0.415, 0.543]
('smote', 'nb')
12.1 [0.434, 0.577]
('smote', 'svm')
38.2 [0.418, 0.535]
('smote', 'knn')
11.09 [0.363, 0.496]
('smote', 'rf')
16.2 [0.443, 0.58]
('rus', 'dt')
9.51 [0.36, 0.539]
('rus', 'lr')
10.48 [0.393, 0.531]
('rus', 'nb')
9.41 [0.409, 0.555]
('rus', 'svm')
9.93 [0.389, 0.539]
('rus', 'knn')
9.5 [0.373, 0.526]
('rus', 'rf')
10.8 [0.403, 0.565]
('wilson', 'dt')
8.59 [0.356, 0.551]
('wilson', 'lr')
11.05 [0.327, 0.541]
('wilson', 'nb')
8.55 [0.445, 0.549]
('wilson', 'svm')
9.3 [0.309, 0.442]
('wilson', 'knn')
8.94 [0.29, 0.516]
('wilson', 'rf')
10.05 [0.319, 0.548]
('tomek', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.35 [0.192, 0.531]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.33 [0.174, 0.484]
('tomek', 'nb')
8.4 [0.384, 0.477]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.92 [0.156, 0.247]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.75 [0.15, 0.489]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.73 [0.156, 0.487]
('None', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.47 [0.173, 0.532]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.36 [0.139, 0.472]
('None', 'nb')
10.98 [0.337, 0.424]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.97 [0.141, 0.236]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.62 [0.145, 0.479]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.17 [0.133, 0.472]

28
derby-10.5.1.1.csv noise:0.126, imb:14.028,180,2525, Shape:(2705, 65)
('smote', 'dt')
20.04 [0.273, 0.414]
('smote', 'lr')
23.39 [0.315, 0.457]
('smote', 'nb')
17.47 [0.378, 0.512]
('smote', 'svm')
81.76 [0.312, 0.4]
('smote', 'knn')
19.62 [0.302, 0.448]
('smote', 'rf')
26.39 [0.36, 0.461]
('rus', 'dt')
10.62 [0.293, 0.501]
('rus', 'lr')
13.28 [0.32, 0.457]
('rus', 'nb')
10.72 [0.371, 0.509]
('rus', 'svm')
11.6 [0.348, 0.463]
('rus', 'knn')
11.32 [0.316, 0.471]
('rus', 'rf')
11.66 [0.333, 0.452]
('wilson', 'dt')
11.21 [0.3, 0.445]
('wilson', 'lr')
14.53 [0.349, 0.471]
('wilson', 'nb')
10.97 [0.387, 0.509]
('wilson', 'svm')
15.16 [0.332, 0.443]
('wilson', 'knn')
11.86 [0.295, 0.436]
('wilson', 'rf')
13.62 [0.329, 0.479]
('tomek', 'dt')
10.61 [0.221, 0.423]
('tomek', 'lr')
13.28 [0.243, 0.446]
('tomek', 'nb')
10.17 [0.385, 0.499]
('tomek', 'svm')
14.41 [0.239, 0.376]
('tomek', 'knn')
11.7 [0.202, 0.417]
('tomek', 'rf')
12.9 [0.188, 0.449]
('None', 'dt')
7.02 [0.2,

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.09 [0.169, 0.425]

29
wicket-1.3.0-beta2.csv noise:0.184, imb:4.780,305,1458, Shape:(1763, 65)
('smote', 'dt')
12.43 [0.222, 0.428]
('smote', 'lr')
13.45 [0.214, 0.308]
('smote', 'nb')
11.84 [0.299, 0.449]
('smote', 'svm')
32.71 [0.213, 0.226]
('smote', 'knn')
13.59 [0.179, 0.399]
('smote', 'rf')
14.75 [0.265, 0.306]
('rus', 'dt')
10.86 [0.231, 0.445]
('rus', 'lr')
13.23 [0.251, 0.325]
('rus', 'nb')
10.48 [0.296, 0.443]
('rus', 'svm')
11.98 [0.274, 0.284]
('rus', 'knn')
10.74 [0.245, 0.407]
('rus', 'rf')
12.2 [0.275, 0.342]
('wilson', 'dt')
11.61 [0.264, 0.431]
('wilson', 'lr')
13.61 [0.28, 0.324]
('wilson', 'nb')
11.33 [0.294, 0.456]
('wilson', 'svm')
13.11 [0.3, 0.297]
('wilson', 'knn')
11.77 [0.271, 0.397]
('wilson', 'rf')
13.0 [0.278, 0.322]
('tomek', 'dt')
12.43 [0.252, 0.354]
('tomek', 'lr')
15.08 [0.28, 0.326]
('tomek', 'nb')
12.63 [0.31, 0.421]
('tomek', 'svm')
14.16 [0.298, 0.29]
('tomek', 'knn')
12.51 [0.278, 0.328]
('tomek', 'rf')
13.56 [0.26, 0.301]
('None', 'dt')
12.23 [

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.45 [0.023, 0.154]
('wilson', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.0 [0.033, 0.184]
('wilson', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.42 [0.066, 0.181]
('wilson', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.22 [0.022, 0.093]
('wilson', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.76 [0.028, 0.131]
('wilson', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.1 [0.026, 0.166]
('tomek', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.35 [0.021, 0.384]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.03 [0.026, 0.181]
('tomek', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out
9.39 [0.052, 0.203]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.05 [0.024, 0.082]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.36 [0.027, 0.139]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.66 [0.015, 0.158]
('None', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.26 [0.043, 0.403]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.95 [0.029, 0.177]
('None', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out
7.92 [0.053, 0.187]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.05 [0.017, 0.073]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.73 [0.028, 0.123]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.78 [0.015, 0.17]

31
hive-0.9.0.csv noise:0.179, imb:25.717,53,1363, Shape:(1416, 65)
('smote', 'dt')
8.86 [0.275, 0.476]
('smote', 'lr')
10.78 [0.386, 0.528]
('smote', 'nb')
14.9 [0.455, 0.608]
('smote', 'svm')
20.95 [0.386, 0.525]
('smote', 'knn')
9.62 [0.313, 0.489]
('smote', 'rf')
13.2 [0.368, 0.543]
('rus', 'dt')
10.04 [0.426, 0.586]
('rus', 'lr')
9.98 [0.441, 0.593]
('rus', 'nb')
9.12 [0.429, 0.596]
('rus', 'svm')
9.38 [0.447, 0.591]
('rus', 'knn')
8.78 [0.445, 0.601]
('rus', 'rf')
11.08 [0.467, 0.604]
('wilson', 'dt')
7.93 [0.284, 0.558]
('wilson', 'lr')
10.03 [0.296, 0.578]
('wilson', 'nb')
7.92 [0.451, 0.596]
('wilson', 'svm')
8.46 [0.292, 0.431]
('wilson', 'knn')
8.13 [0.239, 0.575]
('wilson', 'rf')
9.22 [0.264, 0.567]
('tomek', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.63 [0.175, 0.555]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.35 [0.188, 0.562]
('tomek', 'nb')
7.67 [0.388, 0.585]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.99 [0.184, 0.301]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.89 [0.146, 0.573]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.0 [0.163, 0.543]
('None', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.39 [0.149, 0.563]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.25 [0.162, 0.56]
('None', 'nb')
6.07 [0.344, 0.585]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.26 [0.133, 0.282]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.14 [0.111, 0.578]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.79 [0.138, 0.548]

