In [2]:
from time import perf_counter
import numpy as np,os
import pandas as pd
from collections import defaultdict
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from imblearn.ensemble import BalancedBaggingClassifier, RUSBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, EditedNearestNeighbours, TomekLinks
from imblearn.pipeline import Pipeline

from sklearn.metrics import matthews_corrcoef, precision_recall_curve, auc, accuracy_score, precision_score, recall_score
import seaborn as sns,matplotlib.pyplot as plt

from library.configs import IMBS, CLFS, ENSEMBLES, CV, SCORERS
from library.utils import evaluate, read_data
from library.cleaners import kDN, ih_prob,FilteringEstimator

In [3]:
DATASETS = ['groovy-1_5_7.csv','jruby-1.4.0.csv','lucene-2.9.0.csv','jruby-1.7.0.preview1.csv','groovy-1_6_BETA_1.csv',
        'derby-10.2.1.6.csv','wicket-1.5.3.csv','camel-2.9.0.csv','camel-1.4.0.csv','activemq-5.8.0.csv']
DATASETS = [f for f in os.listdir("JIRA/") if 'csv' in f]
len(DATASETS)

32

In [4]:
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin, clone
class CLNI(BaseEstimator, ClassifierMixin):
    def __init__(self, estimator, detector, K=5, threshold=.999, random_state=None):
        self.estimator = estimator
        self.detector = detector
        self.threshold = threshold
        self.K = K
        self.random_state = random_state
        
    def clean(self,X,Y, sample_weight):
        N,alpha = 5,.60
        Xt,Yt = X.copy(),Y.copy()
        while True:
            ne = self.detector(Xt,Yt,K=self.K)
            cidx = ne<=alpha
            #print(cidx.sum(),len(Xt),cidx.sum()/len(Xt))
            N = len(Xt)
            Xt,Yt = Xt[cidx],Yt[cidx]
            try:
                sample_weight = sample_weight[cidx]
            except:
                pass
            if cidx.sum()/N>=.99:
                break
        return Xt,Yt,sample_weight

    def fit(self, X, Y,sample_weight=None):
        Xf,Yf,sample_weight = self.clean(X, Y, sample_weight)
#         a,b = np.unique(Y,return_counts=True)[1],np.unique(Yf,return_counts=True)[1]
#         print(a.max()/a.min(),b.max()/b.min())
        assert len(np.unique(Yf))==2,"Pos class completely filtered out"
        try:
            self.estimator = self.estimator.fit(Xf, Yf,sample_weight=sample_weight)
        except TypeError as e:
            self.estimator = self.estimator.fit(Xf, Yf)
        return self

    @property
    def classes_(self):
        return self.estimator.classes_

    def predict(self, X):
        return self.estimator.predict(X)
    
    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

In [5]:
models = {}
for im,samp in IMBS.items():
    for c,clf in CLFS.items():
        models[(im,c)] = Pipeline([('samp',samp),('clf',CLNI(clf,kDN))])
    
models.keys(),len(models)

(dict_keys([('smote', 'dt'), ('smote', 'lr'), ('smote', 'nb'), ('smote', 'svm'), ('smote', 'knn'), ('smote', 'rf'), ('rus', 'dt'), ('rus', 'lr'), ('rus', 'nb'), ('rus', 'svm'), ('rus', 'knn'), ('rus', 'rf'), ('wilson', 'dt'), ('wilson', 'lr'), ('wilson', 'nb'), ('wilson', 'svm'), ('wilson', 'knn'), ('wilson', 'rf'), ('tomek', 'dt'), ('tomek', 'lr'), ('tomek', 'nb'), ('tomek', 'svm'), ('tomek', 'knn'), ('tomek', 'rf'), ('None', 'dt'), ('None', 'lr'), ('None', 'nb'), ('None', 'svm'), ('None', 'knn'), ('None', 'rf')]),
 30)

In [6]:
path = "CLNI_consensus.csv"
cols = pd.MultiIndex.from_product([IMBS.keys(),CLFS.keys(),[f.__name__ for f in SCORERS]],names=['imb','clf','metric'])
#df = pd.DataFrame(index=DATASETS,columns=cols)
df = pd.read_csv(path,header=[0,1,2],index_col=0)

In [None]:
for it,d in enumerate(DATASETS):
    print(it)
    X,y_noisy,y_real = read_data(d,stats=True)
    if df.loc[d,:].isna().sum()==0:
        print(f"Skipping {d}")
        continue
    for k in models:
        print(k)
        sd = perf_counter()
        r = evaluate(models[k],X,y_noisy,y_real,CV,SCORERS)
        for f in r:
            df.loc[d,(k[0],k[1],f)] = np.nanmean(r[f])
        print(round(perf_counter()-sd,2),[round(r[f].mean(),3) for f in r])
    print()
    df.to_csv(path)

0
activemq-5.8.0.csv noise:0.058, imb:15.847,203,3217, Shape:(3420, 65)
Skipping activemq-5.8.0.csv
1
groovy-1_6_BETA_1.csv noise:0.128, imb:6.017,117,704, Shape:(821, 65)
Skipping groovy-1_6_BETA_1.csv
2
activemq-5.3.0.csv noise:0.094, imb:15.669,142,2225, Shape:(2367, 65)
Skipping activemq-5.3.0.csv
3
wicket-1.3.0-incubating-beta-1.csv noise:0.164, imb:4.806,288,1384, Shape:(1672, 65)
Skipping wicket-1.3.0-incubating-beta-1.csv
4
jruby-1.1.csv noise:0.175, imb:3.540,161,570, Shape:(731, 65)
Skipping jruby-1.1.csv
5
jruby-1.4.0.csv noise:0.190, imb:3.890,200,778, Shape:(978, 65)
Skipping jruby-1.4.0.csv
6
lucene-2.3.0.csv noise:0.204, imb:4.031,160,645, Shape:(805, 65)
Skipping lucene-2.3.0.csv
7
hbase-0.95.2.csv noise:0.260, imb:15.088,114,1720, Shape:(1834, 65)
('smote', 'dt')
11.93 [0.173, 0.45]
('smote', 'lr')
14.31 [0.193, 0.425]
('smote', 'nb')
12.35 [0.318, 0.522]
('smote', 'svm')
45.81 [0.184, 0.42]
('smote', 'knn')
11.81 [0.187, 0.457]
('smote', 'rf')
16.96 [0.245, 0.476]
('r

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


9.94 [0.158, 0.451]
('None', 'dt')
7.73 [0.142, 0.534]
('None', 'lr')
6.93 [0.111, 0.448]
('None', 'nb')
9.1 [0.239, 0.491]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.37 [0.138, 0.289]
('None', 'knn')
8.06 [0.102, 0.497]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.4 [0.14, 0.449]

8
lucene-3.0.0.csv noise:0.185, imb:6.037,190,1147, Shape:(1337, 65)
('smote', 'dt')
14.95 [0.236, 0.425]
('smote', 'lr')
13.32 [0.319, 0.347]
('smote', 'nb')
17.88 [0.307, 0.334]
('smote', 'svm')
24.79 [0.329, 0.339]
('smote', 'knn')
15.05 [0.245, 0.454]
('smote', 'rf')
15.0 [0.337, 0.295]
('rus', 'dt')
10.24 [0.27, 0.497]
('rus', 'lr')
12.43 [0.328, 0.34]
('rus', 'nb')
10.67 [0.29, 0.359]
('rus', 'svm')
10.39 [0.32, 0.32]
('rus', 'knn')
10.13 [0.257, 0.396]
('rus', 'rf')
11.41 [0.348, 0.316]
('wilson', 'dt')
8.92 [0.304, 0.46]
('wilson', 'lr')
10.96 [0.33, 0.337]
('wilson', 'nb')
8.88 [0.305, 0.358]
('wilson', 'svm')
10.43 [0.3, 0.312]
('wilson', 'knn')
9.11 [0.31, 0.359]
('wilson', 'rf')
10.6 [0.321, 0.313]
('tomek', 'dt')
12.14 [0.173, 0.305]
('tomek', 'lr')
13.92 [0.196, 0.301]
('tomek', 'nb')
12.23 [0.238, 0.348]
('tomek', 'svm')
13.74 [0.118, 0.251]
('tomek', 'knn')
12.55 [0.136, 0.233]
('tomek', 'rf')
13.9 [0.107, 0.275]
('None', 'dt')
10.7 [0.162, 0.294]
('N

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.93 [0.069, 0.266]

9
camel-2.9.0.csv noise:0.044, imb:34.600,200,6920, Shape:(7120, 65)
('smote', 'dt')
65.44 [0.185, 0.211]
('smote', 'lr')
77.27 [0.176, 0.202]
('smote', 'nb')
62.5 [0.221, 0.37]
('smote', 'svm')
739.79 [0.177, 0.164]
('smote', 'knn')
68.28 [0.187, 0.305]
('smote', 'rf')
84.05 [0.27, 0.199]
('rus', 'dt')
10.99 [0.177, 0.381]
('rus', 'lr')
13.65 [0.205, 0.196]
('rus', 'nb')
10.91 [0.23, 0.372]
('rus', 'svm')
11.64 [0.22, 0.191]
('rus', 'knn')
11.95 [0.217, 0.335]
('rus', 'rf')
12.66 [0.22, 0.272]
('wilson', 'dt')
53.75 [0.253, 0.286]
('wilson', 'lr')
52.61 [0.296, 0.268]
('wilson', 'nb')
50.49 [0.277, 0.329]
('wilson', 'svm')
56.64 [0.27, 0.224]
('wilson', 'knn')
60.06 [0.276, 0.275]
('wilson', 'rf')
56.45 [0.25, 0.245]
('tomek', 'dt')
51.78 [0.178, 0.28]
('tomek', 'lr')
54.62 [0.212, 0.261]
('tomek', 'nb')
50.21 [0.285, 0.294]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


56.95 [0.178, 0.168]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


60.55 [0.18, 0.256]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


54.0 [0.16, 0.241]
('None', 'dt')
28.8 [0.159, 0.263]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


30.19 [0.172, 0.263]
('None', 'nb')
28.32 [0.283, 0.283]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


33.79 [0.138, 0.156]
('None', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


36.27 [0.144, 0.255]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


31.23 [0.128, 0.239]

10
wicket-1.5.3.csv noise:0.064, imb:26.720,93,2485, Shape:(2578, 65)
('smote', 'dt')
21.05 [0.117, 0.153]
('smote', 'lr')
22.07 [0.16, 0.145]
('smote', 'nb')
17.85 [0.209, 0.345]
('smote', 'svm')
81.23 [0.163, 0.131]
('smote', 'knn')
20.09 [0.154, 0.25]
('smote', 'rf')
25.44 [0.144, 0.138]
('rus', 'dt')
8.82 [0.199, 0.431]
('rus', 'lr')
9.78 [0.205, 0.178]
('rus', 'nb')
9.03 [0.216, 0.389]
('rus', 'svm')
9.3 [0.217, 0.154]
('rus', 'knn')
9.88 [0.195, 0.338]
('rus', 'rf')
11.29 [0.213, 0.252]
('wilson', 'dt')
11.07 [0.156, 0.212]
('wilson', 'lr')
13.34 [0.126, 0.196]
('wilson', 'nb')
10.74 [0.224, 0.291]
('wilson', 'svm')
13.85 [0.133, 0.144]
('wilson', 'knn')
11.73 [0.102, 0.15]
('wilson', 'rf')
12.94 [0.112, 0.186]
('tomek', 'dt')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.06 [0.096, 0.229]
('tomek', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.26 [0.084, 0.177]
('tomek', 'nb')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


10.06 [0.162, 0.269]
('tomek', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


12.97 [0.099, 0.121]
('tomek', 'knn')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.56 [0.094, 0.136]
('tomek', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


11.86 [0.105, 0.156]
('None', 'dt')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
7.75 [0.078, 0.349]
('None', 'lr')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


7.3 [0.063, 0.171]
('None', 'nb')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
8.19 [0.13, 0.291]
('None', 'svm')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


6.94 [0.039, 0.107]
('None', 'knn')
ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out
8.03 [0.033, 0.153]
('None', 'rf')


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


ERROR!!! Pos class completely filtered out


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


8.14 [0.054, 0.149]

11
lucene-3.1.csv noise:0.120, imb:7.477,331,2475, Shape:(2806, 65)
('smote', 'dt')
19.92 [0.148, 0.312]
('smote', 'lr')
24.55 [0.153, 0.232]
('smote', 'nb')
17.8 [0.179, 0.274]
('smote', 'svm')
70.69 [0.142, 0.129]
('smote', 'knn')
