In [None]:
!pip install xlrd
!pip install openpyxl

In [4]:
#import dataiku
#from dataiku import pandasutils as pdu
import pandas as pd
import time

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn import svm, tree, linear_model, neighbors, naive_bayes, ensemble, discriminant_analysis, gaussian_process
from xgboost import XGBClassifier

class Classifier_comparision:
    def __init__(self, X, y, test_size, train_size, stratified = True):
        self.X = X
        self.y = y
        self.test_size = test_size
        self.train_size = train_size
        self.stratified = stratified
        
    def make_result(self):
        #Machine Learning Algorithm (MLA) Selection and Initialization
        MLA = [
            #Ensemble Methods
            ensemble.AdaBoostClassifier(),
            ensemble.BaggingClassifier(),
            ensemble.ExtraTreesClassifier(),
            ensemble.GradientBoostingClassifier(),
            ensemble.RandomForestClassifier(),

            #Gaussian Processes
            gaussian_process.GaussianProcessClassifier(),

            #GLM
            linear_model.LogisticRegressionCV(),
            linear_model.PassiveAggressiveClassifier(),
            linear_model.RidgeClassifierCV(),
            linear_model.SGDClassifier(),
            linear_model.Perceptron(),

            #Navies Bayes
            naive_bayes.BernoulliNB(),
            naive_bayes.GaussianNB(),

            #Nearest Neighbor
            neighbors.KNeighborsClassifier(),

            #SVM
            svm.SVC(probability=True, kernel = 'linear', C = 1, gamma = 1),
            svm.SVC(probability=True, kernel = 'poly', C = 1, gamma = 1),
            svm.SVC(probability=True, kernel = 'rbf', C = 1, gamma = 1),
            svm.SVC(probability=True, kernel = 'sigmoid'),
            svm.NuSVC(probability=True),
            svm.LinearSVC(),

            #Trees    
            tree.DecisionTreeClassifier(),
            tree.ExtraTreeClassifier(),

            #Discriminant Analysis
            discriminant_analysis.LinearDiscriminantAnalysis(),
            discriminant_analysis.QuadraticDiscriminantAnalysis(),

            #xgboost: http://xgboost.readthedocs.io/en/latest/model.html
            XGBClassifier(eval_metric='mlogloss')    
            ]

        from sklearn.model_selection import train_test_split, ShuffleSplit, StratifiedShuffleSplit
        # prepare
        cv_split_1 = ShuffleSplit(n_splits = 10, test_size = .25, train_size = .55, random_state = 33)        
        cv_split_2 = StratifiedShuffleSplit(n_splits = 10, test_size = .25, train_size = .55, random_state = 33)
        # pre-test
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, 
                                                            test_size = 0.25, random_state = 33)
        
        from sklearn import model_selection
        MLA_model_name = []
        run_times = []
        train_acc = []
        test_acc = []
        test_rep = []
        test_mat = []
        best_params = []

        for idx, alg in enumerate(MLA):
            t0 = time.time()
            #set name and parameters
            MLA_model_name.append(alg.__class__.__name__)    
            best_params.append(str(alg.get_params()))

            #score model with cross validation:
            if self.stratified == 1:
                cv_results = model_selection.cross_validate(alg, self.X, self.y, cv = cv_split_1, return_train_score = True)
            else:
                cv_results = model_selection.cross_validate(alg, self.X, self.y, cv = cv_split_2, return_train_score = True)
                
            train_acc.append(cv_results['train_score'].mean())
            test_acc.append(cv_results['test_score'].mean())
            run_times.append(time.time() - t0)
            
            # pre-test
            alg.fit(X_train, y_train)

            y_pr_tr = alg.predict(X_train)
            y_pr_tt = alg.predict(X_test)
            test_mat.append(confusion_matrix(y_test, y_pr_tt))
            test_rep.append(classification_report(y_test, y_pr_tt))
            
        import pandas as pd
        df = pd.DataFrame({"MLA_model_name": MLA_model_name,
                           "best_params": best_params,
                           "train_acc_kfold": train_acc,
                           "test_acc_kfold": test_acc,
                           "run_time": run_times,
                           "conf_test_matrix": test_mat,
                           "clf_test_report": test_rep
                          })
        return df.sort_values(by=['test_acc_kfold', 'train_acc_kfold'], ascending=False)

In [6]:
#mydataset = dataiku.Dataset("SBN")
#mydataset_df = mydataset.get_dataframe()
mydataset_df = pd.read_excel(r"../input/nhandv6/Swiss Bank Notes.xlsx", sheet_name = 'Sheet1')
mydataset_df.head()

Unnamed: 0,Length,Height (left),Height (right),Inner Frame (lower),Inner Frame (upper),Diagonal
0,214.8,131.0,131.1,9.0,9.7,141.0
1,214.6,129.7,129.7,8.1,9.5,141.7
2,214.8,129.7,129.7,8.7,9.6,142.2
3,214.8,129.7,129.6,7.5,10.4,142.0
4,215.0,129.6,129.7,10.4,7.7,141.8


In [7]:
SWB_df = mydataset_df.copy()
SWB_df['target'] = 100*[1] + 100*[0]
SWB_df.head()

Unnamed: 0,Length,Height (left),Height (right),Inner Frame (lower),Inner Frame (upper),Diagonal,target
0,214.8,131.0,131.1,9.0,9.7,141.0,1
1,214.6,129.7,129.7,8.1,9.5,141.7,1
2,214.8,129.7,129.7,8.7,9.6,142.2,1
3,214.8,129.7,129.6,7.5,10.4,142.0,1
4,215.0,129.6,129.7,10.4,7.7,141.8,1


In [8]:
import warnings

warnings.filterwarnings("ignore")
warnings.filterwarnings('ignore', category = UserWarning)
warnings.filterwarnings('ignore', category = Warning)

X = SWB_df.iloc[:, :-1]
y = SWB_df.iloc[:, -1]
cc = Classifier_comparision(X, y, test_size = 0.25, train_size = 0.6)
%time res = cc.make_result()
res

CPU times: user 13.5 s, sys: 827 ms, total: 14.3 s
Wall time: 12.2 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
2,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",1.0,1.0,1.512226,"[[25, 0], [0, 25]]",precision recall f1-score ...
4,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",1.0,1.0,2.078298,"[[24, 1], [0, 25]]",precision recall f1-score ...
9,SGDClassifier,"{'alpha': 0.0001, 'average': False, 'class_wei...",1.0,1.0,0.071239,"[[25, 0], [0, 25]]",precision recall f1-score ...
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",0.998182,1.0,0.160594,"[[25, 0], [0, 25]]",precision recall f1-score ...
18,NuSVC,"{'break_ties': False, 'cache_size': 200, 'clas...",0.996364,1.0,0.10656,"[[25, 0], [0, 25]]",precision recall f1-score ...
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...",0.995455,1.0,0.262364,"[[25, 0], [0, 25]]",precision recall f1-score ...
14,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.995455,1.0,0.082308,"[[25, 0], [0, 25]]",precision recall f1-score ...
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...",0.994545,1.0,0.104718,"[[25, 0], [0, 25]]",precision recall f1-score ...
22,LinearDiscriminantAnalysis,"{'covariance_estimator': None, 'n_components':...",0.994545,1.0,0.078689,"[[25, 0], [0, 25]]",precision recall f1-score ...
23,QuadraticDiscriminantAnalysis,"{'priors': None, 'reg_param': 0.0, 'store_cova...",0.994545,1.0,0.086085,"[[25, 0], [0, 25]]",precision recall f1-score ...


In [9]:
print(res['clf_test_report'][0])
print(res['conf_test_matrix'][0])

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       1.00      1.00      1.00        25

    accuracy                           1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50

[[25  0]
 [ 0 25]]


In [10]:
from sklearn.decomposition import PCA
rate = 0.95
pca = PCA(rate)
X_pca = pca.fit_transform(X)
n1 = X.shape[1]
n2 = X_pca.shape[1]
print("{} is the number of dimension that keep {}% information from the original dataset {}."
      .format(n2, int(rate*100), n1))

# Evaluate
cc_pca = Classifier_comparision(X_pca, y, test_size = 0.25, train_size = 0.6)
%time cc_pca.make_result()

4 is the number of dimension that keep 95% information from the original dataset 6.
CPU times: user 9.11 s, sys: 617 ms, total: 9.72 s
Wall time: 7.93 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
2,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",1.0,1.0,1.466029,"[[25, 0], [0, 25]]",precision recall f1-score ...
19,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...",0.999091,1.0,0.020681,"[[25, 0], [0, 25]]",precision recall f1-score ...
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",0.998182,1.0,0.104276,"[[25, 0], [0, 25]]",precision recall f1-score ...
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...",0.995455,1.0,0.15442,"[[25, 0], [0, 25]]",precision recall f1-score ...
14,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.995455,1.0,0.029946,"[[25, 0], [0, 25]]",precision recall f1-score ...
17,SVC,"{'C': 1.0, 'break_ties': False, 'cache_size': ...",0.995455,1.0,0.031741,"[[25, 0], [1, 24]]",precision recall f1-score ...
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...",0.994545,1.0,0.040701,"[[25, 0], [0, 25]]",precision recall f1-score ...
22,LinearDiscriminantAnalysis,"{'covariance_estimator': None, 'n_components':...",0.994545,1.0,0.02585,"[[25, 0], [0, 25]]",precision recall f1-score ...
23,QuadraticDiscriminantAnalysis,"{'priors': None, 'reg_param': 0.0, 'store_cova...",0.994545,1.0,0.023332,"[[25, 0], [0, 25]]",precision recall f1-score ...
4,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",1.0,0.998,2.041246,"[[24, 1], [0, 25]]",precision recall f1-score ...


In [11]:
import seaborn as sns
iris = sns.load_dataset("iris")
X = iris.iloc[:, :-1]
y = iris.iloc[:, -1]

cc = Classifier_comparision(X, y, test_size = 0.25, train_size = 0.6)
%time cc.make_result()

CPU times: user 25.2 s, sys: 3.05 s, total: 28.3 s
Wall time: 22.8 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
14,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.984146,0.978947,0.079365,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
22,LinearDiscriminantAnalysis,"{'covariance_estimator': None, 'n_components':...",0.981707,0.978947,0.079049,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
16,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.987805,0.976316,0.090394,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
23,QuadraticDiscriminantAnalysis,"{'priors': None, 'reg_param': 0.0, 'store_cova...",0.985366,0.976316,0.076614,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",0.976829,0.963158,0.145267,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
12,GaussianNB,"{'priors': None, 'var_smoothing': 1e-09}",0.960976,0.957895,0.079128,"[[13, 0, 0], [0, 13, 0], [0, 1, 11]]",precision recall f1-score ...
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...",0.971951,0.957895,1.417088,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
2,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",1.0,0.955263,1.520717,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...",0.995122,0.955263,0.355397,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': None, '...",0.979268,0.955263,9.195157,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...


In [12]:
iris_bin = pd.DataFrame({})
for idx, fea_name in enumerate(iris.columns[: - 1]):
    iris_bin[fea_name] = pd.qcut(iris.iloc[:, idx], 3, labels = False)
X_bins = iris_bin.to_numpy()

cc2 = Classifier_comparision(X_bins, y, test_size = 0.25, train_size = 0.6)
%time res = cc2.make_result()
res[res.MLA_model_name == 'BernoulliNB']

CPU times: user 16.8 s, sys: 1.3 s, total: 18.1 s
Wall time: 14.8 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...",0.776829,0.734211,0.030183,"[[0, 0, 13], [0, 0, 13], [0, 0, 12]]",precision recall f1-score ...


In [13]:
rate = 0.99
pca = PCA(rate)
X = iris.iloc[:, :-1]
y = iris.iloc[:, -1]
X_pca = pca.fit_transform(X)
n1 = X.shape[1]
n2 = X_pca.shape[1]
print("{} is the number of dimension that keep {}% information from the original dataset {}."
      .format(n2, int(rate*100), n1))
print(25*"=")
cc_pca = Classifier_comparision(X_pca, y, test_size = 0.25, train_size = 0.6)
%time cc_pca.make_result()

3 is the number of dimension that keep 99% information from the original dataset 4.
CPU times: user 16.4 s, sys: 1.27 s, total: 17.6 s
Wall time: 14.4 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
22,LinearDiscriminantAnalysis,"{'covariance_estimator': None, 'n_components':...",0.985366,0.989474,0.028625,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
14,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.984146,0.981579,0.033138,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
16,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.986585,0.973684,0.045023,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
23,QuadraticDiscriminantAnalysis,"{'priors': None, 'reg_param': 0.0, 'store_cova...",0.979268,0.965789,0.026767,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': None, '...",0.979268,0.960526,3.177977,"[[13, 0, 0], [0, 12, 1], [0, 0, 12]]",precision recall f1-score ...
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",0.97439,0.960526,0.090272,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...",0.971951,0.957895,0.411074,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
15,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",0.984146,0.952632,0.031242,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
18,NuSVC,"{'break_ties': False, 'cache_size': 200, 'clas...",0.963415,0.952632,0.053935,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...
24,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...",1.0,0.944737,0.520984,"[[13, 0, 0], [0, 12, 1], [0, 1, 11]]",precision recall f1-score ...


In [14]:
ttn_df = sns.load_dataset("titanic")
ttn_df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [22]:
ttn_dum2 = ttn_df.drop(columns = ['alive'])
#ttn_dum2['age'] = pd.qcut(ttn_dum2['age'], 4, labels=["children", "teen", "alduts", "old"])
#ttn_dum2['fare'] = pd.qcut(ttn_dum2['fare'], 3, labels=["cheap", "medium", "expensive"])
ttn_dum2 = pd.get_dummies(ttn_dum2)
ttn_dum2 = ttn_dum2.astype('float64')
ttn_dum2 = ttn_dum2.fillna(0)

In [None]:
X = ttn_dum2.iloc[:, 1:]
y = ttn_dum2.iloc[:, 0]
cc = Classifier_comparision(X, y, test_size = 0.25, train_size = 0.6)
%time cc.make_result()

#### 4. For `wine` dataset

In [19]:
from sklearn import datasets
#Load dataset
wine = datasets.load_wine()
X = wine['data']
y = wine['target']

cc2 = Classifier_comparision(X, y, test_size = 0.25, train_size = 0.6)
%time res = cc2.make_result()
res

CPU times: user 36.6 s, sys: 1.19 s, total: 37.8 s
Wall time: 34.6 s


Unnamed: 0,MLA_model_name,best_params,train_acc_kfold,test_acc_kfold,run_time,conf_test_matrix,clf_test_report
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...",0.997938,0.977778,0.037742,"[[15, 0, 0], [0, 17, 1], [0, 0, 12]]",precision recall f1-score ...
2,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",1.0,0.975556,1.491599,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
22,LinearDiscriminantAnalysis,"{'covariance_estimator': None, 'n_components':...",0.998969,0.973333,0.023303,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
4,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",1.0,0.966667,2.033749,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
23,QuadraticDiscriminantAnalysis,"{'priors': None, 'reg_param': 0.0, 'store_cova...",1.0,0.966667,0.023784,"[[15, 0, 0], [0, 18, 0], [0, 1, 11]]",precision recall f1-score ...
12,GaussianNB,"{'priors': None, 'var_smoothing': 1e-09}",0.989691,0.962222,0.02254,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
24,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...",1.0,0.946667,0.501597,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': None, '...",0.993814,0.937778,14.155421,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...",0.997938,0.935556,0.40641,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...
15,SVC,"{'C': 1, 'break_ties': False, 'cache_size': 20...",1.0,0.928889,3.379251,"[[15, 0, 0], [0, 18, 0], [0, 0, 12]]",precision recall f1-score ...


#### 5. Spam detection

In [None]:
#mydataset = dataiku.Dataset("spam")
#spam_df = mydataset.get_dataframe()
spam_df = pd.read_csv(r"../input/nhandv6/spam.csv", usecols = ['text_mes', 'target'], encoding='ISO-8859-1')
spam_df = spam_df.iloc[1:, :2]
spam_df.columns = ['target', 'text_mes']
spam_df.head()

In [None]:
spam_sumr_ = pd.DataFrame({})
spam_df = spam_df.drop_duplicates()
spam_df.shape

In [None]:
from wordcloud import STOPWORDS
import string

spam_sumr_['total_words'] = spam_df.text_mes.apply(lambda x: len(x.split()))
spam_sumr_['total_unique_words'] = spam_df.text_mes.apply(lambda x: len(set(x.split())))
spam_sumr_['char_count'] = spam_df.text_mes.apply(lambda x: len(x))
spam_sumr_['average_words'] = (spam_sumr_.char_count / spam_sumr_.total_words).round(2)
spam_sumr_['count_stopwords'] = spam_df.text_mes.apply(lambda x: len([w for w in str(x).lower().split() if w in STOPWORDS]))
spam_sumr_['count_punct'] = spam_df.text_mes.apply(lambda x: len([w for w in str(x) if w in string.punctuation]))
spam_sumr_['count_hashtag#'] = spam_df.text_mes.apply(lambda x: x.count('#'))
spam_sumr_['count_fb.tag@'] = spam_df.text_mes.apply(lambda x: x.count('@'))
spam_sumr_['count_url'] = spam_df.text_mes.apply(lambda x: len([w for w in str(x).lower() if 'http' in w or 'https' in w or 'www' in w]))

spam_sumr_.head()

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

text_process = CountVectorizer().fit_transform(spam_df['text_mes'])
tfidf_vect = TfidfVectorizer()
tfidf_X = tfidf_vect.fit_transform(spam_df['text_mes'])
tfidf_X.shape

In [None]:
cate_X = spam_sumr_.iloc[:, ]

In [None]:
X = tfidf_X.toarray()
y = spam_df['target']

cc2 = Classifier_comparision(X, y, test_size = 0.25, train_size = 0.6)
%time res = cc2.make_result()
res