In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from matplotlib import pyplot as plt
from sklearn.svm import SVC
import numpy as np
import operator
%matplotlib inline

In [2]:
def CrossValidationGridSearchNested(X_data, Y_data, num_trials, fold_num, est_classifcation, tuned_param, scoring):
    """Repeat a (grid search +) cross-validated scoring and keep the best trial.

    For every trial ``i`` two shuffled ``StratifiedKFold`` splitters are built
    (seeds ``i`` and ``i + 1``). When ``tuned_param`` is empty the estimator is
    scored directly with ``cross_val_score`` on the outer splitter. Otherwise a
    ``GridSearchCV`` is fitted on the full data with the inner splitter and its
    ``best_estimator_`` is then scored on the outer splitter.

    NOTE(review): the grid search sees all of ``X_data`` before the outer
    scoring, so the outer folds are not independent of the hyper-parameter
    choice. The returned score is therefore optimistic compared with a fully
    nested scheme (scoring the ``GridSearchCV`` object itself).

    Parameters
    ----------
    X_data, Y_data
        Features and labels, passed straight through to scikit-learn.
    num_trials : int
        Number of repetitions, each with different fold seeds.
    fold_num : int
        ``n_splits`` used for both the inner and the outer splitter.
    est_classifcation
        The estimator to score / tune.
    tuned_param
        Parameter grid for ``GridSearchCV``; ``None`` or an empty container
        disables the search.
    scoring
        Scoring specification forwarded to scikit-learn.

    Returns
    -------
    tuple
        ``(max_score, best_estimator)`` of the best trial. If no trial runs,
        ``max_score`` is ``-inf`` and the input estimator is returned.
    """
    # Start from -inf rather than -1: scorers such as 'neg_log_loss' can
    # legitimately produce values below -1 and must still be able to win.
    max_score = float('-inf')
    best_estimator = est_classifcation
    is_tuned_param_empty = tuned_param is None or len(tuned_param) == 0

    for i in range(num_trials):
        inner_cv = StratifiedKFold(n_splits=fold_num, random_state=i, shuffle=True)
        outer_cv = StratifiedKFold(n_splits=fold_num, random_state=i+1, shuffle=True)

        if is_tuned_param_empty:
            # Nothing to tune: score the estimator exactly as given.
            candidate = est_classifcation
        else:
            # Parameter search on the full data set with the inner splitter.
            clf = GridSearchCV(estimator=est_classifcation, param_grid=tuned_param, cv=inner_cv, scoring=scoring)
            clf.fit(X_data, Y_data)
            candidate = clf.best_estimator_

        # Mean outer-fold score of this trial's candidate.
        param_score = cross_val_score(candidate, X=X_data, y=Y_data, cv=outer_cv, scoring=scoring).mean()

        if param_score > max_score:
            max_score = param_score
            best_estimator = candidate

        progress = (i+1)/num_trials*100
        print(f'> progress = {progress}%')

    return (max_score, best_estimator)

In [3]:
# Load the breast-cancer data set and split it into features / labels.
data = load_breast_cancer()
X, Y = data.data, data.target
print(f'dimension of X = {X.shape[1]}')
print(f'number of samples of X = {X.shape[0]}')

dimension of X = 30
number of samples of X = 569


In [60]:
# Display the feature matrix loaded from the data set.
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [253]:
# Display the label vector loaded from the data set.
Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [32]:
# (number of samples, number of features)
X.shape

(569, 30)

In [51]:
# Number of columns once the labels are reshaped into a column vector.
Y.reshape(len(Y), 1).shape[1]

1

In [54]:
# Wrap the feature matrix in a DataFrame with generated column names
# col_org_0 ... col_org_<n_features - 1>.
org_columns = [f"col_org_{idx}" for idx in range(X.shape[1])]
original_df_X = pd.DataFrame(X, columns=org_columns)
original_df_X

Unnamed: 0,col_org_0,col_org_1,col_org_2,col_org_3,col_org_4,col_org_5,col_org_6,col_org_7,col_org_8,col_org_9,...,col_org_20,col_org_21,col_org_22,col_org_23,col_org_24,col_org_25,col_org_26,col_org_27,col_org_28,col_org_29
0,17.990,10.38,122.80,1001.0,0.11840,0.27760,0.300100,0.147100,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.71190,0.26540,0.4601,0.11890
1,20.570,17.77,132.90,1326.0,0.08474,0.07864,0.086900,0.070170,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.24160,0.18600,0.2750,0.08902
2,19.690,21.25,130.00,1203.0,0.10960,0.15990,0.197400,0.127900,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.45040,0.24300,0.3613,0.08758
3,11.420,20.38,77.58,386.1,0.14250,0.28390,0.241400,0.105200,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.68690,0.25750,0.6638,0.17300
4,20.290,14.34,135.10,1297.0,0.10030,0.13280,0.198000,0.104300,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.40000,0.16250,0.2364,0.07678
5,12.450,15.70,82.57,477.1,0.12780,0.17000,0.157800,0.080890,0.2087,0.07613,...,15.470,23.75,103.40,741.6,0.17910,0.52490,0.53550,0.17410,0.3985,0.12440
6,18.250,19.98,119.60,1040.0,0.09463,0.10900,0.112700,0.074000,0.1794,0.05742,...,22.880,27.66,153.20,1606.0,0.14420,0.25760,0.37840,0.19320,0.3063,0.08368
7,13.710,20.83,90.20,577.9,0.11890,0.16450,0.093660,0.059850,0.2196,0.07451,...,17.060,28.14,110.60,897.0,0.16540,0.36820,0.26780,0.15560,0.3196,0.11510
8,13.000,21.82,87.50,519.8,0.12730,0.19320,0.185900,0.093530,0.2350,0.07389,...,15.490,30.73,106.20,739.3,0.17030,0.54010,0.53900,0.20600,0.4378,0.10720
9,12.460,24.04,83.97,475.9,0.11860,0.23960,0.227300,0.085430,0.2030,0.08243,...,15.090,40.68,97.65,711.4,0.18530,1.05800,1.10500,0.22100,0.4366,0.20750


In [55]:
# Wrap the labels in a single-column DataFrame; the column is named "Label0".
# Note that this rebinds org_columns to the label column names.
org_columns = [f"Label{idx}" for idx in range(Y.reshape(-1, 1).shape[1])]
original_df_Y = pd.DataFrame(Y.reshape(-1, 1), columns=org_columns)
original_df_Y

Unnamed: 0,Label0
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [5]:
# Hyper-parameter grid for the SVC: an RBF-kernel grid and a linear-kernel grid.
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

# How many differently seeded trials to run.
NUM_TRIALS = 30

# Base Support Vector Classifier; kernel, C and gamma come from the grid above.
svm = SVC()

max_score, svm_best_estimator = CrossValidationGridSearchNested(
    X, Y,
    num_trials=NUM_TRIALS,
    fold_num=10,
    est_classifcation=svm,
    tuned_param=tuned_parameters,
    scoring='roc_auc',
)
svm_best_parameter = svm_best_estimator.get_params()

print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {svm_best_estimator}\n')
print(f'\nbest_parameter = {svm_best_parameter}\n')

> progress = 3.3333333333333335%
> progress = 6.666666666666667%
> progress = 10.0%
> progress = 13.333333333333334%
> progress = 16.666666666666664%
> progress = 20.0%
> progress = 23.333333333333332%
> progress = 26.666666666666668%
> progress = 30.0%
> progress = 33.33333333333333%
> progress = 36.666666666666664%
> progress = 40.0%
> progress = 43.333333333333336%
> progress = 46.666666666666664%
> progress = 50.0%
> progress = 53.333333333333336%
> progress = 56.666666666666664%
> progress = 60.0%
> progress = 63.33333333333333%
> progress = 66.66666666666666%
> progress = 70.0%
> progress = 73.33333333333333%
> progress = 76.66666666666667%
> progress = 80.0%
> progress = 83.33333333333334%
> progress = 86.66666666666667%
> progress = 90.0%
> progress = 93.33333333333333%
> progress = 96.66666666666667%
> progress = 100.0%

max_score = 0.9936468425754139


best_estimator = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='au

In [6]:
# Hard-coded SVC configuration (linear kernel, C=10), spelled out one
# argument per line.
svm = SVC(
    C=10,
    kernel='linear',
    gamma='auto',
    degree=3,
    coef0=0.0,
    tol=0.001,
    shrinking=True,
    probability=False,
    class_weight=None,
    cache_size=200,
    max_iter=-1,
    decision_function_shape='ovr',
    random_state=None,
    verbose=False,
)

In [7]:
from sklearn.naive_bayes import GaussianNB

# No hyper-parameters are searched for GaussianNB, so the grid is empty and
# the estimator is only cross-validated.
tuned_parameters = list()

# How many differently seeded trials to run.
NUM_TRIALS = 30

clf = GaussianNB()

max_score, gau_nb_best_estimator = CrossValidationGridSearchNested(
    X, Y,
    num_trials=NUM_TRIALS,
    fold_num=10,
    est_classifcation=clf,
    tuned_param=tuned_parameters,
    scoring='roc_auc',
)
gau_nb_best_parameter = gau_nb_best_estimator.get_params()

print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {gau_nb_best_estimator}\n')
print(f'\nbest_parameter = {gau_nb_best_parameter}\n')

> progress = 3.3333333333333335%
> progress = 6.666666666666667%
> progress = 10.0%
> progress = 13.333333333333334%
> progress = 16.666666666666664%
> progress = 20.0%
> progress = 23.333333333333332%
> progress = 26.666666666666668%
> progress = 30.0%
> progress = 33.33333333333333%
> progress = 36.666666666666664%
> progress = 40.0%
> progress = 43.333333333333336%
> progress = 46.666666666666664%
> progress = 50.0%
> progress = 53.333333333333336%
> progress = 56.666666666666664%
> progress = 60.0%
> progress = 63.33333333333333%
> progress = 66.66666666666666%
> progress = 70.0%
> progress = 73.33333333333333%
> progress = 76.66666666666667%
> progress = 80.0%
> progress = 83.33333333333334%
> progress = 86.66666666666667%
> progress = 90.0%
> progress = 93.33333333333333%
> progress = 96.66666666666667%
> progress = 100.0%

max_score = 0.989685975400261


best_estimator = GaussianNB(priors=None)


best_parameter = {'priors': None}



In [8]:
from sklearn.naive_bayes import MultinomialNB

# Smoothing values to search: alpha = 10^1, 10^2, ..., 10^15.
tuned_parameters = [{'alpha': [10 ** exponent for exponent in range(1, 16)]}]

# How many differently seeded trials to run.
NUM_TRIALS = 30

clf = MultinomialNB()

max_score, mul_nb_best_estimator = CrossValidationGridSearchNested(
    X, Y,
    num_trials=NUM_TRIALS,
    fold_num=10,
    est_classifcation=clf,
    tuned_param=tuned_parameters,
    scoring='roc_auc',
)
mul_nb_best_parameter = mul_nb_best_estimator.get_params()

print(f'\nmax_score = {max_score}\n')
print(f'\nbest_estimator = {mul_nb_best_estimator}\n')
print(f'\nbest_parameter = {mul_nb_best_parameter}\n')

> progress = 3.3333333333333335%
> progress = 6.666666666666667%
> progress = 10.0%
> progress = 13.333333333333334%
> progress = 16.666666666666664%
> progress = 20.0%
> progress = 23.333333333333332%
> progress = 26.666666666666668%
> progress = 30.0%
> progress = 33.33333333333333%
> progress = 36.666666666666664%
> progress = 40.0%
> progress = 43.333333333333336%
> progress = 46.666666666666664%
> progress = 50.0%
> progress = 53.333333333333336%
> progress = 56.666666666666664%
> progress = 60.0%
> progress = 63.33333333333333%
> progress = 66.66666666666666%
> progress = 70.0%
> progress = 73.33333333333333%
> progress = 76.66666666666667%
> progress = 80.0%
> progress = 83.33333333333334%
> progress = 86.66666666666667%
> progress = 90.0%
> progress = 93.33333333333333%
> progress = 96.66666666666667%
> progress = 100.0%

max_score = 0.9679425204425206


best_estimator = MultinomialNB(alpha=10000000, class_prior=None, fit_prior=True)


best_parameter = {'alpha': 10000000, 'clas

In [57]:
def StackingMethod(origin_df_X, origin_df_Y, kfold, **all_basic_classifiers):
    """Print every base classifier received as a keyword argument.

    The data frames and ``kfold`` are accepted but not used; one line
    ``key = <name>, val = <classifier>`` is printed per keyword argument.
    """
    for name in all_basic_classifiers:
        estimator = all_basic_classifiers[name]
        print(f'key = {name}, val = {estimator}')

In [58]:
# Call StackingMethod with the base classifiers passed as keyword arguments.
StackingMethod(original_df_X, original_df_Y, 10, svm_classifier=svm, naive_bayse_gaussian_classifier=gau_nb_best_estimator)

key = svm_classifier, val = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
key = naive_bayse_gaussian_classifier, val = GaussianNB(priors=None)


In [59]:
# Same call as with explicit keyword arguments, but unpacking a dict.
all_classifier = dict(
    svm_classifier=svm,
    naive_bayse_gaussian_classifier=gau_nb_best_estimator,
)
StackingMethod(original_df_X, original_df_Y, 10, **all_classifier)

key = svm_classifier, val = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
key = naive_bayse_gaussian_classifier, val = GaussianNB(priors=None)


In [107]:
# Plain NumPy views of the frames; labels are flattened to a 1-D vector.
X_train = original_df_X.values
Y_train = original_df_Y.values.ravel()
Y_train

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [189]:
# Number of labels in the flattened label vector.
len(Y_train)

569

In [108]:
# Only the last expression of a cell is rendered, so the shape on the next
# line is evaluated but not displayed.
X_train.shape
X_train

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [135]:
# Class balance of the full label vector.
one_count = int(np.count_nonzero(Y_train == 1))
zero_count = int(np.count_nonzero(Y_train == 0))
print(f'1 num = {one_count}')
print(f'0 num = {zero_count}')

1 num = 357
0 num = 212


In [134]:
# Feature vector of the first sample.
X_train[0]

array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [132]:
# Walk through a fixed-seed stratified 10-fold split and print, for every
# fold, the train/test slices, the indices and the class balance of the
# test part.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=300)
iteration = 0
for train_index, test_index in skf.split(X_train, Y_train):
    X_cv_train, Y_cv_train = X_train[train_index], Y_train[train_index]
    X_cv_test, Y_cv_test = X_train[test_index], Y_train[test_index]

    # Each slice is followed by a separator line.
    print(f'X_cv_train = {X_cv_train}', '-----------', sep='\n')
    print(f'Y_cv_train = {Y_cv_train}', '-----------', sep='\n')
    print(f'X_cv_test = {X_cv_test}', '-----------', sep='\n')
    print(f'Y_cv_test = {Y_cv_test}', '-----------', sep='\n')
    print(f'{iteration} {train_index} {test_index}')

    one_count = int(np.count_nonzero(Y_cv_test == 1))
    zero_count = int(np.count_nonzero(Y_cv_test == 0))
    print(f'1 num = {one_count}')
    print(f'0 num = {zero_count}')
    iteration += 1


X_cv_train = [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
-----------
Y_cv_train = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0
 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0
 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 0
 1 1 1 1 0 1 1 0 0 0 1 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 1 1 0 1
 0 0 0 0 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 0 0 1 

In [248]:
import random
from datetime import datetime
import sys

def StackingMethod(origin_df_X, origin_df_Y, kfold, **all_basic_classifiers):
    """Append out-of-fold predictions of each base classifier as new columns.

    The data is split with a shuffled ``StratifiedKFold`` of ``kfold`` folds
    (randomly seeded on every call). For each fold every classifier is fitted
    on the training part and its predictions for the held-out part are stored,
    so each sample ends up with one prediction per classifier that was made
    without that sample in the training data.

    Parameters
    ----------
    origin_df_X : pandas.DataFrame
        Feature frame; its columns and index are preserved in the result.
    origin_df_Y : pandas.DataFrame
        Label frame; its values are flattened to a 1-D vector.
    kfold : int
        Number of stratified folds.
    **all_basic_classifiers
        ``name=estimator`` pairs. Each estimator is (re)fitted in place on
        every fold. The new column for it is named ``'Label_' + name``.

    Returns
    -------
    pandas.DataFrame
        ``origin_df_X`` with one extra ``Label_<name>`` column per classifier.
    """
    # Use the function's own arguments, not notebook-level globals.
    X_train = origin_df_X.values
    Y_train = origin_df_Y.values.ravel()

    # A fresh random fold seed per call. The random module is already seeded
    # from OS entropy; seeding it with a datetime object raises TypeError on
    # Python 3.11+.
    skf = StratifiedKFold(n_splits=kfold, random_state=random.randint(0, 2**32-1), shuffle=True)
    len_y = 0
    new_feature_columns = ['Label_'+x for x in all_basic_classifiers.keys()]
    new_feature_arr     = np.zeros([len(X_train), len(new_feature_columns)])

    for iteration, (train_index, test_index) in enumerate(skf.split(X_train, Y_train)):
        X_cv_train = X_train[train_index]
        Y_cv_train = Y_train[train_index]
        X_cv_test  = X_train[test_index]

        print(f"-----iteration {iteration}-----")
        print(f'test_index = {test_index}')
        for column_label_index, (k, v) in enumerate(all_basic_classifiers.items()):
            print(f'key = {k}, val = {v}')
            v.fit(X_cv_train, Y_cv_train)
            Y_cv_test_result = v.predict(X_cv_test)
            # Write the held-out predictions to the rows of this fold.
            new_feature_arr[test_index, column_label_index] = Y_cv_test_result

            print(f'Y_cv_test_result = {Y_cv_test_result}')
            print(f'len(Y_cv_test_result) = {len(Y_cv_test_result)}')
            len_y += len(Y_cv_test_result)
            print(type(Y_cv_test_result))
            print('-------')

    # Reuse the input index so the column-wise concat aligns row for row even
    # when origin_df_X does not carry a default RangeIndex.
    new_feature_df = pd.DataFrame(data = new_feature_arr, columns = new_feature_columns, index = origin_df_X.index)
    output_df_all = pd.concat([origin_df_X, new_feature_df], axis=1, ignore_index=False)
    print(f'total len_y = {len_y}')
    print(f'new_feature_columns = {new_feature_columns}')
    for count_index_fin, x in enumerate(new_feature_arr):
        print(f'index = {count_index_fin}, label = {x}')
    return output_df_all

In [249]:
# Build the stacked feature frame from the two base classifiers.
all_classifier = dict(
    svm_classifier=svm,
    naive_bayse_gaussian_classifier=gau_nb_best_estimator,
)
new_df = StackingMethod(original_df_X, original_df_Y, 10, **all_classifier)

-----iteration 0-----
test_index = [ 37  39  57  68  82  92 115 118 122 129 156 158 160 184 185 199 202 206
 208 214 232 240 258 268 290 298 306 312 318 321 326 335 337 346 372 376
 382 394 409 415 416 422 439 440 446 451 457 458 465 482 494 503 516 528
 533 548 550 565]
key = svm_classifier, val = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Y_cv_test_result = [1 0 0 1 0 1 1 0 0 0 0 1 1 0 1 0 0 1 0 0 1 1 0 1 0 1 1 1 1 0 1 0 0 1 0 1 1
 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 0 1 1 0]
len(Y_cv_test_result) = 58
<class 'numpy.ndarray'>
-------
key = naive_bayse_gaussian_classifier, val = GaussianNB(priors=None)
Y_cv_test_result = [1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 0 0 1 1 0 1 1 0 1 0 1 1 1 1 0 1 0 0 1 0 1 1
 1 1 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 0]
len(Y_cv_test_result) = 58
<class 'numpy.ndarray'>
-------
-----itera

Y_cv_test_result = [0 0 1 1 0 1 0 0 1 1 0 1 1 0 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1
 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1]
len(Y_cv_test_result) = 56
<class 'numpy.ndarray'>
-------
key = naive_bayse_gaussian_classifier, val = GaussianNB(priors=None)
Y_cv_test_result = [0 0 1 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1
 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1]
len(Y_cv_test_result) = 56
<class 'numpy.ndarray'>
-------
-----iteration 9-----
test_index = [  0   9  19  29  31  32  43  73  78  84  86 102 106 111 125 147 162 166
 176 186 190 191 200 218 222 227 244 247 256 277 286 307 309 322 338 342
 350 365 368 395 404 425 438 442 444 450 467 468 473 474 479 488 507 508
 515 532]
key = svm_classifier, val = SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Y_cv_test_result = [0 0 1 

In [250]:
# Attach the ground-truth labels as a 'Label' column (modifies new_df in place).
new_df['Label'] = original_df_Y['Label0']

In [251]:
# Display the stacked frame: original features, per-classifier predictions, label.
new_df

Unnamed: 0,col_org_0,col_org_1,col_org_2,col_org_3,col_org_4,col_org_5,col_org_6,col_org_7,col_org_8,col_org_9,...,col_org_23,col_org_24,col_org_25,col_org_26,col_org_27,col_org_28,col_org_29,Label_svm_classifier,Label_naive_bayse_gaussian_classifier,Label
0,17.990,10.38,122.80,1001.0,0.11840,0.27760,0.300100,0.147100,0.2419,0.07871,...,2019.0,0.16220,0.66560,0.71190,0.26540,0.4601,0.11890,0.0,0.0,0
1,20.570,17.77,132.90,1326.0,0.08474,0.07864,0.086900,0.070170,0.1812,0.05667,...,1956.0,0.12380,0.18660,0.24160,0.18600,0.2750,0.08902,0.0,0.0,0
2,19.690,21.25,130.00,1203.0,0.10960,0.15990,0.197400,0.127900,0.2069,0.05999,...,1709.0,0.14440,0.42450,0.45040,0.24300,0.3613,0.08758,0.0,0.0,0
3,11.420,20.38,77.58,386.1,0.14250,0.28390,0.241400,0.105200,0.2597,0.09744,...,567.7,0.20980,0.86630,0.68690,0.25750,0.6638,0.17300,0.0,0.0,0
4,20.290,14.34,135.10,1297.0,0.10030,0.13280,0.198000,0.104300,0.1809,0.05883,...,1575.0,0.13740,0.20500,0.40000,0.16250,0.2364,0.07678,0.0,0.0,0
5,12.450,15.70,82.57,477.1,0.12780,0.17000,0.157800,0.080890,0.2087,0.07613,...,741.6,0.17910,0.52490,0.53550,0.17410,0.3985,0.12440,0.0,0.0,0
6,18.250,19.98,119.60,1040.0,0.09463,0.10900,0.112700,0.074000,0.1794,0.05742,...,1606.0,0.14420,0.25760,0.37840,0.19320,0.3063,0.08368,0.0,0.0,0
7,13.710,20.83,90.20,577.9,0.11890,0.16450,0.093660,0.059850,0.2196,0.07451,...,897.0,0.16540,0.36820,0.26780,0.15560,0.3196,0.11510,0.0,0.0,0
8,13.000,21.82,87.50,519.8,0.12730,0.19320,0.185900,0.093530,0.2350,0.07389,...,739.3,0.17030,0.54010,0.53900,0.20600,0.4378,0.10720,0.0,0.0,0
9,12.460,24.04,83.97,475.9,0.11860,0.23960,0.227300,0.085430,0.2030,0.08243,...,711.4,0.18530,1.05800,1.10500,0.22100,0.4366,0.20750,0.0,0.0,0


In [97]:
# Scratch: the integers 1..9 as a list.
l = list(range(1, 10))
l

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [99]:
# Scratch: nine references to the same list object l.
g = [l for _ in range(9)]
g

[[1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9],
 [1, 2, 3, 4, 5, 6, 7, 8, 9]]

In [101]:
# Scratch: turn the nested list into a 2-D array.
g_arr = np.array(g)
g_arr

array([[1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9],
       [1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [204]:
# Scratch: element assignment into a zero-initialised 2x2 array.
a = np.zeros((2, 2))
a[0, 0] = 100
a[1, 1] = 20
a

array([[100.,   0.],
       [  0.,  20.]])

In [231]:
# Scratch: two small frames with disjoint column names for a concat experiment.
df1 = pd.DataFrame({'letter': ['a', 'b'], 'number': [1, 2]})
df2 = pd.DataFrame({'letter_1': ['c', 'd'], 'number_2': [3, 4]})

In [232]:
# Display the first scratch frame.
df1

Unnamed: 0,letter,number
0,a,1
1,b,2


In [233]:
# Display the second scratch frame.
df2

Unnamed: 0,letter_1,number_2
0,c,3
1,d,4


In [238]:
# Scratch: column-wise concatenation, keeping the original column labels.
df3 = pd.concat((df1, df2), axis='columns')

In [239]:
# Display the concatenated scratch frame.
df3

Unnamed: 0,letter,number,letter_1,number_2
0,a,1,c,3
1,b,2,d,4
