# Support Vector Machines

In [1]:
import numpy as np                     # Llibreria matemÃ tica
import matplotlib.pyplot as plt        # Per mostrar plots
import sklearn                         # Llibreia de DM
import sklearn.datasets as ds            # Per carregar mÃ©s facilment el dataset digits
import sklearn.model_selection as cv    # Pel Cross-validation
import sklearn.neighbors as nb           # Per fer servir el knn
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np    # Numeric and matrix computation
import pandas as pd   # Optional: good package for manipulating data 
import sklearn as sk  # Package with learning algorithms implemented

# Loading the dataset.
df = pd.read_csv("Train.csv")
y=df['readmitted'].values
X=df.values[:,0:71].astype('float32')

Let's separate the data into training (for adjusting parameters), and validation. 

In [3]:
(X_train, X_test,  y_train, y_test) = cv.train_test_split(X, y, test_size=.3, stratify = y,random_state=1)

unique, counts = np.unique(y_train, return_counts=True)
dict(zip(unique, counts))

{0: 5428, 1: 42525}

Our dataset is entirely numerical, but values are not normalized. We proceed to do so:

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#scaler = StandardScaler().fit(X_train)
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)

# Apply the normalization trained in training data in both training and test sets
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Our dataset has a lot of rows and is highly imbalanced, so we perform an undersampling of the train dataset but preserving the ratio of the classes:

In [5]:
ratio = counts[1]/counts[0]
num_samples_0 = 500;
num_samples_1 = int(num_samples_0*ratio)

from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(sampling_strategy={0:num_samples_0,1:num_samples_1},random_state=42)

splits = 5
X_train_splits = []
y_train_splits = []

for i in range(splits):
    X_train_r, y_train_r = rus.fit_resample(X_train, y_train)
    X_train_splits.append(X_train_r)
    y_train_splits.append(y_train_r)
    indices = rus.sample_indices_
    X_train = np.delete(X_train,indices,0)
    y_train = np.delete(y_train,indices,0)
    unique, counts = np.unique(y_train_r, return_counts=True)
    print("Split",i,dict(zip(unique, counts)))
    unique, counts = np.unique(y_train, return_counts=True)
    print("Remaining",dict(zip(unique, counts)))



Split 0 {0: 500, 1: 3917}
Remaining {0: 4928, 1: 38608}
Split 1 {0: 500, 1: 3917}
Remaining {0: 4428, 1: 34691}
Split 2 {0: 500, 1: 3917}
Remaining {0: 3928, 1: 30774}
Split 3 {0: 500, 1: 3917}
Remaining {0: 3428, 1: 26857}
Split 4 {0: 500, 1: 3917}
Remaining {0: 2928, 1: 22940}


## Voting classifier
Custom implementation of a voting classifier for fitted classifiers:

In [6]:
class VotingClassifier(object):
    """ Implements a voting classifier for pre-trained classifiers"""

    def __init__(self, estimators):
        self.estimators = estimators

    def predict(self, X):
        # get values
        Y = np.zeros([X.shape[0], len(self.estimators)], dtype=int)
        for i, clf in enumerate(self.estimators):
            Y[:, i] = clf.predict(X)
        # apply voting 
        y = np.zeros(X.shape[0])
        for i in range(X.shape[0]):
            y[i] = np.argmax(np.bincount(Y[i,:]))
        return y

## Linear SVM

Let's try an SVM with default parameters. Linear means that we are not using any kernel to move the data to a higher dimensional space.

In [7]:
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

svms = []
for i in range(splits):
    svm = SVC(kernel='linear')
    svm.fit(X_train_splits[i],y_train_splits[i])
    svms.append(svm)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

Confusion matrix on test set:
 [[    0  2327]
 [    0 18225]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2327
           1       0.89      1.00      0.94     18225

   micro avg       0.89      0.89      0.89     20552
   macro avg       0.44      0.50      0.47     20552
weighted avg       0.79      0.89      0.83     20552



The results are really bad, so this classifier doesn't work. However, we can try to adjust the weight of each class to compensate the classes being unbalanced:

In [8]:
svms = []
for i in range(splits):
    svm = SVC(kernel='linear', class_weight='balanced')
    svm.fit(X_train_splits[i],y_train_splits[i])
    svms.append(svm)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

Confusion matrix on test set:
 [[ 1219  1108]
 [ 5482 12743]]
              precision    recall  f1-score   support

           0       0.18      0.52      0.27      2327
           1       0.92      0.70      0.79     18225

   micro avg       0.68      0.68      0.68     20552
   macro avg       0.55      0.61      0.53     20552
weighted avg       0.84      0.68      0.74     20552



We notice a huge improvement in recall of class 0, which was our goal. Therefore, we will stick with these weights. However, the linear SVM has parameter C that has to be adjusted. We will use *GridSearch* method to find the optimal value of C.

In [9]:
Cs = np.logspace(-2, 2, num=5, base=10.0)
param_grid = {'C': Cs}

svms = []
for i in range(splits):
    print("\nSplit",i)
    
    scorer = make_scorer(f1_score,pos_label=0)
    grid_search = GridSearchCV(SVC(kernel='linear',class_weight='balanced'), param_grid, cv=10, scoring=scorer, iid=True)
    grid_search.fit(X_train_splits[i],y_train_splits[i])

    parval=grid_search.best_params_
    print("Best value of parameter C found: ",parval)
    
    scorer = make_scorer(recall_score,pos_label=0)
    cvacc = cross_val_score(SVC(C=parval['C'],kernel='linear',class_weight='balanced'), X=X_train_splits[i],  y=y_train_splits[i], cv=10, scoring=scorer)
    print('Recall 10-fold cross mean on train data =', cvacc.mean())

    svm = SVC(C=parval['C'],kernel='linear',class_weight='balanced')
    svm.fit(X_train_splits[i], y_train_splits[i])
    svms.append(svm)
    
    print("Number of supports: ",np.sum(svm.n_support_), "(",np.sum(np.abs(svm.dual_coef_)==parval['C']) ,"of them have slacks)")
    print("Prop. of supports: ",np.sum(svm.n_support_)/X_train_splits[i].shape[0])
    
vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("\nConfusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))

print(classification_report(y_test, pred))


Split 0
Best value of parameter C found:  {'C': 0.01}
Recall 10-fold cross mean on train data = 0.508
Number of supports:  3970 ( 0 of them have slacks)
Prop. of supports:  0.8988000905592031

Split 1
Best value of parameter C found:  {'C': 0.01}
Recall 10-fold cross mean on train data = 0.526
Number of supports:  3880 ( 0 of them have slacks)
Prop. of supports:  0.8784242698664252

Split 2
Best value of parameter C found:  {'C': 0.01}
Recall 10-fold cross mean on train data = 0.502
Number of supports:  3874 ( 0 of them have slacks)
Prop. of supports:  0.87706588182024

Split 3
Best value of parameter C found:  {'C': 0.01}
Recall 10-fold cross mean on train data = 0.5720000000000001
Number of supports:  3740 ( 0 of them have slacks)
Prop. of supports:  0.8467285487887707

Split 4
Best value of parameter C found:  {'C': 10.0}
Recall 10-fold cross mean on train data = 0.508
Number of supports:  3520 ( 0 of them have slacks)
Prop. of supports:  0.7969209870953136

Confusion matrix on tes

## Polynomial kernels

After trying with linear SVMs, the next step is using SVMs with a kernel. We'll try first polynomial kernel with degree 2 with default parameters.

In [10]:
svms = []
for i in range(splits):
    svm = SVC(kernel='poly', degree=2, gamma='auto', class_weight='balanced')
    svm.fit(X_train_splits[i],y_train_splits[i])
    svms.append(svm)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

Confusion matrix on test set:
 [[ 1233  1094]
 [ 5664 12561]]
              precision    recall  f1-score   support

           0       0.18      0.53      0.27      2327
           1       0.92      0.69      0.79     18225

   micro avg       0.67      0.67      0.67     20552
   macro avg       0.55      0.61      0.53     20552
weighted avg       0.84      0.67      0.73     20552



The performance is similar to the linear SVMs. Now, let's try to find the best C parameter:

In [11]:
Cs = np.logspace(-2, 2, num=5, base=10.0)
param_grid = {'C': Cs}

degree = 2

svms = []
for i in range(splits):
    print("\nSplit",i)
    
    scorer = make_scorer(f1_score,pos_label=0)
    grid_search = GridSearchCV(SVC(kernel='poly',degree=degree,gamma='auto',class_weight='balanced'), param_grid, cv=10, scoring=scorer, iid=True)
    grid_search.fit(X_train_splits[i],y_train_splits[i])

    parval=grid_search.best_params_
    print("Best value of parameter C found: ",parval)
    
    scorer = make_scorer(recall_score,pos_label=0)
    cvacc = cross_val_score(SVC(C=parval['C'],kernel='poly',degree=degree,gamma='auto',class_weight='balanced'), X=X_train_splits[i],  y=y_train_splits[i], cv=10, scoring=scorer)
    print('Recall 10-fold cross mean on train data =', cvacc.mean())

    svm = SVC(C=parval['C'],kernel='poly',degree=degree,gamma='auto',class_weight='balanced')
    svm.fit(X_train_splits[i], y_train_splits[i])
    svms.append(svm)
    
    print("Number of supports: ",np.sum(svm.n_support_), "(",np.sum(np.abs(svm.dual_coef_)==parval['C']) ,"of them have slacks)")
    print("Prop. of supports: ",np.sum(svm.n_support_)/X_train_splits[i].shape[0])
    
vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("\nConfusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))

print(classification_report(y_test, pred))


Split 0
Best value of parameter C found:  {'C': 0.1}
Recall 10-fold cross mean on train data = 0.43600000000000005
Number of supports:  4211 ( 0 of them have slacks)
Prop. of supports:  0.9533620104143083

Split 1
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.53
Number of supports:  3756 ( 0 of them have slacks)
Prop. of supports:  0.8503509169119312

Split 2
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.48599999999999993
Number of supports:  3765 ( 0 of them have slacks)
Prop. of supports:  0.8523884989812089

Split 3
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.5519999999999999
Number of supports:  3629 ( 0 of them have slacks)
Prop. of supports:  0.8215983699343445

Split 4
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.502
Number of supports:  3694 ( 0 of them have slacks)
Prop. of supports:  0.8363142404346842


Now, let's try with degree 3

In [12]:
svms = []
for i in range(splits):
    svm = SVC(kernel='poly', degree=3, gamma='auto', class_weight='balanced')
    svm.fit(X_train_splits[i],y_train_splits[i])
    svms.append(svm)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

Confusion matrix on test set:
 [[ 1198  1129]
 [ 5586 12639]]
              precision    recall  f1-score   support

           0       0.18      0.51      0.26      2327
           1       0.92      0.69      0.79     18225

   micro avg       0.67      0.67      0.67     20552
   macro avg       0.55      0.60      0.53     20552
weighted avg       0.83      0.67      0.73     20552



Find the best C parameter for degree 3:

In [13]:
Cs = np.logspace(-2, 2, num=5, base=10.0)
param_grid = {'C': Cs}

degree = 3

svms = []
for i in range(splits):
    print("\nSplit",i)
    
    scorer = make_scorer(f1_score,pos_label=0)
    grid_search = GridSearchCV(SVC(kernel='poly',degree=degree,gamma='auto',class_weight='balanced'), param_grid, cv=10, scoring=scorer, iid=True)
    grid_search.fit(X_train_splits[i],y_train_splits[i])

    parval=grid_search.best_params_
    print("Best value of parameter C found: ",parval)
    
    scorer = make_scorer(recall_score,pos_label=0)
    cvacc = cross_val_score(SVC(C=parval['C'],kernel='poly',degree=degree,gamma='auto',class_weight='balanced'), X=X_train_splits[i],  y=y_train_splits[i], cv=10, scoring=scorer)
    print('Recall 10-fold cross mean on train data =', cvacc.mean())

    svm = SVC(C=parval['C'],kernel='poly',degree=degree,gamma='auto',class_weight='balanced')
    svm.fit(X_train_splits[i], y_train_splits[i])
    svms.append(svm)
    
    print("Number of supports: ",np.sum(svm.n_support_), "(",np.sum(np.abs(svm.dual_coef_)==parval['C']) ,"of them have slacks)")
    print("Prop. of supports: ",np.sum(svm.n_support_)/X_train_splits[i].shape[0])
    
vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("\nConfusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))

print(classification_report(y_test, pred))


Split 0
Best value of parameter C found:  {'C': 0.1}
Recall 10-fold cross mean on train data = 0.44000000000000006
Number of supports:  4205 ( 0 of them have slacks)
Prop. of supports:  0.9520036223681232

Split 1
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.514
Number of supports:  3700 ( 0 of them have slacks)
Prop. of supports:  0.8376726284808693

Split 2
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.47000000000000003
Number of supports:  3712 ( 0 of them have slacks)
Prop. of supports:  0.8403894045732397

Split 3
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.5280000000000001
Number of supports:  3591 ( 0 of them have slacks)
Prop. of supports:  0.8129952456418383

Split 4
Best value of parameter C found:  {'C': 1.0}
Recall 10-fold cross mean on train data = 0.506
Number of supports:  3653 ( 0 of them have slacks)
Prop. of supports:  0.8270319221190854

## RBF Kernel
There's another possibility for the kernel: The RBF kernel. This is the default kernel in the implementation of SVMs in sklearn, so we don't need to explicitely say the kernel used. Let's try it with default parameters.

In [14]:
svms = []
for i in range(splits):
    svm = SVC(gamma='auto',class_weight='balanced')
    svm.fit(X_train_splits[i],y_train_splits[i])
    svms.append(svm)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

Confusion matrix on test set:
 [[ 1216  1111]
 [ 5554 12671]]
              precision    recall  f1-score   support

           0       0.18      0.52      0.27      2327
           1       0.92      0.70      0.79     18225

   micro avg       0.68      0.68      0.68     20552
   macro avg       0.55      0.61      0.53     20552
weighted avg       0.84      0.68      0.73     20552



Find C and gamma parameters

In [15]:
Cs = np.logspace(-2, 2, num=5, base=10.0)
gammas = [0.000001,0.00001, 0.0001,0.001,0.01,0.1,1,10]
param_grid = {'C': Cs, 'gamma': gammas}

svms = []
for i in range(splits):
    print("\nSplit",i)
    
    scorer = make_scorer(f1_score,pos_label=0)
    grid_search = GridSearchCV(SVC(class_weight='balanced'), param_grid, cv=10, scoring=scorer, iid=True)
    grid_search.fit(X_train_splits[i],y_train_splits[i])

    parval=grid_search.best_params_
    print("Best combination of parameters found: ",parval)
    
    scorer = make_scorer(recall_score,pos_label=0)
    cvacc = cross_val_score(SVC(C=parval['C'],gamma=parval['gamma'],class_weight='balanced'), X=X_train_splits[i],  y=y_train_splits[i], cv=10, scoring=scorer)
    print('Recall 10-fold cross on train data =', cvacc.mean())

    svm = SVC(C=parval['C'],gamma=parval['gamma'],class_weight='balanced')
    svm.fit(X_train_splits[i], y_train_splits[i])
    svms.append(svm)
    
    print("Number of supports: ",np.sum(svm.n_support_), "(",np.sum(np.abs(svm.dual_coef_)==parval['C']) ,"of them have slacks)")
    print("Prop. of supports: ",np.sum(svm.n_support_)/X_train_splits[i].shape[0])
    
vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("\nConfusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))

print(classification_report(y_test, pred))


Split 0
Best combination of parameters found:  {'C': 10.0, 'gamma': 0.001}
Recall 10-fold cross on train data = 0.502
Number of supports:  3893 ( 0 of them have slacks)
Prop. of supports:  0.8813674439664931

Split 1
Best combination of parameters found:  {'C': 100.0, 'gamma': 0.0001}
Recall 10-fold cross on train data = 0.5279999999999999
Number of supports:  3782 ( 0 of them have slacks)
Prop. of supports:  0.856237265112067

Split 2
Best combination of parameters found:  {'C': 100.0, 'gamma': 0.0001}
Recall 10-fold cross on train data = 0.5039999999999999
Number of supports:  3783 ( 0 of them have slacks)
Prop. of supports:  0.8564636631197645

Split 3
Best combination of parameters found:  {'C': 10.0, 'gamma': 0.001}
Recall 10-fold cross on train data = 0.5640000000000001
Number of supports:  3652 ( 0 of them have slacks)
Prop. of supports:  0.8268055241113879

Split 4
Best combination of parameters found:  {'C': 10.0, 'gamma': 0.001}
Recall 10-fold cross on train data = 0.5
Numbe

The best values for recall of class 0 were obtained with linear kernel, polynomial kernel of degree 2 and RBF kernel. Now, we will look at the proportion of supports for each one of the experiments:

In [16]:
linear_kernel_supports = (0.8988000905592031 + 0.8784242698664252 + 0.87706588182024 + 0.8467285487887707 + 0.7969209870953136) / 5
print("Linear kernel average prop. of supports:", linear_kernel_supports)
poly_kernel_d_2_supports = (0.9533620104143083 + 0.8503509169119312 + 0.8523884989812089 + 0.8215983699343445 + 0.8363142404346842) / 5
print("Polynomial kernel average prop. of supports:", poly_kernel_d_2_supports)
rbf_kernel_supports = (0.8813674439664931 + 0.856237265112067 + 0.8564636631197645 + 0.8268055241113879 + 0.8419741906271225) / 5
print("RBF kernel average prop. of supports:", rbf_kernel_supports)

Linear kernel average prop. of supports: 0.8595879556259906
Polynomial kernel average prop. of supports: 0.8628028073352955
RBF kernel average prop. of supports: 0.8525696173873669


We can conclude that RBF kernel generalises better. Now, we will repeat the process done with the RBF kernel, using the best values found for each SVM. This time we begin the process with the whole training dataset, and we will use the test dataset that came out from the preprocessing step for validation purposes, which was unused until this point.

In [20]:
# Load train dataset
df = pd.read_csv("Train.csv")
y_train=df['readmitted'].values
X_train=df.values[:,0:71].astype('float32')
# Load test dataset
df2 = pd.read_csv("Test.csv")
y_test=df2['readmitted'].values
X_test=df2.values[:,0:71].astype('float32')

scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
# Apply the normalization trained in training data in both training and test sets
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

unique, counts = np.unique(y_train, return_counts=True)
ratio = counts[1]/counts[0]
num_samples_0 = 500;
num_samples_1 = int(num_samples_0*ratio)

# Perform 5 undersamples of the train dataset
rus = RandomUnderSampler(sampling_strategy={0:num_samples_0,1:num_samples_1},random_state=42)

splits = 5
X_train_splits = []
y_train_splits = []

for i in range(splits):
    X_train_r, y_train_r = rus.fit_resample(X_train, y_train)
    X_train_splits.append(X_train_r)
    y_train_splits.append(y_train_r)
    indices = rus.sample_indices_
    X_train = np.delete(X_train,indices,0)
    y_train = np.delete(y_train,indices,0)
    unique, counts = np.unique(y_train_r, return_counts=True)
    print("Split",i,dict(zip(unique, counts)))
    unique, counts = np.unique(y_train, return_counts=True)
    print("Remaining",dict(zip(unique, counts)))

# Train 5 SVMs with the best parameters found
svms = []

svm0 = SVC(C=10.0,gamma=0.001,class_weight='balanced')
svm0.fit(X_train_splits[0],y_train_splits[0])
print("\nNumber of supports SVM0: ",np.sum(svm0.n_support_), "(",np.sum(np.abs(svm0.dual_coef_)==10.0) ,"of them have slacks)")
svm0_prop_supp = np.sum(svm0.n_support_)/X_train_splits[0].shape[0]
print("Prop. of supports SVM0: ",svm0_prop_supp)
svms.append(svm0)

svm1 = SVC(C=100.0,gamma=0.0001,class_weight='balanced')
svm1.fit(X_train_splits[1],y_train_splits[1])
print("\nNumber of supports SVM1: ",np.sum(svm1.n_support_), "(",np.sum(np.abs(svm1.dual_coef_)==100.0) ,"of them have slacks)")
svm1_prop_supp = np.sum(svm1.n_support_)/X_train_splits[1].shape[0]
print("Prop. of supports SVM1: ",svm1_prop_supp)
svms.append(svm1)

svm2 = SVC(C=100.0,gamma=0.0001,class_weight='balanced')
svm2.fit(X_train_splits[2],y_train_splits[2])
print("\nNumber of supports SVM2: ",np.sum(svm2.n_support_), "(",np.sum(np.abs(svm2.dual_coef_)==100.0) ,"of them have slacks)")
svm2_prop_supp = np.sum(svm2.n_support_)/X_train_splits[2].shape[0]
print("Prop. of supports SVM2: ",svm2_prop_supp)
svms.append(svm2)

svm3 = SVC(C=10.0,gamma=0.001,class_weight='balanced')
svm3.fit(X_train_splits[3],y_train_splits[3])
print("\nNumber of supports SVM3: ",np.sum(svm3.n_support_), "(",np.sum(np.abs(svm3.dual_coef_)==10.0) ,"of them have slacks)")
svm3_prop_supp = np.sum(svm3.n_support_)/X_train_splits[3].shape[0]
print("Prop. of supports SVM3: ",svm3_prop_supp)
svms.append(svm3)

svm4 = SVC(C=10.0,gamma=0.001,class_weight='balanced')
svm4.fit(X_train_splits[4],y_train_splits[4])
print("\nNumber of supports SVM4: ",np.sum(svm4.n_support_), "(",np.sum(np.abs(svm4.dual_coef_)==10.0) ,"of them have slacks)")
svm4_prop_supp = np.sum(svm4.n_support_)/X_train_splits[4].shape[0]
print("Prop. of supports SVM4: ",svm4_prop_supp)
svms.append(svm4)

vc = VotingClassifier(svms)
pred=vc.predict(X_test)
print("Confusion matrix on test set:\n",sklearn.metrics.confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
print("Average prop. of supports:", (svm0_prop_supp + svm1_prop_supp + svm2_prop_supp + svm3_prop_supp + svm4_prop_supp) / 5)

Split 0 {0: 500, 1: 3916}
Remaining {0: 7255, 1: 56834}
Split 1 {0: 500, 1: 3916}
Remaining {0: 6755, 1: 52918}
Split 2 {0: 500, 1: 3916}
Remaining {0: 6255, 1: 49002}
Split 3 {0: 500, 1: 3916}
Remaining {0: 5755, 1: 45086}
Split 4 {0: 500, 1: 3916}
Remaining {0: 5255, 1: 41170}

Number of supports SVM0:  3648 ( 0 of them have slacks)
Prop. of supports SVM0:  0.8260869565217391

Number of supports SVM1:  3866 ( 0 of them have slacks)
Prop. of supports SVM1:  0.8754528985507246

Number of supports SVM2:  3796 ( 0 of them have slacks)
Prop. of supports SVM2:  0.8596014492753623

Number of supports SVM3:  3725 ( 0 of them have slacks)
Prop. of supports SVM3:  0.8435235507246377

Number of supports SVM4:  3737 ( 0 of them have slacks)
Prop. of supports SVM4:  0.8462409420289855
Confusion matrix on test set:
 [[ 1788  1621]
 [ 8039 17912]]
              precision    recall  f1-score   support

           0       0.18      0.52      0.27      3409
           1       0.92      0.69      0.79 