In [13]:
from sklearn.svm import SVC
import numpy as np
from sklearn import datasets
import pandas as pd
from ensemble import EnsembleClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
import math


In [2]:
# Load the iris dataset, wrap the feature matrix in a DataFrame with named
# columns, and hold out 33% of the rows as a test set (fixed seed).
iris = datasets.load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [3]:
# Inspect the training split: the feature DataFrame and its label array.
X_train, y_train

(     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 96                 5.7               2.9                4.2               1.3
 105                7.6               3.0                6.6               2.1
 66                 5.6               3.0                4.5               1.5
 0                  5.1               3.5                1.4               0.2
 122                7.7               2.8                6.7               2.0
 ..                 ...               ...                ...               ...
 71                 6.1               2.8                4.0               1.3
 106                4.9               2.5                4.5               1.7
 14                 5.8               4.0                1.2               0.2
 92                 5.8               2.6                4.0               1.2
 102                7.1               3.0                5.9               2.1
 
 [100 rows x 4 columns],
 array([1, 2, 1, 0, 2, 1,

In [5]:

# Fit the project-local EnsembleClassifier with a probability-enabled SVC as
# its base estimator. SVC(probability=True) calibrates its probabilities with
# an internal shuffled cross-validation, so its random_state is pinned (same
# seed as the train/test split) to keep predict_proba repeatable across runs.
# NOTE(review): EnsembleClassifier may do its own random sampling; if it
# accepts a random_state argument, pin that as well - confirm in ensemble.py.
test = EnsembleClassifier(SVC(probability=True, random_state=42)).fit(X_train, y_train)

In [7]:
# Probability estimates from the fitted EnsembleClassifier on the held-out
# test set (one row per sample, one column per class).
test.predict_proba(X_test)

array([[0.01136937, 0.76347456, 0.22515607],
       [0.86477009, 0.07889232, 0.05633759],
       [0.03020446, 0.0555117 , 0.91428384],
       [0.01131134, 0.80771738, 0.18097128],
       [0.01248659, 0.59539716, 0.39211625],
       [0.87934051, 0.07542632, 0.04523317],
       [0.02869155, 0.90883639, 0.06247206],
       [0.01270019, 0.14967869, 0.83762112],
       [0.01023963, 0.80088963, 0.18887074],
       [0.01318219, 0.918663  , 0.06815481],
       [0.01332466, 0.19312114, 0.7935542 ],
       [0.94218515, 0.03537586, 0.02243899],
       [0.87815684, 0.07092196, 0.0509212 ],
       [0.93824047, 0.03856994, 0.02318959],
       [0.95476307, 0.02761312, 0.01762381],
       [0.01541307, 0.63597006, 0.34861687],
       [0.01145629, 0.08449585, 0.90404787],
       [0.01413877, 0.93316657, 0.05269466],
       [0.01365327, 0.85515682, 0.1311899 ],
       [0.01088291, 0.10779665, 0.88132044],
       [0.95016335, 0.03230978, 0.01752687],
       [0.01427648, 0.39851724, 0.58720629],
       [0.

In [8]:
# Reference model: scikit-learn bagging over probability-enabled SVCs.
# Each base estimator is trained on floor(sqrt(n_features)) features.
max_features = math.floor(math.sqrt(len(X.columns)))
# random_state pins the bootstrap/feature subsampling; BaggingClassifier also
# derives a seed for every base estimator that exposes random_state, which
# makes the SVC probability calibration repeatable as well.
bag = BaggingClassifier(
    SVC(probability=True),
    max_features=max_features,
    random_state=42,
).fit(X_train, y_train)

In [9]:
# Probability estimates from the fitted BaggingClassifier on the held-out
# test set, for side-by-side comparison with the EnsembleClassifier output.
bag.predict_proba(X_test)

array([[0.02037274, 0.73897408, 0.24065318],
       [0.85171198, 0.09651276, 0.05177526],
       [0.03601193, 0.04283687, 0.9211512 ],
       [0.01987523, 0.77846997, 0.2016548 ],
       [0.02087511, 0.59372566, 0.38539924],
       [0.87158974, 0.08694038, 0.04146989],
       [0.04512126, 0.89725451, 0.05762423],
       [0.01851417, 0.18096559, 0.80052024],
       [0.01897872, 0.73947236, 0.24154892],
       [0.0269571 , 0.91200423, 0.06103868],
       [0.01902222, 0.21428874, 0.76668904],
       [0.92754421, 0.04367037, 0.02878543],
       [0.89491154, 0.06144332, 0.04364514],
       [0.92627524, 0.04548973, 0.02823502],
       [0.92411247, 0.04779088, 0.02809665],
       [0.02332558, 0.63091607, 0.34575835],
       [0.01594653, 0.05943872, 0.92461475],
       [0.03431755, 0.914872  , 0.05081044],
       [0.02364817, 0.823815  , 0.15253683],
       [0.01553478, 0.0759539 , 0.90851132],
       [0.93038263, 0.04365088, 0.02596649],
       [0.0210533 , 0.40309228, 0.57585442],
       [0.

In [10]:
# Predicted class labels from the EnsembleClassifier on the held-out test set.
test.predict(X_test)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2])

In [11]:
# Predicted class labels from the BaggingClassifier on the held-out test set.
bag.predict(X_test)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2])

In [14]:
# Second reference model: a small random forest (10 trees). random_state is
# pinned so the bootstrap samples and per-split feature draws - and therefore
# the predictions - are the same on every run.
forest = RandomForestClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)

In [16]:
# Predicted class labels from the random forest on the held-out test set.
forest.predict(X_test)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2])