In [9]:
from sklearn import datasets
from scipy import stats
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np

def Get_Stats(data, target, classifier):
    """Estimate the accuracy of ``classifier`` with nested cross-validation.

    The classifier is placed behind a ``StandardScaler`` in a pipeline, its
    ``n_estimators`` hyper-parameter is tuned by a 4-fold grid search, and the
    tuned model is scored by 3 repetitions of stratified 10-fold
    cross-validation (fixed seed).

    Parameters
    ----------
    data : feature matrix passed straight to ``cross_val_score``.
    target : class labels passed straight to ``cross_val_score``.
    classifier : estimator exposing an ``n_estimators`` parameter.

    Returns
    -------
    tuple
        ``(mean, std, lower, upper)`` where ``lower``/``upper`` bound the 95%
        normal-approximation interval around the mean fold accuracy.
    """
    model = Pipeline([("scaler", StandardScaler()), ("classifier", classifier)])

    # Inner loop: choose the ensemble size by 4-fold grid search.
    tuner = GridSearchCV(
        model,
        param_grid={"classifier__n_estimators": [10, 25, 50, 100]},
        scoring="accuracy",
        cv=4,
    )

    # Outer loop: 10 folds x 3 repeats, seeded so the splits are repeatable.
    outer_folds = RepeatedStratifiedKFold(
        n_splits=10, n_repeats=3, random_state=36851234)

    fold_scores = cross_val_score(
        tuner, data, target,
        scoring="accuracy", cv=outer_folds, n_jobs=-1, error_score="raise")

    mean = np.mean(fold_scores)
    std = np.std(fold_scores)
    standard_error = std / np.sqrt(fold_scores.size)
    lower, upper = stats.norm.interval(0.95, loc=mean, scale=standard_error)

    return mean, std, lower, upper

def Get_Digits():
    """Load scikit-learn's digits dataset and return it as ``(data, target)``."""
    bunch = datasets.load_digits()
    return bunch.data, bunch.target

def Get_Wine():
    """Load scikit-learn's wine dataset and return it as ``(data, target)``."""
    wine = datasets.load_wine()
    return wine.data, wine.target

def Get_Breast_Cancer():
    """Load scikit-learn's breast-cancer dataset and return it as ``(data, target)``."""
    breast_cancer = datasets.load_breast_cancer()
    return breast_cancer.data, breast_cancer.target


def _print_ensemble_stats(data, target):
    """Score Bagging, AdaBoost and RandomForest on one dataset and print the results.

    Each classifier is built with its default arguments and evaluated through
    ``Get_Stats``. All three evaluations finish before anything is printed, and
    the tuples are printed in the order Bagging, AdaBoost, RandomForest.
    """
    ensemble_classes = (BaggingClassifier, AdaBoostClassifier, RandomForestClassifier)
    results = [Get_Stats(data, target, ensemble_class())
               for ensemble_class in ensemble_classes]

    for result in results:
        print(result)


def Calculate_Digits_Classifiers():
    """Print the ``Get_Stats`` tuple of each ensemble classifier on the digits dataset."""
    # The dataset is loaded once and shared by the three evaluations.
    _print_ensemble_stats(*Get_Digits())


def Calculate_Wine_Classifiers():
    """Print the ``Get_Stats`` tuple of each ensemble classifier on the wine dataset."""
    _print_ensemble_stats(*Get_Wine())


def Calculate_Breast_Cancer_Classifiers():
    """Print the ``Get_Stats`` tuple of each ensemble classifier on the breast-cancer dataset."""
    _print_ensemble_stats(*Get_Breast_Cancer())

# Run the digits benchmark; the wine and breast-cancer runs are left disabled below.
Calculate_Digits_Classifiers()
# Calculate_Wine_Classifiers()
# Calculate_Breast_Cancer_Classifiers()

(0.9540078626112146, 0.013197001225008624, 0.9492854631499319, 0.9587302620724973)
(0.26987792261535276, 0.022260493391867975, 0.26191225373275584, 0.2778435914979497)
(0.9747713635423134, 0.01164965549576772, 0.9706026644644434, 0.9789400626201834)


In [52]:
import collections as cl

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample


class HPClassifier(ClassifierMixin, BaseEstimator):
    """Majority-vote ensemble of GaussianNB, DecisionTree and 1-NN classifiers.

    ``fit`` trains one trio of base classifiers per round. Round 0 uses the
    training set as given; every later round uses a bootstrap resample of that
    same original training set. ``predict`` returns, for each row, the label
    predicted by the largest number of base classifiers.

    ``ClassifierMixin`` is listed so scikit-learn recognises the estimator as
    a classifier (for example, an integer ``cv`` in ``GridSearchCV`` then uses
    stratified folds) and so that ``score`` is available.

    Parameters
    ----------
    n_Samples : int or None
        Number of training rounds; the ensemble holds ``3 * n_Samples`` base
        classifiers. Must be set to a positive integer before ``fit``.

    Attributes
    ----------
    classifiers : list
        Fitted base classifiers, ordered GaussianNB, DecisionTree, 1-NN
        within each round.
    frequency : collections.Counter
        Label counts of the training target; stored by ``fit`` but not
        consulted by ``predict``.
    classes_ : numpy.ndarray
        Sorted unique labels seen by ``fit``.
    """

    def __init__(self, n_Samples=None):
        super().__init__()
        self.n_Samples = n_Samples
        self.classifiers = []
        self.frequency = []

    def fit(self, data, target):
        """Train the base classifiers and return ``self``.

        Raises
        ------
        ValueError
            If ``n_Samples`` is ``None`` or smaller than 1.
        """
        if self.n_Samples is None or self.n_Samples < 1:
            raise ValueError(
                "n_Samples must be a positive integer, got %r" % (self.n_Samples,))

        self.classes_ = np.unique(target)
        self.frequency = cl.Counter(target)

        classifiers = []
        for nr_Index in range(self.n_Samples):
            if nr_Index == 0:
                sample_data, sample_target = data, target
            else:
                # Always resample the ORIGINAL training set. Rebinding
                # data/target here would make each round a resample of the
                # previous resample, steadily shrinking the set of distinct rows.
                sample_data, sample_target = resample(
                    data, target, random_state=nr_Index - 1)

            # Append order matters: it decides which label wins a tied vote.
            for base in (GaussianNB(),
                         DecisionTreeClassifier(),
                         KNeighborsClassifier(n_neighbors=1)):
                base.fit(sample_data, sample_target)
                classifiers.append(base)

        self.classifiers = classifiers
        return self

    def predict(self, data):
        """Return the majority-vote label for each row of ``data``.

        When several labels tie, the one encountered first in ``classifiers``
        order wins (``Counter.most_common`` keeps first-seen order for equal
        counts).
        """
        predictions = [classifier.predict(data) for classifier in self.classifiers]

        # zip(*predictions) yields, per input row, the votes of all classifiers.
        result = [cl.Counter(votes).most_common(1)[0][0]
                  for votes in zip(*predictions)]

        return np.asarray(result)


In [53]:
from sklearn.utils import resample
from sklearn import datasets

from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import pandas as pd

# NOTE: the name `digits` is kept for compatibility; this cell evaluates the WINE dataset.
digits = datasets.load_wine()
target = digits.target
data = digits.data

scaler = StandardScaler()

hpClassifier = HPClassifier()

# Apply the scaler through a pipeline so it is refit on each training fold.
# Previously `scaler` was created but never used, so the model saw raw features.
pipeline = Pipeline([("scaler", scaler), ("classifier", hpClassifier)])

# Pipeline parameters are addressed as <step name>__<parameter name>.
grade = {'classifier__n_Samples': [1, 3, 5, 7]}

gs = GridSearchCV(estimator=pipeline, param_grid=grade,
                  scoring='accuracy', cv=2)

# Seeded so the outer splits, and therefore the printed scores, are repeatable.
rkf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2, random_state=36851234)

scores = cross_val_score(gs, data, target, scoring='accuracy', cv=rkf)

print(scores)




0    2
1    2
2    2
Name: 0, dtype: int64
0    2
1    2
2    2
Name: 1, dtype: int64
0    2
1    2
2    2
Name: 2, dtype: int64
0    1
1    2
2    2
Name: 3, dtype: int64
0    1
1    2
2    2
Name: 4, dtype: int64
0    1
1    2
2    2
Name: 5, dtype: int64
0    1
1    2
2    2
Name: 6, dtype: int64
0    1
1    1
2    2
Name: 7, dtype: int64
0    1
1    1
2    2
Name: 8, dtype: int64
0    1
1    1
2    2
Name: 9, dtype: int64
0    1
1    1
2    2
Name: 10, dtype: int64
0    1
1    1
2    2
Name: 11, dtype: int64
0    1
1    1
2    2
Name: 12, dtype: int64
0    1
1    1
2    2
Name: 13, dtype: int64
0    1
1    1
2    2
Name: 14, dtype: int64
0    1
1    1
2    2
Name: 15, dtype: int64
0    1
1    1
2    1
Name: 16, dtype: int64
0    1
1    1
2    1
Name: 17, dtype: int64
0    1
1    1
2    1
Name: 18, dtype: int64
0    1
1    1
2    1
Name: 19, dtype: int64
0    1
1    1
2    1
Name: 20, dtype: int64
0    1
1    1
2    1
Name: 21, dtype: int64
0    1
1    1
2    1
Name: 22, dtype: int6

Dúvidas: Como eu dou o fit no Heterogen