In [9]:
from sklearn import datasets
from scipy import stats
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np

def Get_Stats(data, target, classifier):
    """Score a tuned classifier with nested cross-validation.

    The classifier is placed behind a ``StandardScaler`` in a pipeline, its
    ``n_estimators`` is tuned by an inner 4-fold grid search, and the tuned
    model is evaluated with an outer 10-fold stratified split repeated
    3 times (30 accuracy scores in total).

    Args:
        data: Feature matrix passed to ``cross_val_score``.
        target: Class labels aligned with ``data``.
        classifier: Estimator exposing an ``n_estimators`` parameter.

    Returns:
        Tuple ``(mean, std, lower, upper)``: mean accuracy, standard deviation
        of the fold accuracies (NumPy's default ``std``), and the bounds of a
        95% normal-approximation interval around the mean.
    """
    model = Pipeline([("scaler", StandardScaler()), ('classifier', classifier)])

    # Inner loop: choose the ensemble size on each outer training split.
    tuned_model = GridSearchCV(
        model,
        param_grid={'classifier__n_estimators': [10, 25, 50, 100]},
        scoring='accuracy',
        cv=4,
    )

    # Outer loop: fixed seed so the fold assignment is the same on every run.
    outer_cv = RepeatedStratifiedKFold(
        n_splits=10, n_repeats=3, random_state=36851234)

    fold_accuracies = cross_val_score(
        tuned_model,
        data,
        target,
        scoring='accuracy',
        cv=outer_cv,
        n_jobs=-1,
        error_score='raise',
    )

    mean = fold_accuracies.mean()
    std = fold_accuracies.std()
    standard_error = std / np.sqrt(len(fold_accuracies))
    lower, upper = stats.norm.interval(0.95, loc=mean, scale=standard_error)

    return mean, std, lower, upper

def Get_Digits():
    """Load the digits dataset via ``datasets.load_digits``.

    Returns:
        Tuple ``(data, target)`` taken from the loaded dataset object.
    """
    dataset = datasets.load_digits()
    return dataset.data, dataset.target

def Get_Wine():
    """Load the wine dataset via ``datasets.load_wine``.

    Returns:
        Tuple ``(data, target)`` taken from the loaded dataset object.
    """
    wine = datasets.load_wine()
    return wine.data, wine.target

def Get_Breast_Cancer():
    """Load the breast-cancer dataset via ``datasets.load_breast_cancer``.

    Returns:
        Tuple ``(data, target)`` taken from the loaded dataset object.
    """
    breast_cancer = datasets.load_breast_cancer()
    return breast_cancer.data, breast_cancer.target


def Calculate_Digits_Classifiers():
    """Evaluate bagging, AdaBoost and random forest on the digits dataset.

    The dataset is loaded once and shared by all three evaluations. Each
    result tuple returned by ``Get_Stats`` is printed on its own line, in the
    order bagging, AdaBoost, random forest, after all evaluations finish.
    """
    data, target = Get_Digits()

    classifiers = (
        BaggingClassifier(),
        AdaBoostClassifier(),
        RandomForestClassifier(),
    )
    results = [Get_Stats(data, target, classifier) for classifier in classifiers]

    for result in results:
        print(result)

def Calculate_Wine_Classifiers():
    """Evaluate bagging, AdaBoost and random forest on the wine dataset.

    The dataset is loaded once and shared by all three evaluations. Each
    result tuple returned by ``Get_Stats`` is printed on its own line, in the
    order bagging, AdaBoost, random forest, after all evaluations finish.
    """
    data, target = Get_Wine()

    classifiers = (
        BaggingClassifier(),
        AdaBoostClassifier(),
        RandomForestClassifier(),
    )
    results = [Get_Stats(data, target, classifier) for classifier in classifiers]

    for result in results:
        print(result)

def Calculate_Breast_Cancer_Classifiers():
    """Evaluate bagging, AdaBoost and random forest on the breast-cancer dataset.

    The dataset is loaded once and shared by all three evaluations. Each
    result tuple returned by ``Get_Stats`` is printed on its own line, in the
    order bagging, AdaBoost, random forest, after all evaluations finish.
    """
    data, target = Get_Breast_Cancer()

    classifiers = (
        BaggingClassifier(),
        AdaBoostClassifier(),
        RandomForestClassifier(),
    )
    results = [Get_Stats(data, target, classifier) for classifier in classifiers]

    for result in results:
        print(result)

# Only the digits benchmark is executed here; the wine and breast-cancer
# benchmarks are left disabled and can be re-enabled by uncommenting them.
Calculate_Digits_Classifiers()
# Calculate_Wine_Classifiers()
# Calculate_Breast_Cancer_Classifiers()

(0.9540078626112146, 0.013197001225008624, 0.9492854631499319, 0.9587302620724973)
(0.26987792261535276, 0.022260493391867975, 0.26191225373275584, 0.2778435914979497)
(0.9747713635423134, 0.01164965549576772, 0.9706026644644434, 0.9789400626201834)


In [27]:
from sklearn.utils import resample
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_predict, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import pandas as pd

# Load the wine dataset once. `data` and `target` are never reassigned below,
# so every bootstrap sample is drawn from the original observations rather
# than from a previously resampled copy.
wine = datasets.load_wine()
target = wine.target
data = wine.data

scaler = StandardScaler()

# cross_val_predict needs each sample to fall in exactly one test fold, so a
# plain 5-fold split is used here instead of a repeated K-fold scheme.
cv = 5

for nr_Samples in [1, 3, 5, 7]:
    # Out-of-fold predictions: three entries (NB, tree, 1-NN) per sample set.
    scores = []

    for nr_Index in range(nr_Samples):
        if nr_Index == 0:
            # The first sample set is the untouched original data.
            sample_data, sample_target = data, target
        else:
            # Later sample sets are bootstraps of the ORIGINAL data; the seed
            # depends only on the index, so the draws are repeatable.
            sample_data, sample_target = resample(
                data, target, random_state=nr_Index - 1)

        # Each model is evaluated behind the scaler. cross_val_predict works
        # on clones of the estimator, so sharing `scaler` between pipelines
        # does not leak fitted state from one model to another.
        pipelines = [
            Pipeline([("scaler", scaler), ('classifier', GaussianNB())]),
            Pipeline([("scaler", scaler), ('classifier', DecisionTreeClassifier())]),
            Pipeline([("scaler", scaler), ('classifier', KNeighborsClassifier(n_neighbors=1))]),
        ]

        for pipeline in pipelines:
            predictions = cross_val_predict(
                pipeline, sample_data, sample_target, cv=cv, n_jobs=-1)
            scores.append(predictions)

    # TODO: combine the per-model predictions; the original loop over `scores`
    # was left without a body. Until then, report how many prediction arrays
    # were collected for this number of sample sets.
    print(len(scores))


3
9
15
21


Dúvidas: Como eu dou o fit no Heterogen