# Bootstrap Sampling

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from sklearn.utils import resample
import numpy as np
from scipy import stats
import collections

In [None]:
# Load the Iris dataset; its feature matrix and class labels form the pool
# from which the bootstrap samples are drawn.
iris_data = datasets.load_iris()
X_train, Y_train = iris_data.data, iris_data.target

In [None]:
# Number of labelled samples available (echoed as the cell output).
len(Y_train)

150

## Muestreo bootstrap

Usamos tres muestras bootstrap de tamaño 100 (una por modelo), extraídas de las 150 observaciones del conjunto Iris.

In [None]:
# Draw three bootstrap samples of 100 rows each, one per model.
# replace=True spells out that rows are drawn with replacement, and the
# distinct fixed seeds keep the three samples different from one another
# while making every run of the notebook produce the same samples.
X_train_m1, Y_train_m1 = resample(
    X_train, Y_train, replace=True, n_samples=100, random_state=1
)
X_train_m2, Y_train_m2 = resample(
    X_train, Y_train, replace=True, n_samples=100, random_state=2
)
X_train_m3, Y_train_m3 = resample(
    X_train, Y_train, replace=True, n_samples=100, random_state=3
)

#### Modelo 1: k-nearest neighbors

In [None]:
# Model 1: k-nearest neighbors built with no constructor arguments,
# fitted only on the first bootstrap sample (m1).
modelo1 = KNeighborsClassifier()
modelo1.fit(X_train_m1, Y_train_m1) 

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

#### Modelo 2: Regresión logística

In [None]:
# Model 2: logistic regression, fitted only on the second bootstrap sample (m2).
# NOTE(review): max_iter is raised to 500, presumably so the solver has room
# to converge on this data -- confirm.
modelo2 = LogisticRegression(max_iter = 500)
modelo2.fit(X_train_m2,Y_train_m2)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

#### Modelo 3: Decision tree

In [None]:
# Model 3: decision tree, fitted only on the third bootstrap sample (m3).
# random_state is pinned so that fitting on a given sample always yields the
# same tree; scikit-learn otherwise permutes the candidate features randomly
# when searching for splits, so equally good splits can be picked differently
# from run to run.
modelo3 = DecisionTreeClassifier(random_state=0)
modelo3.fit(X_train_m3, Y_train_m3)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

### Accuracy de los Modelos

In [None]:
# Score every model on the complete Iris data, as a percentage.
# Each model was fitted on a sample drawn from this same data, so these are
# in-sample figures rather than held-out scores.
acc_modelo1 = accuracy_score(Y_train, modelo1.predict(X_train))*100
acc_modelo2 = accuracy_score(Y_train, modelo2.predict(X_train))*100
acc_modelo3 = accuracy_score(Y_train, modelo3.predict(X_train))*100

print('Accuracy modelo 1 :', acc_modelo1, '%')
print('Accuracy modelo 2 :', acc_modelo2, '%')
print('Accuracy modelo 3 :', acc_modelo3, '%')

Accuracy modelo 1 : 97.33333333333334 %
Accuracy modelo 2 : 98.0 %
Accuracy modelo 3 : 97.33333333333334 %


## Predicciones

In [None]:
# The three fitted models form the ensemble passed to the prediction helpers.
estimadores=[modelo1, modelo2, modelo3]

### Predicción por mayoría

In [None]:
def predict_voting(estimadores, features):
    """Predict one sample's class by majority vote and print the outcome.

    Each estimator predicts a class for the first row of ``features``; the
    class that collects the most votes wins. When several classes tie for
    the most votes, the smallest label is chosen.

    Args:
        estimadores: Non-empty iterable of fitted models exposing
            ``predict(features)``.
        features: Array-like holding the sample to classify; only the
            prediction for its first row is used.

    Returns:
        The winning class label (a scalar).

    Raises:
        ValueError: If ``estimadores`` yields no models.
    """
    predicciones = [estimador.predict(features)[0] for estimador in estimadores]
    if not predicciones:
        raise ValueError('estimadores must contain at least one model')

    # np.unique returns the distinct labels sorted, together with their
    # counts, so argmax (which picks the first maximum) selects the most
    # voted label and breaks ties towards the smallest one. This avoids
    # scipy.stats.mode, whose result is a length-1 array or a scalar
    # depending on the SciPy release, and it also works for string labels.
    clases, votos = np.unique(predicciones, return_counts=True)
    prediccion = clases[np.argmax(votos)]

    print('Predicciones: ', predicciones)
    print('Prediccion:', prediccion)
    return prediccion

### Predicción por estadística

In [None]:
# Distinct class labels present in the data; predict_prob reads this
# module-level set to decide which classes to report.
y_clases = set(Y_train)
y_clases

{0, 1, 2}

In [None]:
def predict_prob(estimadores, features, clases=None):
    """Print and return the share of estimators voting for each class.

    Each estimator predicts a class for the first row of ``features``; the
    percentage reported for a class is the fraction of estimators that
    predicted it, times 100.

    Args:
        estimadores: Non-empty iterable of fitted models exposing
            ``predict(features)``.
        features: Array-like holding the sample to classify; only the
            prediction for its first row is used.
        clases: Iterable of class labels to report. When omitted, the
            module-level ``y_clases`` is used, so existing two-argument
            calls behave as before.

    Returns:
        dict mapping each label in ``clases`` to its vote percentage.

    Raises:
        ValueError: If ``estimadores`` yields no models.
    """
    if clases is None:
        # Fall back to the notebook-level set of known labels.
        clases = y_clases

    predicciones = [estimador.predict(features)[0] for estimador in estimadores]
    if not predicciones:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError('estimadores must contain at least one model')

    numero_estimadores = len(predicciones)
    conteo_clases = collections.Counter(predicciones)

    print('Predicciones: ', predicciones)
    porcentajes = {}
    for clase in clases:
        # Counter yields 0 for labels nobody voted for, so they report 0.0 %.
        porcentajes[clase] = (conteo_clases[clase]/numero_estimadores)*100
        print('Clase [',clase,'] :', porcentajes[clase], '%' )
    return porcentajes

## Ejemplos

In [None]:
# Classify the sample stored at row 106 with both ensemble strategies.
# reshape(1, -1) turns that row into a single-row 2-D array before it is
# handed to the estimators.
muestra = X_train[106].reshape(1, -1)

print("Mayoria")
predict_voting(estimadores, muestra)
print("")
print("Estadistica")
predict_prob(estimadores, muestra)

Mayoria
Predicciones:  [1, 2, 1]
Prediccion: [1]

Estadistica
Predicciones:  [1, 2, 1]
Clase [ 0 ] : 0.0 %
Clase [ 1 ] : 66.66666666666666 %
Clase [ 2 ] : 33.33333333333333 %


In [None]:
# Classify the sample stored at row 72 with both ensemble strategies.
# reshape(1, -1) turns that row into a single-row 2-D array before it is
# handed to the estimators.
muestra = X_train[72].reshape(1, -1)

print("Mayoria")
predict_voting(estimadores, muestra)
print("")
print("Estadistica")
predict_prob(estimadores, muestra)

Mayoria
Predicciones:  [1, 1, 1]
Prediccion: [1]

Estadistica
Predicciones:  [1, 1, 1]
Clase [ 0 ] : 0.0 %
Clase [ 1 ] : 100.0 %
Clase [ 2 ] : 0.0 %
