### **Dureza de aleacion de metal**
```
Version     : 1.5, Para uso educativo
Autor       : Luis Beltran Palma Ttito
Lugar       : Cusco, Peru, 2023
Proposito   : Dureza de aleacion de metales con esambles
```


# **0. PASOS PREVIOS**

## **Librerias**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Algoritmos de clasificacion: K-NN, AD, SVM, MLP, NB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

# Modulo para la separacion de datos para entrenamiento y test
from sklearn.model_selection import train_test_split

# Ensambles
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import ExtraTreesClassifier

# Modulo que implementa busqueda aleatoria en cuadricula
from sklearn.model_selection import RandomizedSearchCV

# Validacion cruzada
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

%matplotlib inline

## **Importacion de datos**

In [2]:
datos = pd.read_csv('./dureza.csv')
datos.head()

Unnamed: 0,PC,PMg,PP,PS,Psi,Pti,PNb,PCr,PBr,Pal,PV,Pcu,PCl,Pni,Pmo,Pn,Dureza
0,0.243,1.27,0.015,0.008,0.112,0.009,0.001,0.03,0.0004,0.039,0.003,0.03,0.0019,0.03,0.007,0.0058,293
1,0.231,1.26,0.015,0.008,0.128,0.012,0.009,0.02,0.0005,0.028,0.003,0.01,0.0028,0.02,0.007,0.0035,293
2,0.22,1.26,0.017,0.0046,0.129,0.013,0.001,0.03,0.0004,0.044,0.004,0.03,0.0023,0.03,0.01,0.0048,293
3,0.23,1.26,0.014,0.004,0.13,0.013,0.002,0.01,0.0003,0.027,0.001,0.01,0.0022,0.01,0.003,0.0064,293
4,0.232,1.26,0.012,0.0073,0.135,0.011,0.001,0.02,0.0005,0.045,0.003,0.02,0.0026,0.02,0.01,0.0065,293


## **Separacion de datos para Training y Testing**

In [3]:
# Separacion de datos en entradas
x = datos.drop(['Dureza'], axis=1)
y = pd.DataFrame(datos['Dureza'])

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=123, train_size=0.75, shuffle=True)

## **Distribucion de datos**

In [4]:
datos.groupby('Dureza').size()

Dureza
293    36
302    28
311    45
321    31
dtype: int64

# **1. MODELO VOTING: AD + K-NN + SVC + MLP**

### Busqueda aleatoria de hiper-parametro en cuadricula, para **ARBOL DE DECISION**

In [6]:
# Creacion de cuadricula de busqueda
grid_aleatorio_ad = {'criterion': ['gini', 'entropy'], 'max_depth': [4, 8, 12, 20, 50, 120, 150]}
# Definicion del modelo 
arbol_decision = DecisionTreeClassifier()
# Busqueda aleatoria en cuadricula, con validacion cruzada
model_1 = RandomizedSearchCV(estimator=arbol_decision, param_distributions=grid_aleatorio_ad, n_iter=14, cv=5, verbose=2, random_state=42, n_jobs=-1)

model_1.fit(x_train, y_train)
print('Score de train: ', model_1.score(x_test, y_test))
print('Mejor modelo :', model_1.best_params_)

Fitting 5 folds for each of 14 candidates, totalling 70 fits
[CV] END ........................criterion=gini, max_depth=4; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=4; total time=   0.2s[CV] END ........................criterion=gini, max_depth=8; total time=   0.2s

[CV] END ........................criterion=gini, max_depth=8; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=4; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=4; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=8; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=4; total time=   0.2s
[CV] END ........................criterion=gini, max_depth=8; total time=   0.0s
[CV] END ........................criterion=gini, max_depth=8; total time=   0.0s
[CV] END .......................criterion=gini, max_depth=12; total time=   0.0s
[CV] END .......................criterion=gini, 

### Busqueda aleatorio de hiper-parametro en cuadricula, para **K-VECINOS MAS CERCANOS**

In [7]:
grid_aleatorio_knn = {'n_neighbors': [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}
knn = KNeighborsClassifier()
model_2 = RandomizedSearchCV(estimator=knn, param_distributions=grid_aleatorio_knn, n_iter=14, cv=5, verbose=2, random_state=42, n_jobs=-1)
model_2.fit(x_train, y_train)
print('Score de train: ', model_2.score(x_test, y_test))
print('Mejor modelo: ', model_2.best_params_)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


[CV] END ......................................n_neighbors=2; total time=   1.0s[CV] END ......................................n_neighbors=2; total time=   1.0s
[CV] END ......................................n_neighbors=2; total time=   1.0s

[CV] END ......................................n_neighbors=2; total time=   1.0s
[CV] END ......................................n_neighbors=2; total time=   1.0s
[CV] END ......................................n_neighbors=7; total time=   1.0s
[CV] END ......................................n_neighbors=7; total time=   1.0s
[CV] END ......................................n_neighbors=7; total time=   1.0s
[CV] END ......................................n_neighbors=7; total time=   0.0s
[CV] END ......................................n_neighbors=7; total time=   0.0s
[CV] END .....................................n_neighbors=13; total time=   0.0s
[CV] END .....................................n_neighbors=13; total time=   0.0s
[CV] END ...................

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Score de train:  0.2857142857142857
Mejor modelo:  {'n_neighbors': 17}


### Busqueda aleatorioa de hiper-parametro en cuadricula, para **MAQUINA DE VECTOR DE SOPORTE**

In [8]:
grid_aleatorio_svc = {'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf', 'poly', 'sigmoid'], 'C': [0.1, 1, 2, 3]}
svc = SVC()
model_3 = RandomizedSearchCV(estimator=svc, param_distributions=grid_aleatorio_svc, n_iter=14, cv=5, verbose=2, random_state=42, n_jobs=-1)
model_3.fit(x_train, y_train)
print('Score de train: ', model_3.score(x_test, y_test))
print('Mejor modelo: ', model_3.best_params_)

Fitting 5 folds for each of 14 candidates, totalling 70 fits
[CV] END ........................C=3, gamma=0.1, kernel=poly; total time=   0.1s[CV] END ........................C=3, gamma=0.1, kernel=poly; total time=   0.1s
[CV] END .........................C=2, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END ........................C=3, gamma=0.1, kernel=poly; total time=   0.1s
[CV] END .........................C=2, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .........................C=2, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .........................C=2, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .........................C=2, gamma=0.1, kernel=rbf; total time=   0.1s

[CV] END .......................C=2, gamma=1, kernel=sigmoid; total time=   0.0s
[CV] END ........................C=3, gamma=0.1, kernel=poly; total time=   0.0s[CV] END .......................C=3, gamma=0.01, kernel=poly; total time=   0.0s
[CV] END .......................C=2, gamma=1, ker

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

### Busqueda aleatoria de hiper-parametro en cuadricula, para **PERCEPTRON MULTICAPA**

In [10]:
grid_aleatorio_mlp = {'hidden_layer_sizes': [(50, 20, 10, 5), (80, 40, 15, 5), (100, 50, 20, 8), (20, 10, 5)], 'activation': ['logistic', 'tanh'], 'alpha': [0.01, 0.05, 0.1, 0.25]}
mlp = MLPClassifier(max_iter=1000)
model_4 = RandomizedSearchCV(estimator=mlp, param_distributions=grid_aleatorio_mlp, n_iter=14, cv=5, verbose=2, random_state=42, n_jobs=-1)
model_4.fit(x_train, y_train)
print('Score de train: ', model_4.score(x_test, y_test))
print('Mejor modelo: ', model_4.best_params_)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(20, 10, 5); total time=   0.1s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(20, 10, 5); total time=   0.1s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(20, 10, 5); total time=   0.2s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(20, 10, 5); total time=   0.3s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.6s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.8s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.9s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.3s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.8s
[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   1.3s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.5s

[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   1.5s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.4s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(100, 50, 20, 8); total time=   0.1s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.6s
[CV] END activation=logistic, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.6s


  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(50, 20, 10, 5); total time=   1.8s


  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(80, 40, 15, 5); total time=   2.0s


  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   1.3s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.7s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(100, 50, 20, 8); total time=   1.9s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   1.2s
[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(100, 50, 20, 8); total time=   2.0s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(100, 50, 20, 8); total time=   1.8s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.4s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.3s
[CV] END activation=tanh, alpha=0.25, hidden_layer_sizes=(100, 50, 20, 8); total time=   2.2s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.4s
[CV] END activation=logistic, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.4s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   2.0s
[CV] END activation=logistic, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.3s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV] END activation=logistic, alpha=0.05, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.01, hidden_layer_sizes=(50, 20, 10, 5); total time=   0.7s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.2s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=logistic, alpha=0.25, hidden_layer_sizes=(80, 40, 15, 5); total time=   0.3s
[CV] END activation=tanh, alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5); total time=   1.7s


  y = column_or_1d(y, warn=True)


Score de train:  0.2857142857142857
Mejor modelo:  {'hidden_layer_sizes': (100, 50, 20, 8), 'alpha': 0.25, 'activation': 'tanh'}


In [11]:
print(model_1.best_params_)
print(model_2.best_params_)
print(model_3.best_params_)
print(model_4.best_params_)

{'max_depth': 150, 'criterion': 'gini'}
{'n_neighbors': 17}
{'kernel': 'poly', 'gamma': 1, 'C': 3}
{'hidden_layer_sizes': (100, 50, 20, 8), 'alpha': 0.25, 'activation': 'tanh'}


## **Voting = AD + K-NN + SVC + MLP**

In [12]:
model_11 = DecisionTreeClassifier(criterion='gini', max_depth=50)
model_22 = KNeighborsClassifier(n_neighbors=17)
model_33 = SVC(gamma=1, kernel='poly', C=3)
model_44 = MLPClassifier(activation='tanh', alpha=0.1, hidden_layer_sizes=(80, 40, 15, 5), max_iter=1000)

In [13]:
# Definir el ensamble Voting de clasificacion
EnsambleVoting = VotingClassifier(estimators=[('dt', model_11), ('knn', model_22), ('svc', model_33), ('MLP', model_44)], voting='hard', weights=[1,3,1,10], n_jobs=-1)

# Entrenar ensamble
EnsambleVoting.fit(x_train, y_train)

print('Score de train: ', EnsambleVoting.score(x_test, y_test))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Score de train:  0.2571428571428571


### Validacion cruzada

In [14]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
# Evaluar un modelo dado usando cross-validation

def evaluate_model(model, x, y):
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    scores = cross_val_score(model, x, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
    return scores

import statistics
acu_vc_10_test_voting = statistics.mean(evaluate_model(EnsambleVoting, x_test, y_test))

print('acu cv 10 test: ', acu_vc_10_test_voting)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.clas

acu cv 10 test:  0.3388888888888889


# **MODELO BAGGING: RF, AD, K-NN, SVC, MLP**

In [15]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

### Bagging con **BOSQUE ALEATORIO**

In [16]:
EnsambleBaggingRF = RandomForestClassifier(n_estimators=300, criterion='gini', max_depth=80)
EnsambleBaggingRF.fit(x_train, y_train)

print('Score de testing: ', EnsambleBaggingRF.score(x_test, y_test))

  EnsambleBaggingRF.fit(x_train, y_train)


Score de testing:  0.6285714285714286
