# Obtención de datos

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
from sklearn.datasets import load_breast_cancer
# Load scikit-learn's bundled breast-cancer dataset and expose it as a
# DataFrame: one column per feature plus a 'Target' column with the labels.
cancer_dataset = load_breast_cancer()
df = pd.DataFrame(
    data=cancer_dataset.data,
    columns=cancer_dataset.feature_names,
)
# The frame has the default 0..n-1 index, so the label array lines up row by row.
df['Target'] = cancer_dataset.target
# Last expression of the cell: the notebook displays the first rows.
df.head()

In [None]:
# Separate the feature matrix (every column except 'Target') from the labels.
y = df["Target"]
x = df.drop(columns="Target")

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=100,
)

# Usando el algoritmo Gradient Boosting

Documentación de referencia: [sklearn.ensemble.GradientBoostingClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html#sklearn.ensemble.GradientBoostingClassifier)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting classifier with 100 boosting stages and a fixed seed so
# that training is reproducible.
clf = GradientBoostingClassifier(
    n_estimators=100,
    random_state=100,
)

In [None]:
# Train the classifier on the training split only (the test split stays unseen).
clf.fit(X=x_train, y=y_train)

In [None]:
# Predict labels for both splits (training first, then test) so the metrics
# below can compare performance on seen and unseen data.
y_train_predict, y_test_predict = (
    clf.predict(features) for features in (x_train, x_test)
)

In [None]:
# Inspect the `estimators_` attribute of the classifier fitted above; it is
# left as the cell's last expression so the notebook displays its value.
clf.estimators_

In [None]:
from sklearn.metrics import classification_report
# Per-class metrics report: training split first, then test split.
print(classification_report(y_true=y_train, y_pred=y_train_predict))
print(classification_report(y_true=y_test, y_pred=y_test_predict))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix for the training split, drawn as an annotated heatmap.
# scikit-learn's confusion_matrix puts the TRUE labels on the rows and the
# PREDICTED labels on the columns, and the heatmap draws rows along the
# y-axis, so the y-axis is "reales" and the x-axis is "predecidos".
cm = confusion_matrix(y_train, y_train_predict)
# fmt='g' prints the cell counts as plain numbers instead of scientific notation.
sns.heatmap(cm, linewidths=0.5, annot=True, cmap='Reds', fmt='g')
plt.ylabel('Valores reales')
plt.xlabel('Valores predecidos')
plt.title('Comparación con valores de entrenamiento')
plt.show()

In [None]:
# Confusion matrix for the held-out test split, drawn as an annotated heatmap.
# scikit-learn's confusion_matrix puts the TRUE labels on the rows and the
# PREDICTED labels on the columns, and the heatmap draws rows along the
# y-axis, so the y-axis is "reales" and the x-axis is "predecidos".
cm = confusion_matrix(y_test, y_test_predict)
# fmt='g' prints the cell counts as plain numbers instead of scientific notation.
sns.heatmap(cm, linewidths=0.5, annot=True, cmap='Reds', fmt='g')
plt.ylabel('Valores reales')
plt.xlabel('Valores predecidos')
plt.title('Comparación con valores de prueba')
plt.show()

In [None]:
# Pair each feature name with the importance reported by the fitted model and
# rank them from most to least important.
feature_imp = pd.Series(
    data=clf.feature_importances_,
    index=x.columns,
).sort_values(ascending=False)

# Tall figure so that every feature name fits on its own horizontal bar.
plt.figure(figsize=(6, 15))
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.title("Visualizando la importancia de cada Característica")
plt.xlabel('Importancia')
plt.ylabel('Característica')
plt.show()