#  <center>Models</center>
Este script contiene los procesos relacionados con Machine Learning.


In [3]:
# !pip install graphviz
# !pip install pydot
# !pip install pydotplus

Collecting pydotplus
  Downloading pydotplus-2.0.2.tar.gz (278 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pydotplus
  Building wheel for pydotplus (setup.py): started
  Building wheel for pydotplus (setup.py): finished with status 'done'
  Created wheel for pydotplus: filename=pydotplus-2.0.2-py3-none-any.whl size=24575 sha256=fbd618edca416d0b7f8abb519a74331892ee4a3892779b589511e2550a7dea9b
  Stored in directory: c:\users\sebastian\appdata\local\pip\cache\wheels\69\b2\67\08f0eef649af92df772c09f451558298e07fab1bc7cdf33db0
Successfully built pydotplus
Installing collected packages: pydotplus
Successfully installed pydotplus-2.0.2


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier # Bagging

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.tree import export_graphviz
import graphviz

from subprocess import check_call
from pydotplus import graph_from_dot_data


%run utils.ipynb

class Models:
    """Select, refit and report on tree-based classifiers via grid search.

    ``grid_training`` runs a ``GridSearchCV`` for every candidate estimator
    and keeps the one with the highest cross-validated score;
    ``model_results`` refits that estimator on a train split and prints /
    plots its evaluation.
    """

    def __init__(self):
        # Candidate estimators keyed by a short name; the same keys index
        # ``self.params``.
        self.reg = {
            'DecisionTree': DecisionTreeClassifier(),
            'RandomForest': RandomForestClassifier(n_estimators=200, n_jobs=-1),  # bagging
        }

        # Hyper-parameter grid searched for each candidate (values 2..5).
        self.params = {
            'DecisionTree': {
                'max_depth': np.arange(2, 6),  # how many questions the tree may ask
                'max_leaf_nodes': np.arange(2, 6),  # how many leaves remain at the end
                'min_samples_split': np.arange(2, 6),  # minimum samples required to split a node
            },
            'RandomForest': {
                'min_samples_split': np.arange(2, 6),
                'max_leaf_nodes': np.arange(2, 6),
                'max_depth': np.arange(2, 6),
            },
        }

    def grid_training(self, X, y):
        """Grid-search every candidate and return the best fitted search.

        Args:
            X: Feature table passed unchanged to ``GridSearchCV.fit``.
            y: Target; must expose ``.values`` (it is flattened with
                ``.values.ravel()`` before fitting).

        Returns:
            The fitted ``GridSearchCV`` whose ``best_score_`` is the highest
            among all candidates in ``self.reg``.

        Raises:
            ValueError: If no candidate produced a comparable score (for
                example when ``self.reg`` is empty).
        """
        best_score = float("-inf")
        best_grid = None
        targets = y.values.ravel()

        for name, reg in self.reg.items():
            grid_reg = GridSearchCV(reg, self.params[name], cv=5).fit(X, targets)
            # No ``scoring`` is given, so the classifier's default scorer
            # (mean accuracy) is used: a HIGHER score is the better model.
            score = grid_reg.best_score_

            if score > best_score:
                best_score = score
                best_grid = grid_reg

        if best_grid is None:
            raise ValueError("grid search produced no usable model")

        best_model = best_grid.best_estimator_

        utils = Utils()
        # NOTE(review): the estimator is passed as both the first and third
        # argument, as in the original call; the signature of
        # ``Utils.model_export`` is not visible here - confirm the third
        # argument is really meant to be the model.
        utils.model_export(best_model, best_score, best_model)

        # Return the winning search, not merely the last one fitted, so that
        # callers reading ``best_estimator_`` get the exported model.
        return best_grid

    def model_results(self, X, y, grid_reg):
        """Refit the best estimator on a train split and report its results.

        Prints the estimator, a classification report and draws a confusion
        matrix heat map and a feature-importance bar chart. Finally writes
        one decision tree to ``..//out//breast_cancer_tree.dot``.

        Args:
            X: Feature table; must expose ``.columns``.
            y: Target passed to ``Utils.dataset_split`` together with ``X``.
            grid_reg: Fitted search object exposing ``best_estimator_``
                (as returned by ``grid_training``).
        """
        # ``utils`` was never bound in this method; build the helper here so
        # the method does not depend on a notebook-level global.
        utils = Utils()

        # presumably a train/test split - Utils.dataset_split is defined
        # outside this file; only its 4-tuple return shape is relied upon.
        x_train, x_test, y_train, y_test = utils.dataset_split(X, y)

        model = grid_reg.best_estimator_
        print(model)
        model.fit(x_train, y_train.values.ravel())
        y_fit = model.predict(x_test)

        separator = "=" * 100
        print(separator)

        print(classification_report(y_test, y_fit))
        cm = confusion_matrix(y_test, y_fit)

        print(separator)

        # The matrix is transposed so that true labels run along the x axis.
        plt.figure(figsize=(5, 5))
        sns.heatmap(cm.T, square=True, annot=True, cbar=False, xticklabels=True)
        plt.xlabel('True label')
        plt.ylabel('Predicted label')
        plt.show()

        print(separator)

        feature_names = X.columns.to_list()
        n_features = len(feature_names)
        plt.figure(figsize=(12, 8))
        plt.barh(range(n_features), model.feature_importances_, align='center')
        plt.yticks(np.arange(n_features), feature_names)
        plt.xlabel("Feature importance")
        plt.ylabel("Feature")
        plt.grid()
        plt.show()

        # NOTE(review): 0.06 looks like an importance threshold - confirm
        # against Utils.print_min_tree_feature_importance.
        utils.print_min_tree_feature_importance(X, model, 0.06)

        print(separator)

        # An ensemble exposes its fitted trees through ``estimators_``; a
        # single decision tree does not, in which case the model itself is
        # the tree to export.
        trees = getattr(model, "estimators_", None)
        if trees is None:
            estimator = model
        else:
            # Tree number 10 is exported as in the original code; the index
            # is clamped so smaller ensembles do not raise IndexError.
            estimator = trees[min(10, len(trees) - 1)]

        # NOTE(review): the class names assume a two-class target; the
        # label strings are kept exactly as they were.
        export_graphviz(
            estimator,
            out_file="..//out//breast_cancer_tree.dot",
            class_names=['Benign', 'Malingnant'],
            feature_names=feature_names,
            impurity=False,
            filled=True,
        )

        # To render the tree inline, read the .dot file back and pass its
        # contents to ``graphviz.Source``.