#  <center>Models</center>

In [17]:
# !pip install graphviz

Collecting graphviz
  Downloading graphviz-0.19.1-py3-none-any.whl (46 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.19.1


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.tree import export_graphviz
import graphviz

%run utils.ipynb

class Models:
    """Grid-search a small set of tree-based classifiers and report results.

    Attributes:
        reg: mapping of model name -> unfitted scikit-learn classifier.
        params: mapping of model name -> hyper-parameter grid handed to
            ``GridSearchCV`` for that classifier.
    """

    def __init__(self):
        # Candidate classifiers, keyed by the same names used in self.params.
        self.reg = {
            'DecisionTree': DecisionTreeClassifier(),
            'RandomForest': RandomForestClassifier(n_estimators=200 , n_jobs=-1),
        }

        # Hyper-parameter grids searched for each candidate.
        self.params = {
            'DecisionTree' : {
                'max_depth': np.arange(2,6), 
                'max_leaf_nodes': np.arange(2,6)
            }, 'RandomForest' : {
                'min_samples_split': np.arange(2,6),
                'max_depth': np.arange(2,6)
            }
        }

    def grid_training(self, X, y):
        """Run a 5-fold grid search per candidate and keep the best one.

        Args:
            X: feature matrix passed straight to ``GridSearchCV.fit``.
            y: target; must expose ``.values`` (e.g. a pandas Series or a
                single-column DataFrame), which is flattened with ``ravel()``.

        Returns:
            The fitted ``GridSearchCV`` object of the candidate with the
            highest mean cross-validated score.

        Raises:
            ValueError: if ``self.reg`` contains no candidate models.
        """
        # scikit-learn scores follow a "greater is better" convention, so the
        # winner is the candidate with the HIGHEST best_score_.
        best_score = -np.inf
        best_model = None
        best_grid = None
        target = y.values.ravel()

        for name, reg in self.reg.items():
            grid_reg = GridSearchCV(reg, self.params[name], cv=5).fit(X, target)
            score = grid_reg.best_score_

            if score > best_score:
                best_score = score
                best_model = grid_reg.best_estimator_
                best_grid = grid_reg

        if best_grid is None:
            raise ValueError("No candidate models configured in self.reg")

        utils = Utils()
        # NOTE(review): the estimator is passed as both first and third
        # argument, as in the original call; confirm against the signature of
        # Utils.model_export (defined in utils.ipynb).
        utils.model_export(best_model, best_score, best_model)

        # Return the winning search, not merely the last one fitted.
        return best_grid

    def model_results(self, X, y, grid_reg):
        """Refit the selected estimator on a train split and display metrics.

        Prints the classification report and shows two figures: the confusion
        matrix heatmap and a horizontal bar chart of feature importances.

        Args:
            X: feature DataFrame (``X.columns`` is used for the bar labels).
            y: target, split together with ``X`` by ``Utils.dataset_split``.
            grid_reg: fitted ``GridSearchCV`` object, e.g. the value returned
                by ``grid_training``. Its ``best_estimator_`` is refitted in
                place on the training split.
        """
        # A local Utils instance is required here: the one created in
        # grid_training is not visible from this method.
        utils = Utils()
        x_train, x_test, y_train, y_test = utils.dataset_split(X, y)

        model = grid_reg.best_estimator_
        print(model)
        model.fit(x_train, y_train.values.ravel())
        y_fit = model.predict(x_test)

        print("="*100)

        print(classification_report(y_test,y_fit))
        cm = confusion_matrix(y_test,y_fit)

        print("="*100)

        # confusion_matrix has true labels on rows; transposing puts the true
        # label on the x axis, matching the axis labels below. fmt='d' keeps
        # the integer counts from being rendered in scientific notation.
        plt.figure(figsize=(5,5))
        sns.heatmap(cm.T ,square=True , annot=True, fmt='d', cbar=False, xticklabels=True)
        plt.xlabel('True label')
        plt.ylabel('Predicted label')
        plt.show()

        print("="*100)

        n_features = len(X.columns)
        plt.figure(figsize=(12,8))
        plt.barh(range(n_features), model.feature_importances_, align='center')
        plt.yticks(np.arange(n_features), X.columns.to_list())
        plt.xlabel("Feature importance")
        plt.ylabel("Feature")
        plt.grid()
        plt.show()

        # Disabled: export of the fitted tree to Graphviz.
#         print("="*100)

#         export_graphviz(model, out_file="breast_cancer_tree.dot", feature_names=X.columns, class_names=['Benigno','Maligno'], rounded=True, filled=True)

#         with open("breast_cancer_tree.dot") as f:
#             dot_graph = f.read()

#         graphviz.Source(dot_graph)