In [14]:
import extract_features
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import time
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix

%matplotlib inline

In [15]:
def plot_confusion_matrix(cm, n_classes,display_labels):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : array-like
        Square confusion matrix. Rows are drawn on the y axis ("True label")
        and columns on the x axis ("Predicted label").
    n_classes : int
        Number of rows/columns of ``cm`` to annotate and to place ticks for.
    display_labels : sequence
        Tick labels used on both axes, in the same order as ``cm``.

    Returns
    -------
    tuple
        ``(fig, ax)`` — the created matplotlib figure and axes, so the caller
        can save, tweak or close them.
    """
    # Imported locally because the notebook's import cell does not provide it.
    from itertools import product

    cm = np.asarray(cm)
    fig, ax = plt.subplots(figsize=(10,10))

    im_ = ax.imshow(cm, interpolation='nearest', cmap='viridis')
    # Colours at both ends of the colormap, used for contrasting cell text.
    cmap_min, cmap_max = im_.cmap(0.0), im_.cmap(1.0)

    # Integer counts are printed in full; '.2g' would turn 150 into '1.5e+02'.
    values_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2g'

    # Midpoint of the value range: cells below it have a dark background and
    # get light text, cells at or above it get dark text.
    thresh = (cm.max() + cm.min()) / 2.
    for i, j in product(range(n_classes), range(n_classes)):
        color = cmap_max if cm[i, j] < thresh else cmap_min
        ax.text(j, i, format(cm[i, j], values_format),
                ha="center", va="center",
                color=color)

    fig.colorbar(im_, ax=ax)
    ax.set(xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=display_labels,
           yticklabels=display_labels,
           ylabel="True label",
           xlabel="Predicted label")

    # Explicit limits keep the first and last rows fully visible and put
    # row 0 at the top.
    ax.set_ylim((n_classes - 0.5, -0.5))
    plt.setp(ax.get_xticklabels(), rotation='vertical')

    return fig, ax

In [16]:
def grid_search(X_train, y_train, models, score, cv, refit):
    """Run a cross-validated grid search for every model in ``models``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``GridSearchCV.fit``.
    models : dict
        Maps a model name to ``{'model': estimator, 'parameters': grid}``.
    score : dict, list, str or None
        Forwarded to ``GridSearchCV(scoring=...)``. For a dict or list, one
        mean/std pair is recorded per metric name; for a single metric
        (string or None) the pair is recorded under the name ``'score'``.
    cv
        Forwarded to ``GridSearchCV(cv=...)``.
    refit
        Forwarded to ``GridSearchCV(refit=...)``.

    Returns
    -------
    dict
        Per model name: ``bestModel``, ``bestParams``, ``bestScore``,
        ``selectionTime`` (seconds) and, for each metric, ``mean_<metric>``
        (highest mean test score over all candidates) and ``std_<metric>``
        (test-score std of that same candidate).
    """
    # Single-metric searches store their results as mean_test_score /
    # std_test_score, so they are summarised under the generic name 'score'.
    if score is None or isinstance(score, str):
        metric_names = ['score']
    else:
        metric_names = list(score)

    bestmodels = dict()
    for name, value in models.items():

        print('*********** Model: {} ***********'.format(name))

        estimator = value['model']
        tuned_parameters = value['parameters']

        clf = GridSearchCV(estimator, tuned_parameters, scoring=score, cv=cv, n_jobs=-1,refit=refit)

        t_beg = time.time()
        # Fit on the data handed to this function, not on a notebook global.
        clf.fit(X_train, y_train)
        selection_time = time.time() - t_beg

        print('El tiempo de selección fue: {:0.3f}'.format(selection_time))
        print("Los mejores parámetros son:")
        print(clf.best_params_)

        bestmodels[name] = dict()
        bestmodels[name]['bestModel'] = clf.best_estimator_
        bestmodels[name]['bestParams'] = clf.best_params_
        bestmodels[name]['bestScore'] = clf.best_score_
        bestmodels[name]['selectionTime'] = selection_time

        for metric in metric_names:
            mean_scores = clf.cv_results_['mean_test_{}'.format(metric)]
            std_scores = clf.cv_results_['std_test_{}'.format(metric)]

            # Report the spread of the candidate that achieved the best mean,
            # so that mean and std describe the same parameter setting.
            best_idx = np.argmax(mean_scores)

            bestmodels[name]['mean_{}'.format(metric)] = mean_scores[best_idx]
            bestmodels[name]['std_{}'.format(metric)] = std_scores[best_idx]

    return bestmodels

In [17]:
# Build the data matrix with the project's feature extractor, then split it
# column-wise: the last column is taken as the target, the rest as features.
full_data = extract_features.matriz_features()
X, y = full_data[:, :-1], full_data[:, -1]

In [18]:
# Fixed seed so the train/test partition (and every score computed from it)
# is the same after Restart Kernel -> Run All.
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_STATE
)

# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Exhaustive search space, kept for full (slow) runs. Pass `models_full`
# instead of `models` to grid_search below to use it.
# Notes on the MLP grid:
#   * the numeric step size is `learning_rate_init`; `learning_rate` only
#     accepts a schedule name ('constant', 'invscaling', 'adaptive');
#   * single-layer sizes are written as 1-tuples, e.g. (5,), because (5) is
#     just the integer 5.
models_full = {
    'SVM': {
        'model':SVC(),
        'parameters':[
            {'C': [0.001,0.01,0.1,1,10], 'kernel': ['linear'], 'decision_function_shape':['ovr']},
            {'C': [0.001,0.01,0.1,1,10], 'gamma': [0.0001, 0.001,0.01,0.1,1], 'kernel': ['rbf'], 'decision_function_shape':['ovr']}
        ]
    },
    'MLP': {
        'model': MLPClassifier(random_state=42),
        'parameters': [
            {'hidden_layer_sizes': [(5,),(5,10,5),(5,10,20,10,5),(10,),(10,20,10),(20,)],
             'activation': ['tanh','relu','logistic'], 'solver': ['adam'], 'learning_rate_init': [0.001,0.01]
            }
        ]
    }
}

# Reduced search space used for quick iterations.
models = {
    'SVM': {
        'model':SVC(),
        'parameters':[
            {'C': [0.001,0.01], 'kernel': ['linear'], 'decision_function_shape':['ovr']},
            {'C': [0.001,0.01], 'gamma': [0.001,0.01], 'kernel': ['rbf'], 'decision_function_shape':['ovr']}
        ]
    },
    'MLP': {
        # Seeded so the weight initialisation is repeatable between runs.
        'model': MLPClassifier(random_state=42),
        'parameters': [
            {'hidden_layer_sizes': [(5,),(5,10,5),(5,10,20,10,5)],
             'activation': ['relu'], 'solver': ['adam'], 'learning_rate_init': [0.001]
            }
        ]
    }
}

# Metrics collected for every candidate; the keys become the metric names
# in GridSearchCV's cv_results_ and in the summary built by grid_search.
scoring = {'Accuracy': 'accuracy',
           'Precision': 'precision_macro',
           'Recall': 'recall_macro',
           'F1': 'f1_macro'}

# Metric used to pick the best candidate and refit it on the full training set.
refit = 'Recall'

bestmodels = grid_search(X_train_scaled,y_train,models,cv=10,score=scoring,refit=refit)

*********** Model: SVM ***********


In [None]:
# Display the summary dict returned by grid_search (one entry per model name).
bestmodels

In [None]:
# Class labels, in the order used for the confusion-matrix rows and columns.
# NOTE(review): assumes y holds exactly these string labels — confirm against
# extract_features.matriz_features().
class_labels = ['fat','fit','half']

for name, value in bestmodels.items():
    print('***************{}***************'.format(name))
    y_true, y_pred = y_test, value['bestModel'].predict(X_test_scaled)
    print(classification_report(y_true, y_pred))
    # ---- confusion-matrix plot starts here ----
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    # The class count is derived from the label list, so the two cannot drift apart.
    plot_confusion_matrix(cm, len(class_labels), class_labels)
    # ---- confusion-matrix plot ends here ----