## Procesamiento de datos

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import model_estimator

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [2]:
# Load the raw table from a CSV path relative to the working directory.
# splitData (defined in this notebook) treats the last column as the target.
dataset = pd.read_csv("Data/Tx_0x04.csv")

### Separación del conjunto de datos

In [3]:
def splitData(dataset, validation_size, random_state=None):
    """Split a DataFrame into train and test feature/target arrays.

    Every column except the last is used as a feature; the last column is
    used as the target.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table whose last column holds the target.
    validation_size : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``train_test_split``. Pass an integer to obtain the same
        partition on every run; ``None`` keeps the previous unseeded
        behaviour.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as produced by
        ``train_test_split``.
    """
    vector = dataset.values
    X = vector[:, :-1]  # Features: every column but the last
    Y = vector[:, -1]  # Target: last column

    return train_test_split(
        X, Y, test_size=validation_size, random_state=random_state
    )

In [4]:
# Hold out 20% of the rows for testing; the remaining 80% feed the searches below.
X_train, X_test, y_train, y_test = splitData(dataset, validation_size=0.20)

## Selección de estimadores

In [5]:
# Estimator registry for the decision tree: display name -> estimator instance.
model_DT = {'DecisionTree': DecisionTreeClassifier()}

# Hyperparameter grid explored for the decision tree.
# Grid size: 2 * 2 * 3 * 4 * 2 = 96 combinations.
params_DT = {
    'DecisionTree': dict(
        class_weight=['balanced', None],
        criterion=['entropy', 'gini'],
        max_features=['sqrt', 'log2', None],
        min_samples_split=[2, 4, 6, 8],
        splitter=['best', 'random'],
    )
}

In [6]:
# Estimator registry for the support vector classifier: display name -> estimator instance.
model_SVC = {'SVC': SVC()}

# Hyperparameter grid explored for the SVC.
# Grid size: 4 * 4 * 4 * 2 * 2 * 2 = 512 combinations.
params_SVC = {
    'SVC': dict(
        kernel=['linear', 'rbf', 'poly', 'sigmoid'],
        C=[0.5, 1, 1.5, 3],
        degree=[2, 3, 4, 5],
        probability=[True, False],
        shrinking=[True, False],
        decision_function_shape=['ovo', 'ovr'],
        # max_iter=[300],  # currently disabled
    )
}

In [7]:
# Estimator registry for k-nearest neighbours: display name -> estimator instance.
model_kNN = {'kNN': KNeighborsClassifier()}

# Hyperparameter grid explored for kNN.
# Grid size: 10 * 2 * 3 * 5 * 9 = 2700 combinations.
params_kNN = {
    'kNN': dict(
        n_neighbors=[*range(1, 11)],       # 1..10
        weights=['uniform', 'distance'],
        algorithm=['ball_tree', 'kd_tree', 'brute'],
        p=[1, 2, 3, 4, 5],
        leaf_size=[*range(10, 51, 5)],     # 10, 15, ..., 50
    )
}

## Testing búsqueda exhaustiva

### DT

In [8]:
# Exhaustive ('Exh') search over the decision-tree grid, fitted on the training
# split and scored by accuracy with 8 parallel jobs.
helperExh_DT = model_estimator.EstimatorSelection(model_DT, params_DT)
helperExh_DT.fitModel('Exh', X_train, y_train, scoring='accuracy', n_jobs=8)

Ejecutando la búsqueda exhaustiva para el modelo DecisionTree ....
Fitting 10 folds for each of 96 candidates, totalling 960 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    2.8s
[Parallel(n_jobs=8)]: Done 328 tasks      | elapsed:    5.3s
[Parallel(n_jobs=8)]: Done 945 out of 960 | elapsed:    8.8s remaining:    0.1s
[Parallel(n_jobs=8)]: Done 960 out of 960 | elapsed:    8.9s finished


In [9]:
# Report the recorded run time, then show the ten best-scoring configurations.
print("Tiempo de ejecución (seg): ", helperExh_DT.timeModel['DecisionTree'], sep="")
(
    helperExh_DT.scoreModel()
    .sort_values(['.Accuracy'], ascending=False)
    .head(10)
)

Tiempo de ejecución (seg): [8.932374477386475]


Unnamed: 0,Unnamed: 1,.Accuracy,.Error,class_weight,criterion,max_features,min_samples_split,splitter
DecisionTree,64,0.784983,0.009773,,entropy,,2,best
DecisionTree,88,0.78115,0.016511,,gini,,2,best
DecisionTree,16,0.779756,0.015086,balanced,entropy,,2,best
DecisionTree,40,0.77732,0.017415,balanced,gini,,2,best
DecisionTree,92,0.776795,0.019015,,gini,,6,best
DecisionTree,22,0.776791,0.014213,balanced,entropy,,8,best
DecisionTree,18,0.776271,0.013492,balanced,entropy,,4,best
DecisionTree,66,0.775923,0.01287,,entropy,,4,best
DecisionTree,72,0.775752,0.015302,,gini,sqrt,2,best
DecisionTree,20,0.775398,0.010277,balanced,entropy,,6,best


### SVC

In [10]:
#helperExh_SVC = model_estimator.EstimatorSelection(model_SVC, params_SVC)
#helperExh_SVC.fitModel('Exh', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

In [11]:
#print("Tiempo de ejecución (seg): "+str(helperExh_SVC.timeModel['SVC']))
#helperExh_SVC.scoreModel().sort_values(['.Accuracy'], ascending=False).head(10)

### kNN

In [12]:
#helperExh_kNN = model_estimator.EstimatorSelection(model_kNN, params_kNN)
#helperExh_kNN.fitModel('Exh', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

In [13]:
#print("Tiempo de ejecución (seg): "+str(helperExh_kNN.timeModel['kNN']))
#helperExh_kNN.scoreModel().sort_values(['.Accuracy'], ascending=False).head(10)

## Testing búsqueda aleatoria

### DT

In [14]:
# Randomized ('Rdn') search over the same decision-tree grid, fitted on the
# training split and scored by accuracy with 8 parallel jobs.
helperRnd_DT = model_estimator.EstimatorSelection(model_DT, params_DT)
helperRnd_DT.fitModel('Rdn', X_train, y_train, scoring='accuracy', n_jobs=8)

Ejecutando la búsqueda aleatoria para el modelo DecisionTree ...
Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  53 tasks      | elapsed:    0.6s
[Parallel(n_jobs=8)]: Done  85 out of 100 | elapsed:    0.8s remaining:    0.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.8s finished


In [15]:
# Report the recorded run time, then show the ten best-scoring configurations.
print("Tiempo de ejecución (seg): ", helperRnd_DT.timeModel['DecisionTree'], sep="")
(
    helperRnd_DT.scoreModel()
    .sort_values(['.Accuracy'], ascending=False)
    .head(10)
)

Tiempo de ejecución (seg): [0.8321702480316162]


Unnamed: 0,Unnamed: 1,.Accuracy,.Error,splitter,min_samples_split,max_features,criterion,class_weight
DecisionTree,7,0.76442,0.014069,best,8,log2,gini,
DecisionTree,1,0.758327,0.01764,best,8,log2,entropy,
DecisionTree,6,0.756061,0.021019,random,2,,entropy,balanced
DecisionTree,5,0.747169,0.019127,random,4,,entropy,balanced
DecisionTree,8,0.744206,0.00812,random,8,,gini,
DecisionTree,2,0.726255,0.017583,random,2,sqrt,gini,balanced
DecisionTree,4,0.717549,0.012898,random,6,sqrt,gini,balanced
DecisionTree,9,0.706399,0.023669,random,8,log2,gini,
DecisionTree,3,0.705181,0.025659,random,6,log2,entropy,balanced
DecisionTree,0,0.69891,0.022986,random,6,sqrt,entropy,balanced


### SVC

In [16]:
#helperRdn_SVC = model_estimator.EstimatorSelection(model_SVC, params_SVC)
#helperRdn_SVC.fitModel('Rdn', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

In [17]:
#print("Tiempo de ejecución (seg): "+str(helperRdn_SVC.timeModel['SVC']))
#helperRdn_SVC.scoreModel().sort_values(['.Accuracy'], ascending=False).head(10)

### kNN

In [18]:
# Randomized ('Rdn') search over the kNN grid, fitted on the training split and
# scored by accuracy with 4 parallel jobs.
# NOTE(review): population_size / generations_number look like settings for the
# evolutionary search; the logged run evaluated 10 candidates, the same count as
# the decision-tree random search that omits them, so they are presumably
# ignored here. Confirm against model_estimator.
helperRdn_kNN = model_estimator.EstimatorSelection(model_kNN, params_kNN)
helperRdn_kNN.fitModel('Rdn', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

Ejecutando la búsqueda aleatoria para el modelo kNN ...
Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   15.7s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed:   21.2s finished


In [19]:
# Report the recorded run time, then show the ten best-scoring configurations.
print("Tiempo de ejecución (seg): ", helperRdn_kNN.timeModel['kNN'], sep="")
(
    helperRdn_kNN.scoreModel()
    .sort_values(['.Accuracy'], ascending=False)
    .head(10)
)

Tiempo de ejecución (seg): [21.60860013961792]


Unnamed: 0,Unnamed: 1,.Accuracy,.Error,weights,p,n_neighbors,leaf_size,algorithm
kNN,7,0.839001,0.016138,distance,1,6,40,brute
kNN,6,0.823668,0.016183,uniform,1,10,20,ball_tree
kNN,0,0.82175,0.016726,uniform,1,10,30,brute
kNN,8,0.820877,0.016387,distance,1,2,45,ball_tree
kNN,9,0.816521,0.017589,distance,2,8,10,ball_tree
kNN,1,0.805369,0.014895,distance,5,5,45,brute
kNN,2,0.80415,0.016882,distance,4,7,15,brute
kNN,4,0.800668,0.016475,uniform,4,3,40,kd_tree
kNN,3,0.799793,0.017546,distance,4,8,25,kd_tree
kNN,5,0.788469,0.018697,uniform,3,7,50,ball_tree


## Testing búsqueda evolutiva

### DT

In [20]:
# Evolutionary ('Evol') search over the decision-tree grid, fitted on the
# training split and scored by accuracy with 8 parallel jobs, using
# population_size=10 and generations_number=9.
helperEvol_DT = model_estimator.EstimatorSelection(model_DT, params_DT)
helperEvol_DT.fitModel('Evol', X_train, y_train, scoring='accuracy', n_jobs=8, population_size=10, generations_number=9)

Ejecutando la búsqueda evolutiva para el modelo DecisionTree ...
Tipos: [1, 1, 1, 1, 1], rangos: [1, 1, 2, 3, 1]
--- El modelo evoluciona en 96 posibles combinaciones ---
gen	nevals	avg     	min     	max     	std      
0  	10    	0.750131	0.700819	0.775919	0.0220324
1  	5     	0.76773 	0.755184	0.775919	0.005892 
2  	6     	0.771546	0.765464	0.77679 	0.00489471
3  	6     	0.774525	0.767207	0.77679 	0.00352047
4  	5     	0.776459	0.775745	0.777139	0.000564354
5  	8     	0.776529	0.775745	0.777139	0.000573956
6  	2     	0.776965	0.776268	0.777139	0.000348493
7  	7     	0.775867	0.764767	0.777836	0.00373314 
8  	6     	0.777017	0.776093	0.777836	0.000821179
9  	2     	0.775327	0.756229	0.777836	0.00640279 


In [21]:
# Report the recorded run time, then show the ten best-scoring configurations.
print("Tiempo de ejecución (seg): ", helperEvol_DT.timeModel['DecisionTree'], sep="")
(
    helperEvol_DT.scoreModel()
    .sort_values(['.Accuracy'], ascending=False)
    .head(10)
)

Tiempo de ejecución (seg): [3.5189762115478516]


Unnamed: 0,Unnamed: 1,.Accuracy,.Error,class_weight,criterion,max_features,min_samples_split,splitter
DecisionTree,45,0.777836,0.0,,gini,,6,best
DecisionTree,44,0.777836,0.0,,gini,,6,best
DecisionTree,43,0.777836,0.0,,gini,,6,best
DecisionTree,40,0.777836,0.0,,gini,,6,best
DecisionTree,23,0.777139,0.0,,gini,,6,best
DecisionTree,35,0.777139,0.0,,gini,,6,best
DecisionTree,25,0.777139,0.0,,gini,,6,best
DecisionTree,33,0.777139,0.0,,gini,,6,best
DecisionTree,31,0.777139,0.0,,gini,,6,best
DecisionTree,29,0.777139,0.0,,gini,,6,best


### SVC

In [22]:
#helperEvol_SVM = model_estimator.EstimatorSelection(model_SVC, params_SVC)
#helperEvol_SVM.fitModel('Evol', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

In [23]:
#print("Tiempo de ejecución (seg): "+str(helperEvol_SVM.timeModel['SVC']))
#helperEvol_SVM.scoreModel().sort_values(['.Accuracy'], ascending=False).head(10)

### kNN

In [24]:
#helperEvol_kNN = model_estimator.EstimatorSelection(model_kNN, params_kNN)
#helperEvol_kNN.fitModel('Evol', X_train, y_train, scoring='accuracy', n_jobs=4, population_size=20, generations_number=10)

In [25]:
#print("Tiempo de ejecución (seg): "+str(helperEvol_kNN.timeModel['kNN']))
#helperEvol_kNN.scoreModel().sort_values(['.Accuracy'], ascending=False).head(10)