In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import common as com

from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.datasets import fetch_openml

In [2]:
# Dataset under study. Other OpenML datasets previously tried with this
# notebook: 'mfeat-karhunen', 'heart-statlog', 'sonar', 'glass', 'ecoli',
# 'yeast', 'splice', 'vowel', 'credit-g', 'spambase'.
DATASET_NAME = 'vehicle'
# Pinning the version avoids the "multiple active versions" warning and keeps
# the experiment tied to one dataset revision.
# NOTE(review): version 1 is assumed to be what the unversioned call returned
# for 'vehicle' -- confirm, and re-check the version when switching datasets.
DATASET_VERSION = 1

data = fetch_openml(name=DATASET_NAME, version=DATASET_VERSION)

# Depending on the scikit-learn version, fetch_openml may hand back pandas
# objects; plain ndarrays are required by the positional indexing done later.
x = np.asarray(data.data)

# label_names[k] is the original label of class k; y holds the integer codes.
label_names, y = np.unique(np.asarray(data.target), return_inverse=True)

# Fractions of the data used for training and (within it) for validation.
train_ratio = 0.8
validation_ratio = 0.2

print("Número de padrões: %d" % x.shape[0])
print("Número de atributos: %d" % x.shape[1])
print("Número de classes: %d" % np.unique(y).shape[0])

Número de padrões: 846
Número de atributos: 18
Número de classes: 4


  " {version}.".format(name=name, version=res[0]['version']))


## Divisão Treino Teste

In [3]:
def div_train_test(x, y, normalize_data=True, ratio=None):
    """Randomly split (x, y) into train and test subsets.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Feature matrix. It is never modified; the returned arrays are copies.
    y : array-like of shape (n_samples,)
        Targets aligned with the rows of ``x``.
    normalize_data : bool, default True
        When True, every feature is centred and scaled with the mean and
        standard deviation computed on the training subset only, and the same
        transformation is applied to the test subset. Features whose training
        standard deviation is zero are only centred.
    ratio : float or None, default None
        Fraction of samples assigned to training (rounded up). When None, the
        notebook-level ``train_ratio`` variable is used.

    Returns
    -------
    x_train, x_test, y_train, y_test : numpy.ndarray
        ``x_train`` has ``ceil(ratio * n_samples)`` rows; the remaining rows
        go to ``x_test``.

    Notes
    -----
    The shuffle uses NumPy's global random state (``np.random.permutation``);
    seed it beforehand for reproducible splits.
    """
    if ratio is None:
        # Fall back to the notebook-level setting to keep old calls working.
        ratio = train_ratio

    x = np.asarray(x)
    y = np.asarray(y)
    if normalize_data and not np.issubdtype(x.dtype, np.floating):
        # In-place centring/scaling below cannot store floats in an int array.
        x = x.astype(float)

    n_samples = x.shape[0]
    number_train = int(np.ceil(ratio * n_samples))
    random_index = np.random.permutation(n_samples)
    # Slice exactly at number_train so the training set has the requested size.
    train_index = random_index[:number_train]
    test_index = random_index[number_train:]

    # Fancy indexing copies, so the in-place operations below leave x intact.
    x_train = x[train_index]
    x_test = x[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

    if normalize_data:
        # Statistics come from the training subset only (no test leakage).
        x_mean = np.mean(x_train, axis=0)
        x_train -= x_mean
        x_std = np.std(x_train, axis=0)
        # Skip constant features to avoid dividing by zero.
        index_not_zero = x_std > 0
        x_train[:, index_not_zero] /= x_std[index_not_zero]

        x_test -= x_mean
        x_test[:, index_not_zero] /= x_std[index_not_zero]

    return x_train, x_test, y_train, y_test


## Análise utilizando acurácia

In [None]:
# Repeated hold-out evaluation: each run draws a fresh random train/test
# split, fits every classifier on it and records train and test accuracy.
models_names = ['KNN', 'MLP', 'SVM', 'KNN-Bagging', 'MLP-Bagging']

N_RUNS = 50    # number of random train/test splits
SEED = 12345   # seeds NumPy's global RNG so splits are reproducible

np.random.seed(SEED)

# models_scores[name]['train' | 'test'] -> list with one accuracy per run.
models_scores = {model_name: {'train': [], 'test': []} for model_name in models_names}

for i in range(N_RUNS):

    # Fresh, unfitted estimators for every run.
    knn = KNeighborsClassifier(n_neighbors=3)
    mlp = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh', alpha=1e-2, batch_size=10)
    svm = SVC(gamma='auto')
    knn_bagging = BaggingClassifier(knn, n_estimators=10,
                                    max_samples=1.0, n_jobs=-1, random_state=12345)
    mlp_bagging = BaggingClassifier(mlp, n_estimators=10,
                                    max_samples=1.0, n_jobs=-1, random_state=12345)

    # Order must match models_names.
    models = [knn, mlp, svm, knn_bagging, mlp_bagging]

    x_train, x_test, y_train, y_test = div_train_test(x, y)

    for model, model_name in zip(models, models_names):
        model.fit(x_train, y_train)
        # score() returns accuracy as a fraction in [0, 1].
        models_scores[model_name]['train'].append(model.score(x_train, y_train))
        models_scores[model_name]['test'].append(model.score(x_test, y_test))

for model_name in models_names:
    # Scores are stored as fractions; scale to percent to match the "%" label.
    train_percent = 100.0 * np.asarray(models_scores[model_name]['train'])
    test_percent = 100.0 * np.asarray(models_scores[model_name]['test'])

    train_mean = np.mean(train_percent)
    test_mean = np.mean(test_percent)

    train_std = np.std(train_percent)
    test_std = np.std(test_percent)

    print("[%s]: Média treino: %.2f%% - Média teste: %.2f%% - Desvio treino: %.2f%% - Desvio teste: %.2f%%" % (model_name, train_mean,test_mean,train_std,test_std))
        

