In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier


balanceScale = pd.read_table('balance-scale.data', sep=',')
print("\nDataset Balance Scale:")
balanceScale


Dataset Balance Scale:


Unnamed: 0,Class-Name,Left-Weight,Left-Distance,Light-Weight,Light-Distance
0,2,1,1,1,1
1,1,1,1,1,2
2,1,1,1,1,3
3,1,1,1,1,4
4,1,1,1,1,5
...,...,...,...,...,...
620,3,5,5,5,1
621,3,5,5,5,2
622,3,5,5,5,3
623,3,5,5,5,4


In [2]:
balanceScale.columns

Index(['Class-Name', 'Left-Weight', 'Left-Distance', 'Light-Weight',
       'Light-Distance'],
      dtype='object')

In [3]:
balanceScaleNormalized = balanceScale.values.copy()
normalizador =  StandardScaler()
balanceScale


Unnamed: 0,Class-Name,Left-Weight,Left-Distance,Light-Weight,Light-Distance
0,2,1,1,1,1
1,1,1,1,1,2
2,1,1,1,1,3
3,1,1,1,1,4
4,1,1,1,1,5
...,...,...,...,...,...
620,3,5,5,5,1
621,3,5,5,5,2
622,3,5,5,5,3
623,3,5,5,5,4


In [4]:
balanceScaleValues = balanceScale.iloc[:,0:5].values
print("\nBalance Scale features:\n")
print(balanceScaleValues)



Balance Scale features:

[[2 1 1 1 1]
 [1 1 1 1 2]
 [1 1 1 1 3]
 ...
 [3 5 5 5 3]
 [3 5 5 5 4]
 [2 5 5 5 5]]


In [5]:
balanceScaleClasses = balanceScale.iloc[:,4].values
print("\nBalance Scale classes:\n")
print(balanceScaleClasses)
print("\nBalance Scale classes shape:")
print(balanceScaleClasses.shape)


Balance Scale classes:

[1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2
 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4
 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1
 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3
 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5
 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2
 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4
 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1
 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3
 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5
 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2
 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4
 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1 2 3 4 5 1


In [6]:
kfold_treinamento = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=None)
kfold_ajuste_parametros = model_selection.StratifiedKFold(n_splits=3, shuffle=True, random_state=None)

In [7]:
arvore_decisao = DecisionTreeClassifier()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Arvore de decisao no dataset Balance Scale ....\n")
param_dist = {'max_depth':list(np.arange(1,10)), 'min_samples_split':list(np.arange(1,10)) ,'criterion':['entropy'], 'splitter':['best','random']}
grid_search = GridSearchCV(arvore_decisao, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(balanceScaleValues, balanceScaleClasses)
decisionTreeBestParams = grid_search.best_params_
print("Decision Tree: %s \n\n" % decisionTreeBestParams)


Decision Tree: {'criterion': 'entropy', 'max_depth': 3, 'splitter': 'best'} 




In [8]:
knn_vizinhos_proximos = KNeighborsClassifier()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo KNN no dataset Balance Scale ....\n")
param_dist = {'n_neighbors':[3,5,7,9], 'metric':['euclidean', 'manhattan'], 'weights':['uniform', 'distance']} #Parâmetros testados
grid_search = GridSearchCV(knn_vizinhos_proximos, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(balanceScaleValues, balanceScaleClasses)
knnBestParams = grid_search.best_params_
print("KNN: %s \n\n" % knnBestParams)


KNN: {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'distance'} 




In [9]:
regressao_logistica = LogisticRegression()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Regressao logistica no dataset Balance Scale ....\n")
param_dist = {'penalty':['l2','l1'], 'max_iter':[200,300,400,500],'solver':['liblinear','sag','saga']} #Parâmetros testados
grid_search = GridSearchCV(regressao_logistica, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(balanceScaleValues, balanceScaleClasses)
regressao_logistica_param = grid_search.best_params_
print("Regressão logistica: %s \n\n" % regressao_logistica_param)



Regressão logistica: {'max_iter': 300, 'penalty': 'l2', 'solver': 'lbfgs', 'tol': 0.0001} 




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
rede_neural = MLPClassifier()
print("====== Iniciando busca pelos melhores parâmetros do algoritmo Redes neurais no dataset Balance Scale ....\n")
param_dist = {'hidden_layer_sizes':list(np.arange(10,20)),'activation':['logistic','identity','tanh'], 'max_iter':[100,200,300,400],'alpha':[0.0001,0.001, 0.01, 0.1], 'solver':['sgd','adam']} #Parâmetros testados
grid_search = GridSearchCV(rede_neural, param_grid=param_dist, cv=kfold_ajuste_parametros, scoring='accuracy', refit=False)
grid_search.fit(balanceScaleValues, balanceScaleClasses)
redes_neurais_param = grid_search.best_params_
print("Redes neurais: %s \n\n" % redes_neurais_param)







In [None]:
kf = model_selection.StratifiedKFold(n_splits = 5)
arvore_decisao = tree.DecisionTreeClassifier(criterion='entropy', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, class_weight=None, presort='deprecated', ccp_alpha=0.0)
vizinhos_proximos = KNeighborsClassifier(n_neighbors=3, weights = 'distance')
naive_bayes_gaussian = GaussianNB()
regressao_logistica = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='saga', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
rede_neural = MLPClassifier(hidden_layer_sizes=(5,), activation="logistic", max_iter= 300, alpha=0.001, solver="sgd", tol=1e-4, verbose=True, learning_rate_init=.01)

In [None]:
predicted_classes = dict()
predicted_classes['arvore_decisao'] = np.zeros(balanceScaleClasses.shape)
predicted_classes['vizinhos_proximos'] = np.zeros(balanceScaleClasses.shape)
predicted_classes['naive_bayes_gaussian'] = np.zeros(balanceScaleClasses.shape)
predicted_classes['regressao_logistica'] = np.zeros(balanceScaleClasses.shape)
predicted_classes['rede_neural'] = np.zeros(balanceScaleClasses.shape)

In [None]:
for train, test in kf.split(balanceScaleValues, balanceScaleClasses):
    data_train, target_train = balanceScaleValues[train], balanceScaleClasses[train]
    data_test, target_test = balanceScaleValues[test], balanceScaleClasses[test]

    arvore_decisao = arvore_decisao.fit(data_train, target_train)
    arvore_decisao_predicted = arvore_decisao.predict(data_test)
    predicted_classes['arvore_decisao'][test] = arvore_decisao_predicted

    vizinhos_proximos = vizinhos_proximos.fit(data_train, target_train)
    vizinhos_proximos_predicted = vizinhos_proximos.predict(data_test)
    predicted_classes['vizinhos_proximos'][test] = vizinhos_proximos_predicted

    naive_bayes_gaussian = naive_bayes_gaussian.fit(data_train, target_train)
    naive_bayes_gaussian_predicted = naive_bayes_gaussian.predict(data_test)
    predicted_classes['naive_bayes_gaussian'][test] = naive_bayes_gaussian_predicted

    regressao_logistica = regressao_logistica.fit(data_train, target_train)
    regressao_logistica_predicted = regressao_logistica.predict(data_test)
    predicted_classes['regressao_logistica'][test] = regressao_logistica_predicted

    rede_neural = rede_neural.fit(data_train, target_train)
    rede_neural_predicted = rede_neural.predict(data_test)
    predicted_classes['rede_neural'][test] = rede_neural_predicted

In [None]:
for classificador in predicted_classes.keys():
    print("================================================================================================")
    print("Resultados do classificador %s\n%s\n"
          %(classificador, metrics.classification_report(balanceScaleClasses, predicted_classes[classificador])))
    print("Matriz de confusão: \n%s\n\n\n" % metrics.confusion_matrix(balanceScaleClasses, predicted_classes[classificador]))


