In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

print('Iris:', np.shape(data_iris))
print('Glass:', np.shape(data_glass))

Iris: (150, 5)
Glass: (214, 11)


In [2]:
data_glass = shuffle(data_glass)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
20,21,1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1
202,203,1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7
68,69,1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16,1
23,24,1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0,1
88,89,1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2


In [3]:
y = data_glass['target']
data_glass = data_glass.drop(columns = ['target', 'a'])

In [4]:
# Normalizing the dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_glass)
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
20,-0.28564,-0.72156,0.601422,0.090531,0.128201,0.066,-0.307758,-0.352877,1.368065
202,-1.06458,1.770175,-1.865511,1.957619,1.383484,-0.763919,-0.399319,0.776028,-0.586451
68,1.041198,-0.353324,0.62227,-1.093966,-0.583557,-0.410435,0.607848,-0.352877,1.059457
23,-0.282339,-0.733835,0.615321,-0.190536,0.47761,0.188951,-0.258456,-0.352877,-0.586451
88,-0.721318,-0.488344,0.566677,0.070454,0.309376,0.158213,-0.589483,-0.352877,-0.586451


In [5]:
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3, 6), activation='logistic', 
                            random_state=1), 
            Perceptron(tol=1e-3, random_state=2019), 
            svm.SVC(gamma='scale', kernel='linear'),
            svm.SVC(gamma='scale', kernel='rbf'), 
            svm.SVC(gamma='scale', kernel='sigmoid')]

In [6]:
kf = KFold(n_splits=5, shuffle=True, random_state=2019)    

In [7]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_glass)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
        X_train, X_valid = data_glass.iloc[train_index,:].copy(), data_glass.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        # print("Tempo: ", end - start)
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(pred)
        print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        acc = accuracy_score(pred, y_valid)
        # print('Accuracy:', acc)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(3, 6), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
[[13  4  0  0  0  0]
 [ 8  4  0  2  0  1]
 [ 2  0  0  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  0  0  0  1  0]
 [ 0  0  0  0  1  6]]
[[14  2  0  0  0  0]
 [ 4  9  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  2  0  2  0  0]
 [ 0  0  0  0  2  0]
 [ 0  1  0  0  0  4]]
[[8 0 0 0 0 0]
 [9 9 1 0 0 0]
 [4 0 2 0 0 0]
 [0 0 0 0 0 2]
 [0 0 0 0 1 0]
 [0 0 0 0 0 7]]
[[9 5 0 1 0 0]
 [2 8 0 0 2 0]
 [3 2 0 0 0 0]
 [0 2 0 1 1 0]
 [0 0 0 0 2 0]
 [0 1 0 0 0 4]]
[[14  0  0  0  0  0]
 [ 5  8  0  1  1  0]
 [ 1  1  1  0  0  0]
 [ 0  1  0  0  0  1]
 [ 0  1  0  0  1  1]
 [ 1  0  0  0  0  4]]
====
Final results: 
Mean accuracy: 0.6310077519379844
Mean traning model time:  0.15765867233276368


      fit_