In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load the two benchmark datasets from files in the working directory.
data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# Print (rows, columns) of each frame as a quick sanity check on the load.
for caption, frame in (('Iris:', data_iris), ('Glass:', data_glass)):
    print(caption, frame.shape)

Iris: (150, 5)
Glass: (214, 11)


In [2]:
# Shuffle the Glass rows once, up front. The seed is fixed so that a
# "Restart Kernel -> Run All" reproduces the same row order (and therefore the
# same fold contents and scores) on every run.
data_glass = shuffle(data_glass, random_state=2019)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
175,176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5
54,55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1
174,175,1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,5
81,82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0,2
89,90,1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2


In [3]:
# Shuffle the Iris rows once, up front. The seed is fixed so that a
# "Restart Kernel -> Run All" reproduces the same row order (and therefore the
# same fold contents and scores) on every run.
data_iris = shuffle(data_iris, random_state=2019)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
48,5.3,3.7,1.5,0.2,Setosa
67,5.8,2.7,4.1,1.0,Versicolor
148,6.2,3.4,5.4,2.3,Virginica
64,5.6,2.9,3.6,1.3,Versicolor
63,6.1,2.9,4.7,1.4,Versicolor


In [4]:
# Separate the Glass class labels (y) from the feature columns.
# NOTE(review): column 'a' is removed together with the label; in the preview
# it looks like a running sample id (row index + 1) -- confirm against the
# dataset description.
# This cell is not re-runnable: once 'target' has been dropped from data_glass
# the lookup below raises KeyError.
y = data_glass.loc[:, 'target']
data_glass = data_glass.drop(['target', 'a'], axis=1)

In [5]:
# Separate the Iris class labels (y_iris) from the four measurement columns.
# This cell is not re-runnable: once 'variety' has been dropped from data_iris
# the lookup below raises KeyError.
y_iris = data_iris.loc[:, 'variety']
data_iris = data_iris.drop(['variety'], axis=1)

In [6]:
# Standardise the Glass features: every column is shifted to zero mean and
# scaled to unit variance.
# NOTE(review): the scaler is fitted on all rows, including the rows that the
# cross-validation below uses as validation folds, so validation statistics
# influence the transform -- consider fitting it on each training fold only.
scaler = StandardScaler().fit(data_glass)
# Slice-assignment writes the scaled values back into the existing frame, so
# the index and column names are kept.
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
175,0.932278,-0.537442,-1.63619,0.130683,0.956429,-0.564123,1.629101,-0.352877,2.293888
54,-0.193223,-0.242853,0.087189,-0.310994,0.425846,0.019893,0.044398,-0.352877,0.339372
174,0.730942,-0.684737,-0.746704,1.455713,-0.609439,0.404115,0.52333,0.130939,4.659881
81,-0.803833,-0.193754,0.531931,-0.029927,0.671726,0.173582,-0.772604,-0.352877,-0.586451
89,-0.648705,-1.052973,0.552779,0.853427,0.749372,0.20432,-0.617656,-0.352877,0.339372


In [7]:
# Classifiers compared on the Glass dataset: a two-hidden-layer MLP, a
# perceptron, and one SVM per kernel (linear, RBF, sigmoid), in that order.
all_models = [
    MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(16, 8),
        activation='logistic',
        random_state=1,
    ),
    Perceptron(tol=1e-3, random_state=2019),
] + [
    svm.SVC(gamma='scale', kernel=kernel_name)
    for kernel_name in ('linear', 'rbf', 'sigmoid')
]

In [8]:
# 5-fold cross-validation splitter used by both experiments below. Rows are
# shuffled before splitting and the seed is fixed, so the fold assignment is
# the same on every run.
# NOTE(review): the folds are not stratified by class. StratifiedKFold is
# already imported and may suit the Glass targets better, but its split() also
# needs the labels -- confirm before switching.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=2019,
)

Running models for Glass dataset:

In [9]:
# Cross-validate every candidate model on the Glass dataset.
# For each model this prints one confusion matrix per fold, followed by the
# mean accuracy and the mean time spent inside fit() across the folds.

# Sorted distinct class labels, taken from the data instead of being
# hard-coded. Passing them to confusion_matrix keeps every matrix the same
# shape and row order even when a fold is missing one of the classes.
glass_labels = np.unique(y)

for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    accuracies = []
    times = []

    # y is handed to split() as well: KFold ignores it, and the loop keeps
    # working unchanged if kf is replaced by a stratified splitter.
    for train_index, test_index in kf.split(data_glass, y):
        # Build the training and validation sets for the current fold.
        X_train, y_train = data_glass.iloc[train_index, :], y.iloc[train_index]
        X_valid, y_valid = data_glass.iloc[test_index, :], y.iloc[test_index]

        # Time only the training step. perf_counter() is a monotonic,
        # high-resolution clock intended for measuring short durations.
        start = time.perf_counter()
        model.fit(X_train, y_train)
        end = time.perf_counter()
        times.append(end - start)

        pred = model.predict(X_valid)
        print(confusion_matrix(y_valid, pred, labels=glass_labels))

        # scikit-learn metrics take (y_true, y_pred) in that order.
        acc = accuracy_score(y_valid, pred)
        accuracies.append(acc)

    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 6  8  0  0  0  0]
 [ 3 11  0  0  0  0]
 [ 1  2  0  0  0  0]
 [ 0  1  0  1  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  1  9]]
[[10  4  0  0  0  0]
 [ 1  9  0  2  0  0]
 [ 2  2  0  0  0  0]
 [ 0  0  0  3  0  0]
 [ 0  1  0  0  1  0]
 [ 0  0  0  0  1  7]]
[[12  1  0  0  0  0]
 [ 1 15  0  1  0  0]
 [ 2  1  0  0  0  0]
 [ 0  1  0  2  0  0]
 [ 0  2  0  1  0  1]
 [ 0  0  0  0  0  3]]
[[10  4  1  0  0  0]
 [ 2 10  0  0  0  0]
 [ 3  1  0  0  0  0]
 [ 0  0  0  2  0  1]
 [ 0  0  0  0  2  0]
 [ 0  3  0  0  0  4]]
[[ 9  5  0  0  0  0]
 [ 2 15  1  2  1  0]
 [ 0  1  2  0  0  0]
 [ 0  0  0  2  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0  0  0  0  1]]
====
Final results: 
Mean accuracy: 0.68

In [10]:
# Standardise the Iris features: every column is shifted to zero mean and
# scaled to unit variance.
# NOTE(review): the scaler is fitted on all rows, including the rows that the
# cross-validation below uses as validation folds, so validation statistics
# influence the transform -- consider fitting it on each training fold only.
scaler = StandardScaler().fit(data_iris)
# Slice-assignment writes the scaled values back into the existing frame, so
# the index and column names are kept.
data_iris[:] = scaler.transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
48,-0.658345,1.479398,-1.283389,-1.315444
67,-0.052506,-0.82257,0.194384,-0.262387
148,0.432165,0.788808,0.933271,1.448832
64,-0.294842,-0.362176,-0.089803,0.13251
63,0.310998,-0.362176,0.535409,0.264142


In [11]:
# Build fresh model instances for the Iris run, with different random seeds
# from the Glass run.
# NOTE(review): the original comment asked whether the models would keep the
# parameters of the previous run. Per scikit-learn's estimator conventions a
# new fit() call should discard the previous fit unless warm_start is enabled
# (it is not set here) -- confirm; re-creating the models is the safe choice.
all_models = [
    MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(16, 8),
        activation='logistic',
        random_state=1939,
    ),
    Perceptron(tol=1e-3, random_state=1945),
] + [
    svm.SVC(gamma='scale', kernel=kernel_name)
    for kernel_name in ('linear', 'rbf', 'sigmoid')
]

Running models for Iris dataset:

In [12]:
# Cross-validate every candidate model on the Iris dataset.
# For each model this prints one confusion matrix per fold, followed by the
# mean accuracy and the mean time spent inside fit() across the folds.

# Sorted distinct class labels. Passing them explicitly to confusion_matrix
# keeps every matrix the same shape and row order; without them the matrix
# only covers the classes that happen to occur in the current fold.
iris_labels = np.unique(y_iris)

for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    accuracies = []
    times = []

    # y_iris is handed to split() as well: KFold ignores it, and the loop keeps
    # working unchanged if kf is replaced by a stratified splitter.
    for train_index, test_index in kf.split(data_iris, y_iris):
        # Build the training and validation sets for the current fold.
        X_train, y_train = data_iris.iloc[train_index, :], y_iris.iloc[train_index]
        X_valid, y_valid = data_iris.iloc[test_index, :], y_iris.iloc[test_index]

        # Time only the training step. perf_counter() is a monotonic,
        # high-resolution clock intended for measuring short durations.
        start = time.perf_counter()
        model.fit(X_train, y_train)
        end = time.perf_counter()
        times.append(end - start)

        pred = model.predict(X_valid)
        print(confusion_matrix(y_valid, pred, labels=iris_labels))

        # scikit-learn metrics take (y_true, y_pred) in that order.
        acc = accuracy_score(y_valid, pred)
        accuracies.append(acc)

    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1939, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 9  0  0]
 [ 0  9  1]
 [ 0  1 10]]
[[13  0  0]
 [ 0 11  0]
 [ 0  0  6]]
[[11  0  0]
 [ 0  7  0]
 [ 0  1 11]]
[[ 7  0  0]
 [ 0 15  1]
 [ 0  1  6]]
[[10  0  0]
 [ 0  4  2]
 [ 0  1 13]]
====
Final results: 
Mean accuracy: 0.9466666666666667
Mean traning model time:  0.12873940467834472


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=1945, shuffle=True,
[[ 9  0  0]
 [ 0 10  0]
 [ 0  7  4]]
[[12  1  0]
 [ 0 11  0]
 [ 0  1  5]]
[[11  0  0]
 [ 0  5  2]
 [ 0  0 12]]
[[ 7  0  0]
 [ 0 16  0]
 [ 0  2  5]]
[[10  0  0]
 [ 0  3  3]
 [ 0  1 13]]
====
Final results: 
Mean accuracy: 0.8866666666666667
Mean