In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# to store the models and their results (to report)
models, results, tempos = list(), list(), list()

print('Iris:', np.shape(data_iris))
print('Glass:', np.shape(data_glass))

Iris: (150, 5)
Glass: (214, 11)


In [2]:
data_glass = shuffle(data_glass)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
28,29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1
78,79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2
110,111,1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2
13,14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17,1
61,62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0,1


In [3]:
data_iris = shuffle(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
106,4.9,2.5,4.5,1.7,Virginica
23,5.1,3.3,1.7,0.5,Setosa
59,5.2,2.7,3.9,1.4,Versicolor
54,6.5,2.8,4.6,1.5,Versicolor
80,5.5,2.4,3.8,1.1,Versicolor


In [4]:
y = data_glass['target']
data_glass = data_glass.drop(columns = ['target', 'a'])

In [5]:
y_iris = data_iris['variety']
data_iris = data_iris.drop(columns = ['variety'])

In [6]:
# Normalizing the Glass dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_glass)
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
28,-0.226229,-1.040699,0.580575,-0.029927,0.645844,0.112107,-0.293672,-0.352877,-0.586451
78,-0.737821,0.628641,0.580575,-0.391299,0.296435,-0.195271,-0.716259,-0.352877,0.853719
110,2.731101,-2.673215,-1.865511,-1.354957,0.72349,-0.763919,4.030806,-0.352877,-0.586451
13,-0.292241,-0.672462,0.608372,-0.351146,0.72349,0.066,-0.406362,-0.352877,1.162326
61,0.463594,0.493621,0.62227,-0.250765,-1.204728,-0.579492,-0.202111,1.038095,-0.586451


In [7]:
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]

models.extend(all_models)

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=2019)    

Running models for Glass dataset:

In [9]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_glass)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
        X_train, X_valid = data_glass.iloc[train_index,:].copy(), data_glass.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        # print("Tempo: ", end - start)
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(pred)
        print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        acc = accuracy_score(pred, y_valid)
        # print('Accuracy:', acc)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 9  3  2  0  0  0]
 [ 3 11  2  1  0  0]
 [ 0  0  1  0  0  0]
 [ 0  0  0  1  0  1]
 [ 0  1  0  0  1  0]
 [ 0  1  0  0  1  5]]
[[13  4  0  0  0  0]
 [ 2 12  1  0  0  0]
 [ 1  2  0  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0  0  0  0  6]]
[[ 5  4  1  0  1  0]
 [ 0 12  1  2  1  0]
 [ 2  2  0  0  0  0]
 [ 0  1  0  3  0  0]
 [ 0  1  0  0  1  0]
 [ 1  1  0  0  0  4]]
[[11  4  1  0  0  0]
 [ 4 10  0  0  1  0]
 [ 4  0  0  0  0  0]
 [ 0  1  0  1  0  0]
 [ 0  0  0  0  0  1]
 [ 0  0  0  0  0  5]]
[[ 9  1  2  0  0  0]
 [ 1 10  0  1  1  0]
 [ 2  2  1  0  0  0]
 [ 0  1  0  3  0  0]
 [ 1  0  0  1  0  1]
 [ 1  1  0  1  1  1]]
====
Final results: 
Mean accuracy: 0.63

In [10]:
data = {'Model': models, 'Accuracy': results, 'Time': tempos}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Model,Accuracy,Time
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.705205,0.003544
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.705205,0.00502
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.644518,0.004262
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.644518,0.009673
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.635216,0.148784
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.560465,0.003124
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.555814,0.003393
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.555814,0.003353


In [11]:
dataframe.to_csv('glass_results.csv',index=False)

In [12]:
# Normalizing the Iris dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_iris)
data_iris[:] = scaler.transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
106,-1.143017,-1.282963,0.421734,0.659038
23,-0.900681,0.558611,-1.169714,-0.920548
59,-0.779513,-0.82257,0.080709,0.264142
54,0.795669,-0.592373,0.478571,0.395774
80,-0.41601,-1.51316,0.023872,-0.130755


In [13]:
models, results, tempos = list(), list(), list()

In [14]:
# Restarting the models (would them keep the parameters of last run?)
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]
models.extend(all_models)

Running models for Iris dataset:

In [15]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_iris)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y_iris.iloc[train_index].copy(), y_iris.iloc[test_index]
        X_train, X_valid = data_iris.iloc[train_index,:].copy(), data_iris.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        # print("Tempo: ", end - start)
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(pred)
        print(confusion_matrix(y_valid, pred))
        acc = accuracy_score(pred, y_valid)
        # print('Accuracy:', acc)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[11  0  0]
 [ 0 12  0]
 [ 0  0  7]]
[[10  0  0]
 [ 0 10  0]
 [ 0  1  9]]
[[10  0  0]
 [ 0  8  1]
 [ 0  1 10]]
[[13  1  0]
 [ 0  9  2]
 [ 0  0  5]]
[[ 5  0  0]
 [ 0  8  0]
 [ 0  2 15]]
====
Final results: 
Mean accuracy: 0.9466666666666667
Mean traning model time:  0.08163471221923828


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
[[11  0  0]
 [ 2  7  3]
 [ 0  0  7]]
[[10  0  0]
 [ 9  1  0]
 [ 0  0 10]]
[[10  0  0]
 [ 0  8  1]
 [ 0  1 10]]
[[14  0  0]
 [ 2  7  2]
 [ 0  0  5]]
[[ 5  0  0]
 [ 0  7  1]
 [ 0  2 15]]
====
Final results: 
Mean accuracy: 0.8466666666666667

In [16]:
data = {'Model': models, 'Accuracy': results, 'Time': tempos}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Model,Accuracy,Time
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.002053
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.013823
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.96,0.002603
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.96,0.020016
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.946667,0.081635
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.906667,0.003475
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.906667,0.002147
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.846667,0.002655


In [17]:
dataframe.to_csv('iris_results.csv',index=False)