In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load both datasets from CSV files in the working directory.
data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# Parallel lists filled in by later cells and used for the final report:
# the estimators, their mean accuracy, and their mean fit time.
models, results, tempos = [], [], []

# Quick sanity check of the (rows, columns) of each frame.
print('Iris:', data_iris.shape)
print('Glass:', data_glass.shape)

Iris: (150, 5)
Glass: (214, 11)


In [2]:
# Shuffle the rows with a fixed seed. Without random_state the row order
# changes on every run, and with it the contents of each CV fold and every
# reported score, so the notebook could not be reproduced.
data_glass = shuffle(data_glass, random_state=2019)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
77,78,1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2
51,52,1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1
20,21,1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1
183,184,1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6
7,8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1


In [3]:
# Shuffle the rows with a fixed seed. Without random_state the row order
# changes on every run, and with it the contents of each CV fold and every
# reported score, so the notebook could not be reproduced.
data_iris = shuffle(data_iris, random_state=2019)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
1,4.9,3.0,1.4,0.2,Setosa
14,5.8,4.0,1.2,0.2,Setosa
95,5.7,3.0,4.2,1.2,Versicolor
137,6.4,3.1,5.5,1.8,Virginica
7,5.0,3.4,1.5,0.2,Setosa


In [4]:
# Separate the class label from the feature columns.
# 'a' is dropped together with the label; judging by the head() output it
# looks like a 1-based row id rather than a measurement -- TODO confirm.
y = data_glass.loc[:, 'target']
data_glass = data_glass.drop(['target', 'a'], axis=1)

In [5]:
# Separate the class label ('variety') from the feature columns.
y_iris = data_iris.loc[:, 'variety']
data_iris = data_iris.drop(['variety'], axis=1)

In [6]:
# Standardize the Glass features (zero mean, unit variance per column).
# The values are written back through [:] so the frame keeps its index and
# column labels.
# NOTE(review): the scaler is fit on every row before cross-validation, so the
# mean/variance used for training folds also include the validation rows.
scaler = StandardScaler()
data_glass[:] = scaler.fit_transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
77,-0.691613,-0.500618,0.62227,0.190912,0.23173,0.173582,-0.645828,-0.352877,-0.586451
51,0.295264,-0.255127,0.448542,-0.33107,-0.3765,0.158213,0.128916,-0.352877,0.545111
20,-0.28564,-0.72156,0.601422,0.090531,0.128201,0.066,-0.307758,-0.352877,1.368065
183,0.43719,1.414212,-1.865511,-1.776558,1.072899,-0.763919,1.593885,-0.352877,-0.586451
7,-0.265836,-0.3165,0.643117,-0.792823,0.762313,0.112107,-0.504966,-0.352877,-0.586451


In [7]:
# Candidate classifiers to compare: one MLP, one Perceptron and six SVCs
# (three kernels x two decision_function_shape settings).
# Every estimator is seeded with random_state=5.
all_models = [
    MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8),
                  activation='logistic', random_state=5),
    Perceptron(tol=1e-3, random_state=5),
]
# Ordering is kept as: all 'ovr' kernels first, then all 'ovo' kernels.
# NOTE(review): decision_function_shape is expected to affect only the shape of
# decision_function(), not predict(); the identical accuracies of each ovr/ovo
# pair in the results table are consistent with that -- confirm in the SVC docs.
all_models += [
    svm.SVC(gamma='scale', kernel=kernel, decision_function_shape=shape, random_state=5)
    for shape in ('ovr', 'ovo')
    for kernel in ('poly', 'rbf', 'sigmoid')
]

# Rebind rather than extend(): re-running this cell must not leave duplicate
# entries in `models`, otherwise it no longer lines up with `results`/`tempos`
# when the report DataFrame is built.
models = list(all_models)

In [8]:
# 5-fold splitter used by both evaluation loops; shuffling is seeded so the
# fold assignment is the same for every model.
# NOTE(review): plain KFold does not stratify by class, so a fold can contain
# no sample of a rare class (one Glass confusion matrix has an all-zero row).
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=2019,
)

Running models for Glass dataset:

In [9]:
# Cross-validate every candidate model on the Glass data.
# For each model: fit on each training fold, print the per-fold confusion
# matrix, then record the mean accuracy and mean fit time across folds.

# Start from empty summaries so re-running this cell does not append a second
# set of rows (which would no longer line up with `models`).
results, tempos = [], []

for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    accuracies = []  # validation accuracy of each fold
    times = []       # fit duration of each fold, in seconds

    for train_index, test_index in kf.split(data_glass):
        # Build the train and validation sets for the current fold.
        X_train, X_valid = data_glass.iloc[train_index], data_glass.iloc[test_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

        # Time the fit only. perf_counter() is monotonic and high-resolution,
        # which matters here because several fits take only a few milliseconds.
        start = time.perf_counter()
        model.fit(X_train, y_train)
        times.append(time.perf_counter() - start)

        pred = model.predict(X_valid)
        # Explicit labels keep every matrix 6x6 and in the same class order,
        # even when a class is missing from the fold.
        print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        # Documented argument order is (y_true, y_pred).
        accuracies.append(accuracy_score(y_valid, pred))

    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[9 3 3 0 0 0]
 [4 9 1 2 0 0]
 [2 2 0 0 0 0]
 [0 0 0 0 1 0]
 [1 0 0 0 1 0]
 [0 0 0 0 0 5]]
[[9 1 0 0 0 0]
 [4 8 3 0 0 1]
 [3 1 1 0 0 0]
 [0 2 0 4 0 0]
 [0 0 0 0 1 0]
 [0 0 0 0 0 5]]
[[11  5  3  0  1  0]
 [ 0 10  2  0  0  0]
 [ 1  0  2  0  0  0]
 [ 0  0  0  1  0  1]
 [ 0  0  0  0  1  0]
 [ 0  2  0  0  0  3]]
[[13  2  0  0  0  0]
 [ 3  6  0  0  0  0]
 [ 1  1  0  0  0  0]
 [ 0  0  0  3  0  1]
 [ 0  1  0  0  3  0]
 [ 0  0  0  1  0  8]]
[[ 8  2  0  0  0  0]
 [ 8 11  1  2  1  0]
 [ 3  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  1  0]
 [ 1  0  0  0  1  3]]
====
Final results: 
Mean accuracy: 0.6351052048726468
Mean traning model time:  0.14688749313354493


      fit

In [10]:
# Summary table: one row per model with its mean accuracy and mean fit time,
# best accuracy first. The original list position is kept as the index.
data = dict(Model=models, Accuracy=results, Time=tempos)
dataframe = pd.DataFrame(data).sort_values('Accuracy', ascending=False)
dataframe

Unnamed: 0,Model,Accuracy,Time
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.686932,0.004016
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.686932,0.037437
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.635105,0.146887
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.532337,0.021475
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.532337,0.03906
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.508638,0.003227
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.466777,0.003357
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.466777,0.03055


In [11]:
# Persist the Glass summary table; the row index is not written.
dataframe.to_csv(path_or_buf='glass_results.csv', index=False)

In [12]:
# Standardize the Iris features (zero mean, unit variance per column).
# The values are written back through [:] so the frame keeps its index and
# column labels.
# NOTE(review): the scaler is fit on every row before cross-validation, so the
# mean/variance used for training folds also include the validation rows.
scaler = StandardScaler()
data_iris[:] = scaler.fit_transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
1,-1.143017,-0.131979,-1.340227,-1.315444
14,-0.052506,2.169988,-1.453901,-1.315444
95,-0.173674,-0.131979,0.251221,0.000878
137,0.674501,0.098217,0.990108,0.790671
7,-1.021849,0.788808,-1.283389,-1.315444


In [13]:
# Start the Iris report from empty lists (the Glass results were already saved).
models, results, tempos = [], [], []

In [14]:
# Fresh, unfitted estimators for the Iris run, with the same configuration as
# the Glass run: one MLP, one Perceptron and six SVCs (three kernels x two
# decision_function_shape settings), all seeded with random_state=5.
# NOTE(review): new instances make the independence from the Glass run
# explicit; scikit-learn's fit() is expected to discard previously learned
# state anyway when warm_start is not enabled -- confirm in the estimator docs.
all_models = [
    MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8),
                  activation='logistic', random_state=5),
    Perceptron(tol=1e-3, random_state=5),
]
# Ordering is kept as: all 'ovr' kernels first, then all 'ovo' kernels.
all_models += [
    svm.SVC(gamma='scale', kernel=kernel, decision_function_shape=shape, random_state=5)
    for shape in ('ovr', 'ovo')
    for kernel in ('poly', 'rbf', 'sigmoid')
]

# Rebind rather than extend(): re-running this cell must not leave duplicate
# entries in `models`, otherwise it no longer lines up with `results`/`tempos`
# when the report DataFrame is built.
models = list(all_models)

Running models for Iris dataset:

In [15]:
# Cross-validate every candidate model on the Iris data.
# For each model: fit on each training fold, print the per-fold confusion
# matrix, then record the mean accuracy and mean fit time across folds.

# Start from empty summaries so re-running this cell does not append a second
# set of rows (which would no longer line up with `models`).
results, tempos = [], []

# Fixed, sorted class order for every confusion matrix. Without explicit
# labels the matrix shape and row order depend on which classes happen to
# appear in the fold.
iris_labels = np.unique(y_iris)

for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    accuracies = []  # validation accuracy of each fold
    times = []       # fit duration of each fold, in seconds

    for train_index, test_index in kf.split(data_iris):
        # Build the train and validation sets for the current fold.
        X_train, X_valid = data_iris.iloc[train_index], data_iris.iloc[test_index]
        y_train, y_valid = y_iris.iloc[train_index], y_iris.iloc[test_index]

        # Time the fit only. perf_counter() is monotonic and high-resolution,
        # which matters here because several fits take only a few milliseconds.
        start = time.perf_counter()
        model.fit(X_train, y_train)
        times.append(time.perf_counter() - start)

        pred = model.predict(X_valid)
        print(confusion_matrix(y_valid, pred, labels=iris_labels))
        # Documented argument order is (y_true, y_pred).
        accuracies.append(accuracy_score(y_valid, pred))

    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 9  0  0]
 [ 0  9  0]
 [ 0  1 11]]
[[ 8  0  0]
 [ 0  9  1]
 [ 0  1 11]]
[[11  0  0]
 [ 0 11  0]
 [ 0  0  8]]
[[10  1  0]
 [ 0 11  0]
 [ 0  2  6]]
[[11  0  0]
 [ 0  8  1]
 [ 0  0 10]]
====
Final results: 
Mean accuracy: 0.9533333333333334
Mean traning model time:  0.8617836952209472


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
[[ 9  0  0]
 [ 0  9  0]
 [ 0  1 11]]
[[ 8  0  0]
 [ 1  9  0]
 [ 0  1 11]]
[[11  0  0]
 [ 6  3  2]
 [ 0  0  8]]
[[10  1  0]
 [ 1 10  0]
 [ 0  5  3]]
[[11  0  0]
 [ 0  8  1]
 [ 0  0 10]]
====
Final results: 
Mean accuracy: 0.8733333333333333


In [16]:
# Summary table: one row per model with its mean accuracy and mean fit time,
# best accuracy first. The original list position is kept as the index.
data = dict(Model=models, Accuracy=results, Time=tempos)
dataframe = pd.DataFrame(data).sort_values('Accuracy', ascending=False)
dataframe

Unnamed: 0,Model,Accuracy,Time
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.002007
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.001963
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.953333,0.861784
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.926667,0.022635
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.926667,0.001668
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.906667,0.001672
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.906667,0.001855
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.873333,0.024877


In [17]:
# Persist the Iris summary table; the row index is not written.
dataframe.to_csv(path_or_buf='iris_results.csv', index=False)