In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# to store the models and their results (to report)
models, results, tempos = list(), list(), list()

print('Iris:', np.shape(data_iris))
print('Glass:', np.shape(data_glass))

Iris: (150, 5)
Glass: (214, 11)


In [2]:
data_glass = shuffle(data_glass)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
24,25,1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0,1
162,163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3
211,212,1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7
166,167,1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5
107,108,1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24,2


In [3]:
data_iris = shuffle(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
30,4.8,3.1,1.6,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
131,7.9,3.8,6.4,2.0,Virginica
27,5.2,3.5,1.5,0.2,Setosa
29,4.7,3.2,1.6,0.2,Setosa


In [4]:
y = data_glass['target']
data_glass = data_glass.drop(columns = ['target', 'a'])

In [5]:
y_iris = data_iris['variety']
data_iris = data_iris.drop(columns = ['variety'])

In [6]:
# Normalizing the Glass dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_glass)
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
24,-0.384658,-0.034185,0.566677,-0.592061,0.257612,0.004524,-0.371146,-0.352877,-0.586451
162,1.235933,0.960054,0.761252,-1.07389,-1.670607,-0.410435,0.128916,-0.352877,3.219712
211,0.754046,1.168721,-1.865511,1.15457,0.995252,-0.763919,-0.364103,2.9532,-0.586451
166,1.037897,-2.918706,-0.677213,0.231064,1.021135,0.127475,1.87561,-0.352877,-0.586451
107,5.137232,-1.359837,-1.865511,-0.893204,-3.223534,-0.579492,5.094318,-0.352877,1.882411


In [7]:
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]

models.extend(all_models)

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=2019)    

Running models for Glass dataset:

In [9]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_glass)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
        X_train, X_valid = data_glass.iloc[train_index,:].copy(), data_glass.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        # print("Tempo: ", end - start)
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(pred)
        print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        acc = accuracy_score(pred, y_valid)
        # print('Accuracy:', acc)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 9  3  2  0  0  0]
 [ 0 13  1  1  0  1]
 [ 1  1  0  0  0  0]
 [ 0  2  0  1  0  0]
 [ 0  0  0  2  0  1]
 [ 0  2  0  0  0  3]]
[[ 8  3  1  0  0  0]
 [ 3 12  1  1  2  0]
 [ 1  1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  1  6]]
[[12  2  0  0  0  0]
 [ 1  8  0  0  1  0]
 [ 4  1  0  0  0  0]
 [ 1  0  0  2  0  1]
 [ 1  1  0  0  1  0]
 [ 0  2  0  0  1  4]]
[[ 6  9  0  0  0  0]
 [ 3 14  0  0  0  1]
 [ 1  2  0  0  0  0]
 [ 0  0  0  2  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  5]]
[[11  3  1  0  0  0]
 [ 2  8  0  1  1  1]
 [ 3  1  1  0  0  0]
 [ 0  0  0  4  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  5]]
====
Final results: 
Mean accuracy: 0.64

In [10]:
data = {'Model': models, 'Accuracy': results, 'Time': tempos}
dataframe = pd.DataFrame(data=data)
dataframe

Unnamed: 0,Model,Accuracy,Time
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.645072,0.149359
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.546844,0.00313
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.598228,0.005025
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.677519,0.004021
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.537431,0.003421
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.598228,0.002895
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.677519,0.003611
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.537431,0.005224


In [11]:
dataframe.to_csv('glass_results.csv',index=False)

In [12]:
# Normalizing the Iris dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_iris)
data_iris[:] = scaler.transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
30,-1.264185,0.098217,-1.226552,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444
131,2.492019,1.709595,1.501645,1.053935
27,-0.779513,1.019004,-1.283389,-1.315444
29,-1.385353,0.328414,-1.226552,-1.315444


In [13]:
models, results, tempos = list(), list(), list()

In [14]:
# Restarting the models (would them keep the parameters of last run?)
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='linear', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]
models.extend(all_models)

Running models for Iris dataset:

In [15]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_iris)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y_iris.iloc[train_index].copy(), y_iris.iloc[test_index]
        X_train, X_valid = data_iris.iloc[train_index,:].copy(), data_iris.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        # print("Tempo: ", end - start)
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(pred)
        print(confusion_matrix(y_valid, pred))
        acc = accuracy_score(pred, y_valid)
        # print('Accuracy:', acc)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
[[ 8  0  0]
 [ 0  9  0]
 [ 0  2 11]]
[[ 5  1  0]
 [ 0 11  2]
 [ 0  1 10]]
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
[[13  0  0]
 [ 0  8  3]
 [ 0  0  6]]
[[13  0  0]
 [ 0  8  0]
 [ 0  0  9]]
====
Final results: 
Mean accuracy: 0.9399999999999998
Mean traning model time:  0.07152705192565918


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
[[ 8  0  0]
 [ 1  8  0]
 [ 0  2 11]]
[[ 5  1  0]
 [ 0 11  2]
 [ 0  1 10]]
[[10  0  0]
 [ 0  9  0]
 [ 0  2  9]]
[[13  0  0]
 [ 3  7  1]
 [ 0  1  5]]
[[13  0  0]
 [ 7  1  0]
 [ 0  0  9]]
====
Final results: 
Mean accuracy: 0.8600000000000001

In [16]:
data = {'Model': models, 'Accuracy': results, 'Time': tempos}
dataframe = pd.DataFrame(data=data)
dataframe

Unnamed: 0,Model,Accuracy,Time
0,"MLPClassifier(activation='logistic', alpha=1e-...",0.94,0.071527
1,"Perceptron(alpha=0.0001, class_weight=None, ea...",0.86,0.002177
2,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.001628
3,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.973333,0.001924
4,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.9,0.009003
5,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.966667,0.012606
6,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.973333,0.002261
7,"SVC(C=1.0, cache_size=200, class_weight=None, ...",0.9,0.002676


In [17]:
dataframe.to_csv('iris_results.csv',index=False)