In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

pd.options.display.max_colwidth = 150

data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# to store the models and their results (to report)
models, results, tempos = list(), list(), list()

print('Iris:', np.shape(data_iris))
print('Glass:', np.shape(data_glass))

Iris: (150, 5)
Glass: (214, 11)


In [2]:
data_glass = shuffle(data_glass)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
159,160,1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3
157,158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0,3
55,56,1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1
64,65,1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1
75,76,1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2


In [3]:
data_iris = shuffle(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
54,6.5,2.8,4.6,1.5,Versicolor
128,6.4,2.8,5.6,2.1,Virginica
97,6.2,2.9,4.3,1.3,Versicolor
98,5.1,2.5,3.0,1.1,Versicolor
32,5.2,4.1,1.5,0.1,Setosa


In [4]:
y = data_glass['target']
data_glass = data_glass.drop(columns = ['target', 'a'])

In [5]:
y_iris = data_iris['variety']
data_iris = data_iris.drop(columns = ['variety'])

In [6]:
# Normalizing the Glass dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_glass)
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
159,-0.133813,0.113109,0.469389,0.371598,-0.920025,0.112107,-0.103508,-0.352877,0.339372
157,0.938879,0.763661,0.747354,-1.736405,-1.114141,-0.594861,0.488115,-0.352877,-0.586451
55,-0.222929,-1.175719,0.017697,-0.310994,1.357602,0.096738,0.072571,-0.352877,1.882411
64,1.10721,0.08856,0.733456,-1.093966,-0.829438,-0.487279,0.459942,-0.352877,0.133634
75,-0.813735,-0.476069,0.62227,0.130683,0.607021,0.296533,-0.702173,-0.352877,-0.586451


In [7]:
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]

models.extend(all_models)

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=2019)    

Running models for Glass dataset:

In [9]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_glass)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
        X_train, X_valid = data_glass.iloc[train_index,:].copy(), data_glass.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        acc = accuracy_score(pred, y_valid)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
====
Final results: 
Mean accuracy: 0.6118493909191584
Mean traning model time:  0.14593353271484374


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
====
Final results: 
Mean accuracy: 0.5227021040974529
Mean traning model time:  0.0061283111572265625


  decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
  max_iter=-1, probability=False, random_state=5, shrinking=True,
====
Final results: 
Mean accuracy: 0.4908084163898117
Mean traning model time:  0.005064487457275391


  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  ma

In [10]:
data = {'Accuracy': results, 'Time': tempos, 'Model': models}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Accuracy,Time,Model
3,0.676855,0.003488,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ..."
6,0.676855,0.003842,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ..."
0,0.611849,0.145934,"MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',\n beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,\n ..."
4,0.522924,0.003434,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=..."
7,0.522924,0.003284,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=..."
1,0.522702,0.006128,"Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,\n fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_chang..."
2,0.490808,0.005064,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,..."
5,0.490808,0.003457,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,..."


In [11]:
dataframe.to_csv('glass_results.csv',index=False)

In [12]:
# Normalizing the Iris dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_iris)
data_iris[:] = scaler.transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
54,0.795669,-0.592373,0.478571,0.395774
128,0.674501,-0.592373,1.046945,1.185567
97,0.432165,-0.362176,0.308059,0.13251
98,-0.900681,-1.282963,-0.430828,-0.130755
32,-0.779513,2.400185,-1.283389,-1.447076


In [13]:
models, results, tempos = list(), list(), list()

In [14]:
# Restarting the models (would them keep the parameters of last run?)
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]
models.extend(all_models)

Running models for Iris dataset:

In [15]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_iris)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y_iris.iloc[train_index].copy(), y_iris.iloc[test_index]
        X_train, X_valid = data_iris.iloc[train_index,:].copy(), data_iris.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(confusion_matrix(y_valid, pred))
        acc = accuracy_score(pred, y_valid)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))
    print('Mean traning model time: ', np.mean(times))



       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
====
Final results: 
Mean accuracy: 0.9333333333333333
Mean traning model time:  1.0312739372253419


      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
====
Final results: 
Mean accuracy: 0.8666666666666668
Mean traning model time:  0.02809906005859375


  decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
  max_iter=-1, probability=False, random_state=5, shrinking=True,
====
Final results: 
Mean accuracy: 0.9066666666666666
Mean traning model time:  0.0181276798248291


  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_ite

In [16]:
data = {'Accuracy': results, 'Time': tempos, 'Model': models}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Accuracy,Time,Model
3,0.96,0.014468,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ..."
6,0.96,0.018135,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ..."
0,0.933333,1.031274,"MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',\n beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,\n ..."
2,0.906667,0.018128,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,..."
5,0.906667,0.016945,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,..."
4,0.9,0.019138,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=..."
7,0.9,0.001772,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=..."
1,0.866667,0.028099,"Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,\n fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_chang..."


In [17]:
dataframe.to_csv('iris_results.csv',index=False)