In [1]:
import pandas as pd
import numpy as np
# import data_sampling as ds
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score
import time
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler

pd.options.display.max_colwidth = 150

data_iris = pd.read_csv("iris.csv")
data_glass = pd.read_csv("glass.data")

# to store the models and their results (to report)
models, results, tempos = list(), list(), list()

print('Iris:', np.shape(data_iris))
print('Glass:', np.shape(data_glass))

('Iris:', (150, 5))
('Glass:', (214, 11))


In [2]:
data_glass = shuffle(data_glass)
data_glass.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,target
144,145,1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24,2
129,130,1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2
202,203,1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7
175,176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5
82,83,1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2


In [3]:
data_iris = shuffle(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
84,5.4,3.0,4.5,1.5,Versicolor
69,5.6,2.5,3.9,1.1,Versicolor
12,4.8,3.0,1.4,0.1,Setosa
118,7.7,2.6,6.9,2.3,Virginica
139,6.9,3.1,5.4,2.1,Virginica


In [4]:
y = data_glass['target']
data_glass = data_glass.drop(columns = ['target', 'a'])

In [5]:
y_iris = data_iris['variety']
data_iris = data_iris.drop(columns = ['variety'])

In [6]:
# Normalizing the Glass dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_glass)
data_glass[:] = scaler.transform(data_glass)
data_glass.head()

Unnamed: 0,b,c,d,e,f,g,h,i,j
144,-0.582693,-0.512893,0.344306,-0.431451,0.412905,0.127475,-0.103508,-0.352877,1.882411
129,0.60552,0.702288,-0.927381,0.371598,-1.152964,-0.164533,1.129039,-0.352877,1.265196
202,-1.06458,1.770175,-1.865511,1.957619,1.383484,-0.763919,-0.399319,0.776028,-0.586451
175,0.932278,-0.537442,-1.63619,0.130683,0.956429,-0.564123,1.629101,-0.352877,2.293888
82,-0.628901,0.002638,0.601422,-0.391299,0.205848,0.281164,-0.603569,-0.352877,-0.586451


In [7]:
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]

models.extend(all_models)

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=2019)    

Running models for Glass dataset:

In [9]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_glass)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
        X_train, X_valid = data_glass.iloc[train_index,:].copy(), data_glass.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(confusion_matrix(y_valid, pred, labels=[1,2,3,5,6,7]))
        acc = accuracy_score(pred, y_valid)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    print('Mean traning model time: ', np.mean(times))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))

       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
('====\nFinal results: \nMean accuracy:', 0.6591362126245848)
('Mean traning model time: ', 0.22319340705871582)
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
('====\nFinal results: \nMean accuracy:', 0.5652270210409746)
('Mean traning model time: ', 0.00650935173034668)
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
  max_iter=-1, probability=False, random_state=5, shrinking=True,
('====\nFinal results: \nMean accuracy:', 0.4904761904761905)
('Mean traning model time: ', 0.002805614471435547)
  decision_function_shape='ovr', degree=3, gamma='

In [10]:
data = {'Accuracy': results, 'Time': tempos, 'Model': models}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Accuracy,Model,Time
3,0.687043,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ...",0.003691
6,0.687043,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ...",0.006276
0,0.659136,"MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',\n beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,\n ...",0.223193
1,0.565227,"Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,\n fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_chang...",0.006509
4,0.541971,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=...",0.006637
7,0.541971,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=...",0.003629
2,0.490476,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,...",0.002806
5,0.490476,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,...",0.007367


In [11]:
dataframe.to_csv('glass_results.csv',index=False)

In [12]:
# Normalizing the Iris dataset: standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
scaler.fit(data_iris)
data_iris[:] = scaler.transform(data_iris)
data_iris.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
84,-0.537178,-0.131979,0.421734,0.395774
69,-0.294842,-1.282963,0.080709,-0.130755
12,-1.264185,-0.131979,-1.340227,-1.447076
118,2.249683,-1.052767,1.785832,1.448832
139,1.28034,0.098217,0.933271,1.185567


In [13]:
models, results, tempos = list(), list(), list()

In [14]:
# Restarting the models (would them keep the parameters of last run?)
all_models=[MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(16, 8), activation='logistic', 
                            random_state=5), 
            Perceptron(tol=1e-3, random_state=5), 
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovr', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovr', random_state=5),
            svm.SVC(gamma='scale', kernel='poly', decision_function_shape='ovo', random_state=5),
            svm.SVC(gamma='scale', kernel='rbf', decision_function_shape='ovo', random_state=5), 
            svm.SVC(gamma='scale', kernel='sigmoid', decision_function_shape='ovo', random_state=5)]
models.extend(all_models)

Running models for Iris dataset:

In [15]:
#For each model:
for model in all_models:
    print('\n\n======= Model: ', model, '=======')
    # for each fold:
    accuracies = list()
    times = list()
    
    for i, (train_index, test_index) in enumerate(kf.split(data_iris)):
        #Builds the train and validation dataset, according to the current fold:
        y_train, y_valid = y_iris.iloc[train_index].copy(), y_iris.iloc[test_index]
        X_train, X_valid = data_iris.iloc[train_index,:].copy(), data_iris.iloc[test_index,:].copy()
        # print("Fold", i)
        start = time.time()
        model.fit(X_train, y_train)
        end = time.time()
        times.append(end - start)
        pred = model.predict(X_valid)
        # print(confusion_matrix(y_valid, pred))
        acc = accuracy_score(pred, y_valid)
        accuracies.append(acc)
        
    print('====\nFinal results: \nMean accuracy:', np.mean(accuracies))
    results.append(np.mean(accuracies))
    tempos.append(np.mean(times))
    print('Mean traning model time: ', np.mean(times))

       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(16, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=5, shuffle=True, solver='lbfgs', tol=0.0001,
('====\nFinal results: \nMean accuracy:', 0.9466666666666667)
('Mean traning model time: ', 0.1121152400970459)
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=5, shuffle=True, tol=0.001,
('====\nFinal results: \nMean accuracy:', 0.8)
('Mean traning model time: ', 0.0050450325012207035)
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
  max_iter=-1, probability=False, random_state=5, shrinking=True,
('====\nFinal results: \nMean accuracy:', 0.9133333333333333)
('Mean traning model time: ', 0.004116535186767578)
  decision_function_shape='ovr', degree=3, gamma='scale', kernel

In [16]:
data = {'Accuracy': results, 'Time': tempos, 'Model': models}
dataframe = pd.DataFrame(data=data)
dataframe = dataframe.sort_values(by=['Accuracy'], ascending=False)
dataframe

Unnamed: 0,Accuracy,Model,Time
3,0.953333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ...",0.002921
6,0.953333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='rbf',\n max_iter=-1, ...",0.001636
0,0.946667,"MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',\n beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,\n ...",0.112115
2,0.913333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,...",0.004117
5,0.913333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='poly',\n max_iter=-1,...",0.002222
4,0.873333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=...",0.001877
7,0.873333,"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n decision_function_shape='ovo', degree=3, gamma='scale', kernel='sigmoid',\n max_iter=...",0.001624
1,0.8,"Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,\n fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_chang...",0.005045


In [17]:
dataframe.to_csv('iris_results.csv',index=False)