In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Perceptron
from sklearn.metrics import make_scorer, roc_auc_score

from Classificadores import ELM, RBF, ELMHebbiano, getRegressionScores

In [2]:
# Importação dos dados
data = pd.read_excel('./datasets/Concrete_Compressive_Strength/Concrete_Data.xls')
data.head()

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [3]:
data.shape# taking the count of Null/Nan of the entire dataframe 
data.isnull().sum().sum()

0

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   Cement (component 1)(kg in a m^3 mixture)              1030 non-null   float64
 1   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  1030 non-null   float64
 2   Fly Ash (component 3)(kg in a m^3 mixture)             1030 non-null   float64
 3   Water  (component 4)(kg in a m^3 mixture)              1030 non-null   float64
 4   Superplasticizer (component 5)(kg in a m^3 mixture)    1030 non-null   float64
 5   Coarse Aggregate  (component 6)(kg in a m^3 mixture)   1030 non-null   float64
 6   Fine Aggregate (component 7)(kg in a m^3 mixture)      1030 non-null   float64
 7   Age (day)                                              1030 non-null   int64  
 8   Concrete compressive strength(MPa, megapascals)  

In [4]:
# data information
y = np.array(data.iloc[0:data.shape[0], -1])

X = np.array(data.iloc[0:data.shape[0], 0:-1])

In [5]:
# Divide os atributos em conjunto de treinamento e de testes na razaõ 75%/ 25%
X_train, X_test, y_train, y_test = train_test_split(X, y) 

In [6]:
# Aplica a mesma escala para ambos os datasets
scaler = StandardScaler()
X_train_scl = scaler.fit_transform(X_train)
X_test_scl = scaler.transform(X_test) # note that we transform rather than fit_transform

In [7]:
pd.DataFrame(X_train_scl).describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,772.0,772.0,772.0,772.0,772.0,772.0,772.0,772.0
mean,-9.753281e-16,-6.224152e-16,-3.21562e-16,6.82327e-15,1.175801e-15,1.399427e-15,3.562061e-15,1.66102e-17
std,1.000648,1.000648,1.000648,1.000648,1.000648,1.000648,1.000648,1.000648
min,-1.715476,-0.8694585,-0.8416605,-2.7822,-1.049681,-2.186301,-2.163904,-0.7074567
25%,-0.8489622,-0.8694585,-0.8416605,-0.7716863,-1.049681,-0.5280828,-0.5394293,-0.6108545
50%,-0.07907599,-0.588583,-0.8416605,0.1182511,0.04657124,-0.07618667,0.06711053,-0.2727469
75%,0.7210092,0.8248644,1.025979,0.4909998,0.6717906,0.7067201,0.6256288,0.1780634
max,2.379614,3.336652,2.318182,3.053647,4.406163,2.168104,2.712884,5.153076


### ELM

In [8]:
NNeuronios = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
lambdas = [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]

parameters = {'p':NNeuronios, 'lambdar':lambdas}
model = ELM( classificator = False)

clf = GridSearchCV(model, parameters, n_jobs = -1, cv = 10)
clf.fit(X_train_scl, y_train)

In [None]:
idx = np.where(clf.cv_results_['mean_test_score'] == np.amax(clf.cv_results_['mean_test_score']))[0][0]
best_score = clf.cv_results_['mean_test_score'][idx]
best_score_std = clf.cv_results_['std_test_score'][idx]

best_Nneuronios = clf.best_params_['p']
best_lambda = clf.best_params_['lambdar']

print('Ao utilizar {} neurônios na camada intermediária e fator de regularização igual à {} foi encontrado o maior coeficiente de determinação R^2 médio de validação cruzada sobre o conjunto de treinamento: {} +/- {}%'.format(best_Nneuronios, best_lambda, round(best_score,3), round(best_score_std, 3)))

In [None]:
r2s, costs = getRegressionScores(ELM, X_train_scl, y_train, X_test_scl, y_test, best_Nneuronios, best_lambda)

print('R^2 score médio e desvio padrão após dez tentativas {} +/- {}'.format(round(np.mean(r2s),3), round(np.std(r2s),3)))
print('Custo médio e desvio padrão após dez tentativas {} +/- {}'.format(round(np.mean(costs),3), round(np.std(costs),3)))

RBF

In [12]:
NNeuronios = [32, 64]
lambdas = [0, 10, 100, 1000, 10000, 100000]

parameters = {'p':NNeuronios, 'lambdar':lambdas}
model = RBF( classificator = False)

clf = GridSearchCV(model, parameters, n_jobs = -1, cv = 10)
clf.fit(X_train_scl, y_train)

GridSearchCV(cv=10, estimator=RBF(classificator=False), n_jobs=-1,
             param_grid={'lambdar': [0, 10, 100, 1000, 10000, 100000],
                         'p': [32, 64]})

In [13]:
idx = np.where(clf.cv_results_['mean_test_score'] == np.amax(clf.cv_results_['mean_test_score']))[0][0]
best_score = clf.cv_results_['mean_test_score'][idx]
best_score_std = clf.cv_results_['std_test_score'][idx]

best_Nneuronios = clf.best_params_['p']
best_lambda = clf.best_params_['lambdar']

print('Ao utilizar {} neurônios na camada intermediária e fator de regularização igual à {} foi encontrado o maior coeficiente de determinação R^2 médio de validação cruzada sobre o conjunto de treinamento: {} +/- {}%'.format(best_Nneuronios, best_lambda, round(best_score,3), round(best_score_std, 3)))

Ao utilizar 64 neurônios na camada intermediária e fator de regularização igual à 0 foi encontrado o maior coeficiente de determinação R^2 médio de validação cruzada sobre o conjunto de treinamento: 0.202 +/- 0.095%


In [14]:
r2s, costs = getRegressionScores(RBF, X_train_scl, y_train, X_test_scl, y_test, best_Nneuronios, best_lambda)

print('R^2 score médio e desvio padrão após dez tentativas {} +/- {}'.format(round(np.mean(r2s),3), round(np.std(r2s),3)))
print('Custo médio e desvio padrão após dez tentativas {} +/- {}'.format(round(np.mean(costs),3), round(np.std(costs),3)))

LinAlgError: Singular matrix

Hebbiano

In [9]:
NNeuronios = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]

parameters = {'p':NNeuronios}
model = ELMHebbiano( classificator = False)

clf = GridSearchCV(model, parameters, n_jobs = -1, cv = 10)
clf.fit(X_train_scl, y_train)

GridSearchCV(cv=10, estimator=ELMHebbiano(classificator=False), n_jobs=-1,
             param_grid={'p': [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]})

In [10]:
idx = np.where(clf.cv_results_['mean_test_score'] == np.amax(clf.cv_results_['mean_test_score']))[0][0]
best_score = clf.cv_results_['mean_test_score'][idx]
best_score_std = clf.cv_results_['std_test_score'][idx]

best_Nneuronios = clf.best_params_['p']

print('Ao utilizar {} neurônios na camada intermediária foi encontrado o maior coeficiente de determinação R^2 médio de validação cruzada sobre o conjunto de treinamento: {} +/- {}%'.format(best_Nneuronios, round(best_score,3), round(best_score_std, 3)))

Ao utilizar 1024 neurônios na camada intermediária foi encontrado o maior coeficiente de determinação R^2 médio de validação cruzada sobre o conjunto de treinamento: -3.244 +/- 0.59%


In [11]:
clf.cv_results_['mean_test_score']

array([-4.80670527, -4.74577329, -4.72468039, -4.64563789, -4.55121189,
       -4.43538036, -4.25238313, -4.00660735, -3.69282846, -3.24376061])