In [193]:
import pandas as pd
from sklearn.neural_network import MLPRegressor 
import sklearn.feature_selection as fs
from sklearn.model_selection import cross_val_score
import numpy as np

In [203]:
# Selecionando X e y do dataframe
dataframe = pd.read_csv('../normalizados/dados/3500105.csv')
#print(dataframe.columns)

X = dataframe[['Total 2ª Dose', 'Total Unica', 'Total Doses Aplicadas',
       'População Vacinada dose1/População Total', 'População Vacinada/População Total', 
        'media_movel_casos', 'media_movel_obitos', 'ocupacao_leitos','media_isolamento', 'Mean.R']].values.tolist()

y = dataframe['diagnostico_covid19']


## Seleção de atributos

In [228]:
#Remoção de características com variancia 0
sel = fs.VarianceThreshold()
X = sel.fit_transform(X)
mlp = MLPRegressor(random_state=None, max_iter=5000, tol=0.1, hidden_layer_sizes=(1000,1000))
scores_kfold = cross_val_score(estimator=mlp, X=X, y=y, cv=5)
score = scores_kfold.mean()
print("Score com VarianceThreshold -> {}" .format(score))

#Seleciona os k melhores atributos e faz a validação cruzada
for i in range(9, 0, -1):
    sel = fs.SelectKBest(fs.mutual_info_regression, k=i)
    X_sel = sel.fit_transform(X, y)
    mlp = MLPRegressor(random_state=None, max_iter=5000, tol=0.1, hidden_layer_sizes=(1000,1000))
    scores_kfold = cross_val_score(estimator=mlp, X=X_sel, y=y, cv=5)
    score = scores_kfold.mean()
    print("Score com k = {} -> {}" .format(i, score))


Score com VarianceThreshold -> -1.106517575647716
Score com k = 9 -> -1.2471416494259286
Score com k = 8 -> -1.097475937493403
Score com k = 7 -> -1.1053109463846744
Score com k = 6 -> -0.8776541191799854
Score com k = 5 -> -0.6292659844903948
Score com k = 4 -> -0.685855819397319
Score com k = 3 -> -0.2166318019470534
Score com k = 2 -> -0.16123200892803785
Score com k = 1 -> 0.03476722209844514


In [174]:
#escolhendo o atributo com melhor performance
sel = fs.SelectKBest(fs.mutual_info_regression, k=1)
X_sel = sel.fit_transform(X, y)
X_sel

array([[0.44584795],
       [0.46656642],
       [0.48421053],
       [0.50085213],
       [0.51906433],
       [0.54225564],
       [0.55548872],
       [0.60364244],
       [0.6778279 ],
       [0.71692565],
       [0.75131161],
       [0.78603175],
       [0.8431746 ],
       [0.88531328],
       [0.89981621],
       [0.90111947],
       [0.92912281],
       [0.98429407],
       [0.99124478],
       [0.99451963],
       [1.        ],
       [0.99903091],
       [0.99856307],
       [0.97012531],
       [0.91619048],
       [0.89794486],
       [0.87007519],
       [0.86228906],
       [0.83365079],
       [0.786934  ],
       [0.76695071],
       [0.73837928],
       [0.72284043],
       [0.69299916],
       [0.63378446],
       [0.60457811],
       [0.55812865],
       [0.52407686],
       [0.50121972],
       [0.50422723],
       [0.47756057],
       [0.46743525],
       [0.45951546],
       [0.46827068],
       [0.4659315 ],
       [0.46225564],
       [0.42643275],
       [0.421

In [128]:
parametros = {'solver': ['lbfgs', 'sgd', 'adam'], 'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,), (1000,1000)],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive', 'invscaling'],
    'max_iter':[200,500,750,1000], 'tol':[0.01,0.1,0.001,0.0001]
}

In [130]:
from sklearn.model_selection import GridSearchCV
mlp = MLPRegressor()
clf = GridSearchCV(mlp, parametros, n_jobs=-1, cv=3)
clf.fit(X_sel, y)

GridSearchCV(cv=3, estimator=MLPRegressor(), n_jobs=-1,
             param_grid={'activation': ['identity', 'logistic', 'tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,), (1000, 1000)],
                         'learning_rate': ['constant', 'adaptive',
                                           'invscaling'],
                         'max_iter': [200, 500, 750, 1000],
                         'solver': ['lbfgs', 'sgd', 'adam'],
                         'tol': [0.01, 0.1, 0.001, 0.0001]})

In [131]:
clf.best_params_

{'activation': 'identity',
 'alpha': 0.0001,
 'hidden_layer_sizes': (50, 50, 50),
 'learning_rate': 'constant',
 'max_iter': 1000,
 'solver': 'adam',
 'tol': 0.1}

In [266]:
mlp = MLPRegressor(max_iter=5000, tol=0.1, hidden_layer_sizes=(50, 50, 50), activation='identity', alpha=0.0001)
scores_kfold = cross_val_score(estimator=mlp, X=X_sel, y=y, cv=5)
score = scores_kfold.mean()
score

0.11786877159025995