In [1]:
# Importação dos Módulos
from pandas import read_csv
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Definindo os valores que serão testados
valores_grid = {'penalty' : ['l1', 'l2'], 'C' : [0.001, 0.01, 0.1, 1 , 10, 100, 1000]}

In [11]:
# Carregando os dados
arquivo = 'pima-data.csv'
colunas = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dados = read_csv(arquivo, names = colunas)
array = dados.values

In [12]:
dados

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [13]:
# Separando o array em componentes de input e output
X = array[:,0:8]
Y = array[: , 8]

In [14]:
# Criando o modelo
modelo = LogisticRegression()

In [18]:
# Criando o grid
grid = GridSearchCV(estimator = modelo, param_grid= valores_grid)
grid.fit(X, Y)

GridSearchCV(cv=None, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                         'penalty': ['l1', 'l2']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [20]:
# Print o resultado
print("Acurácia : %.3f" % (grid.best_score_ * 100))
print("Melhores Parâmetros do modelo :\n", grid.best_estimator_)


Acurácia : 77.610
Melhores Parâmetros do modelo :
 LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


In [21]:
# Ajuste de Parâmetros de Pesquisa Aleatório
# Random Search Parameter Tuning
# Importação dos Módulos
from pandas import read_csv
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')


In [22]:
# Carregando os dados
arquivo = 'pima-data.csv'
colunas = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dados = read_csv(arquivo, names = colunas)
array = dados.values

# Separando o array em componentes de input e output
X = array[:,0:8]
Y = array[: , 8]

In [23]:
# Definindo os valores que serão testados
seed = 7
iterations = 14

# Definindo os valores que serão testados
valores_grid = {'penalty' : ['l1', 'l2'], 'C' : [0.001, 0.01, 0.1, 1 , 10, 100, 1000]}

# Criando o modelo
modelo = LogisticRegression()

# Criando o grid
rsearch = RandomizedSearchCV(estimator = modelo,
                             param_distributions= valores_grid,
                             n_iter = iterations,
                             random_state = seed)
rsearch.fit(X, Y)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=100,
                                                multi_class='auto', n_jobs=None,
                                                penalty='l2', random_state=None,
                                                solver='lbfgs', tol=0.0001,
                                                verbose=0, warm_start=False),
                   iid='deprecated', n_iter=14, n_jobs=None,
                   param_distributions={'C': [0.001, 0.01, 0.1, 1, 10, 100,
                                              1000],
                                        'penalty': ['l1', 'l2']},
                   pre_dispatch='2*n_jobs', random_state=7, refit=True,
           

In [24]:
# Print o resultado
print("Acurácia : %.3f" % (rsearch.best_score_ * 100))
print("Melhores Parâmetros do modelo :\n", rsearch.best_estimator_)

Acurácia : 77.610
Melhores Parâmetros do modelo :
 LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


In [27]:
# Algoritmo Decision Tree- àrvore de Decisão
# Importação dos Módulos
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')

# Separando o array em componentes de input e output
X = array[:,0:8]
Y = array[: , 8]

# Definindo os valores que serão testados
seed = 7
iterations = 14

# Definindo os valores que serão testados
valores_grid = {'max_features': [3, 5, 7, 9, 10], 'max_depth': [2,3, 4, 5, 10, 15]}


# Criando o modelo
modelo = DecisionTreeClassifier()

# Criando o grid
rsearch = RandomizedSearchCV(estimator = modelo,
                             param_distributions= valores_grid,
                             n_iter = iterations,
                             random_state = seed)
rsearch.fit(X, Y)

# Print o resultado
print("Acurácia : %.3f" % (rsearch.best_score_ * 100))
print("Melhores Parâmetros do modelo :\n", rsearch.best_estimator_)

Acurácia : 74.742
Melhores Parâmetros do modelo :
 DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=5, max_features=5, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [28]:
## SEM Otimizar os Hyperparâmetros
# Importação dos Módulos
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')

# Separando o array em componentes de input e output
X = array[:,0:8]
Y = array[: , 8]

# Definindo os valores que serão testados
seed = 7
num_folds = 10

# Separando os dados em Folds
kfold = KFold(num_folds, True, random_state= seed)

# Cross Validation
resultado = cross_val_score(modelo, X, Y, cv = kfold)

# Criando o modelo
modelo = DecisionTreeClassifier()

# Print o resultado
print("Acurácia : %.3f" % (resultado.mean() * 100))


Acurácia : 69.416
