In [3]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import pandas as pd
# define dataset
data = pd.read_csv("heart.csv")
X = data.drop('target',axis=1)
y = data['target']
# define models and parameters
model = LogisticRegression()
solvers = ['newton-cg', 'lbfgs', 'liblinear','sag','saga']
penalty =  ['none', 'l1', 'l2', 'elasticnet']
c_values = [100, 10, 1.0, 0.1, 0.01]
# define grid search
grid = dict(solver=solvers,penalty=penalty,C=c_values)
cv = RepeatedStratifiedKFold(n_splits=15, n_repeats=5, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.845238 using {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
0.825365 (0.079683) with: {'C': 100, 'penalty': 'none', 'solver': 'newton-cg'}
0.825333 (0.080051) with: {'C': 100, 'penalty': 'none', 'solver': 'lbfgs'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'none', 'solver': 'liblinear'}
0.715048 (0.092614) with: {'C': 100, 'penalty': 'none', 'solver': 'sag'}
0.705714 (0.096113) with: {'C': 100, 'penalty': 'none', 'solver': 'saga'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'newton-cg'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'lbfgs'}
0.826032 (0.079677) with: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'sag'}
0.705714 (0.096113) with: {'C': 100, 'penalty': 'l1', 'solver': 'saga'}
0.826032 (0.079677) with: {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
0.828032 (0.072951) with: {'C': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
0.826032 (0.079677) with: {'C': 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [4]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeClassifier
# define models and parameters
model = RidgeClassifier()
alpha = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,5.0]
# define grid search
grid = dict(alpha=alpha)
cv = RepeatedStratifiedKFold(n_splits=15, n_repeats=5, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.832762 using {'alpha': 5.0}
0.831429 (0.076332) with: {'alpha': 0.1}
0.831429 (0.076332) with: {'alpha': 0.2}
0.831429 (0.076332) with: {'alpha': 0.3}
0.831429 (0.076332) with: {'alpha': 0.4}
0.831429 (0.076332) with: {'alpha': 0.5}
0.830762 (0.076821) with: {'alpha': 0.6}
0.830762 (0.076821) with: {'alpha': 0.7}
0.830762 (0.076821) with: {'alpha': 0.8}
0.831429 (0.075454) with: {'alpha': 0.9}
0.831429 (0.075454) with: {'alpha': 1.0}
0.832762 (0.073989) with: {'alpha': 5.0}


In [6]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define models and parameters
model = KNeighborsClassifier()
n_neighbors = range(1, 21, 2)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan', 'minkowski']
# define grid search
grid = dict(n_neighbors=n_neighbors,weights=weights,metric=metric)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.701864 using {'metric': 'manhattan', 'n_neighbors': 19, 'weights': 'uniform'}
0.585520 (0.064357) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
0.585520 (0.064357) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'distance'}
0.624839 (0.062070) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
0.628136 (0.061723) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
0.650143 (0.063493) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
0.642473 (0.061191) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
0.657993 (0.078702) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
0.642688 (0.086641) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
0.642652 (0.091571) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'}
0.632760 (0.088583) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}
0.646953 

In [7]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define model and parameters
model = SVC()
kernel = ['poly', 'rbf', 'sigmoid']
C = [50, 10, 1.0, 0.1, 0.01]
gamma = ['scale']
# define grid search
grid = dict(kernel=kernel,C=C,gamma=gamma)
cv = RepeatedStratifiedKFold(n_splits=15, n_repeats=5, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.754032 using {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.754032 (0.084703) with: {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.708540 (0.083386) with: {'C': 50, 'gamma': 'scale', 'kernel': 'rbf'}
0.518159 (0.106075) with: {'C': 50, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.706444 (0.087646) with: {'C': 10, 'gamma': 'scale', 'kernel': 'poly'}
0.690698 (0.092083) with: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
0.518825 (0.107221) with: {'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.674159 (0.088413) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
0.650032 (0.085551) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
0.544762 (0.010476) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.564603 (0.038953) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
0.544762 (0.010476) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
0.544762 (0.010476) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.544762 (0.010476) with: {'C': 0.01, 'gamma': 'scale',

In [8]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define models and parameters
model = RandomForestClassifier()
n_estimators = [10, 100, 1000]
max_features = ['sqrt', 'log2']
# define grid search
grid = dict(n_estimators=n_estimators,max_features=max_features)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.821935 using {'max_features': 'log2', 'n_estimators': 1000}
0.794409 (0.076042) with: {'max_features': 'sqrt', 'n_estimators': 10}
0.816595 (0.064862) with: {'max_features': 'sqrt', 'n_estimators': 100}
0.818638 (0.062393) with: {'max_features': 'sqrt', 'n_estimators': 1000}
0.791219 (0.076860) with: {'max_features': 'log2', 'n_estimators': 10}
0.818638 (0.062909) with: {'max_features': 'log2', 'n_estimators': 100}
0.821935 (0.061706) with: {'max_features': 'log2', 'n_estimators': 1000}


In [10]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
param_dict = {"criterion": ['gini', 'entropy'],"max_depth": [2,4,6,8,10,12],"min_samples_split":range(1,10),"min_samples_leaf":range(1,10),"max_features":['auto']} 
#grid = GridSearchCV(decision_tree,param_grid = param_dict,n_jobs=-1, cv=10,error_score=0)
grid = GridSearchCV(estimator=model, param_grid=param_dict, n_jobs=-1, cv=10, scoring='accuracy',error_score=0)
grid_result = grid.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param)) 

Best: 0.818065 using {'criterion': 'gini', 'max_depth': 4, 'max_features': 'auto', 'min_samples_leaf': 6, 'min_samples_split': 7}
0.000000 (0.000000) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 1}
0.688925 (0.097108) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2}
0.751935 (0.071642) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 3}
0.708387 (0.101484) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 4}
0.739032 (0.053388) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 5}
0.736022 (0.095960) with: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 6}
0.719032 (0.061213) with: {'criterion': 'gini', 'max_depth':