In [1]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import pandas as pd
# define dataset
data = pd.read_csv("ha.csv")
data.columns = ['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol', 'fasting_blood_sugar', 'rest_ecg', 'max_heart_rate_achieved',
       'exercise_induced_angina', 'st_depression', 'st_slope','target']
X = data.drop('target',axis=1)
y = data['target']
# define models and parameters
model = LogisticRegression()
solvers = ['newton-cg', 'lbfgs', 'liblinear','sag','saga']
penalty =  ['none', 'l1', 'l2', 'elasticnet']
c_values = [100, 10, 1.0, 0.1, 0.01]
# define grid search
grid = dict(solver=solvers,penalty=penalty,C=c_values)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.829412 using {'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}
0.828571 (0.034416) with: {'C': 100, 'penalty': 'none', 'solver': 'newton-cg'}
0.818207 (0.033366) with: {'C': 100, 'penalty': 'none', 'solver': 'lbfgs'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'none', 'solver': 'liblinear'}
0.748179 (0.037549) with: {'C': 100, 'penalty': 'none', 'solver': 'sag'}
0.728571 (0.039958) with: {'C': 100, 'penalty': 'none', 'solver': 'saga'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'newton-cg'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'lbfgs'}
0.828571 (0.034756) with: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}
0.000000 (0.000000) with: {'C': 100, 'penalty': 'l1', 'solver': 'sag'}
0.728571 (0.039958) with: {'C': 100, 'penalty': 'l1', 'solver': 'saga'}
0.828571 (0.034416) with: {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
0.818207 (0.032364) with: {'C': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
0.828571 (0.034756) with: {'

In [2]:
#Ridge Classifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeClassifier
# define models and parameters
model = RidgeClassifier()
alpha = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,5.0]
# define grid search
grid = dict(alpha=alpha)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.828291 using {'alpha': 0.3}
0.828011 (0.031121) with: {'alpha': 0.1}
0.828011 (0.031121) with: {'alpha': 0.2}
0.828291 (0.031042) with: {'alpha': 0.3}
0.828291 (0.031042) with: {'alpha': 0.4}
0.828291 (0.031042) with: {'alpha': 0.5}
0.828291 (0.031042) with: {'alpha': 0.6}
0.828291 (0.031042) with: {'alpha': 0.7}
0.828291 (0.031042) with: {'alpha': 0.8}
0.828291 (0.031042) with: {'alpha': 0.9}
0.828291 (0.031042) with: {'alpha': 1.0}
0.828291 (0.030352) with: {'alpha': 5.0}


In [3]:
#KNN
# example of grid searching key hyperparametres for KNeighborsClassifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define models and parameters
model = KNeighborsClassifier()
n_neighbors = range(1, 21, 2)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan', 'minkowski']
# define grid search
grid = dict(n_neighbors=n_neighbors,weights=weights,metric=metric)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.868347 using {'metric': 'manhattan', 'n_neighbors': 17, 'weights': 'distance'}
0.811485 (0.029566) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
0.811485 (0.029566) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'distance'}
0.688796 (0.033366) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
0.807843 (0.030970) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
0.699440 (0.038306) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
0.828571 (0.025359) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
0.714006 (0.030414) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
0.836134 (0.022522) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
0.708403 (0.037023) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'}
0.831653 (0.024305) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}
0.715406

In [4]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define model and parameters
model = SVC()
kernel = ['poly', 'rbf', 'sigmoid']
C = [50, 10, 1.0, 0.1, 0.01]
gamma = ['scale']
# define grid search
grid = dict(kernel=kernel,C=C,gamma=gamma)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.807843 using {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.807843 (0.032163) with: {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.791877 (0.032305) with: {'C': 50, 'gamma': 'scale', 'kernel': 'rbf'}
0.548179 (0.060117) with: {'C': 50, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.773950 (0.032208) with: {'C': 10, 'gamma': 'scale', 'kernel': 'poly'}
0.740616 (0.034968) with: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
0.550140 (0.058411) with: {'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.725770 (0.035947) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
0.710084 (0.035834) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
0.547899 (0.052047) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.702521 (0.032734) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
0.692157 (0.040504) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
0.579552 (0.023717) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.627171 (0.034064) with: {'C': 0.01, 'gamma': 'scale',

In [5]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define models and parameters
model = RandomForestClassifier()
n_estimators = [10, 100, 1000]
max_features = ['sqrt', 'log2']
# define grid search
grid = dict(n_estimators=n_estimators,max_features=max_features)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.944258 using {'max_features': 'sqrt', 'n_estimators': 100}
0.921289 (0.018216) with: {'max_features': 'sqrt', 'n_estimators': 10}
0.944258 (0.016874) with: {'max_features': 'sqrt', 'n_estimators': 100}
0.941457 (0.018345) with: {'max_features': 'sqrt', 'n_estimators': 1000}
0.922969 (0.024747) with: {'max_features': 'log2', 'n_estimators': 10}
0.942017 (0.021298) with: {'max_features': 'log2', 'n_estimators': 100}
0.942017 (0.018834) with: {'max_features': 'log2', 'n_estimators': 1000}


In [None]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
data = pd.read_csv("ha.csv")
data.columns = ['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol', 'fasting_blood_sugar', 'rest_ecg', 'max_heart_rate_achieved',
       'exercise_induced_angina', 'st_depression', 'st_slope','target']
X = data.drop('target',axis=1)
y = data['target']
# define dataset
#X, y = make_blobs(n_samples=1000, centers=2, n_features=100, cluster_std=20)
# define models and parameters
model = GradientBoostingClassifier()
n_estimators = [10, 100, 1000]
learning_rate = [0.001, 0.01, 0.1]
subsample = [0.5, 0.7, 1.0]
max_depth = [3, 7, 9]
# define grid search
grid = dict(learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, max_depth=max_depth)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))