In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_val_score, train_test_split 
from sklearn import preprocessing

In [2]:
from hyperopt import fmin, tpe, hp, SparkTrials, STATUS_OK, Trials

In [3]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [4]:
df = pd.read_csv("heart.csv", sep=',')
X = df.drop("target",axis=1)
y = df["target"].values

X = pd.get_dummies(X, columns = ['cp','thal','slope'])

In [16]:
standard_scaler = preprocessing.StandardScaler()

In [17]:
X_standard = standard_scaler.fit_transform(X)

In [None]:
svm = SVC()
scores = cross_val_score(svm, X_standard, y, cv=10)

In [None]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

In [None]:
space4svm = {
    'C': hp.uniform('C', 0, 20),
    'kernel': hp.choice('kernel', ['linear', 'sigmoid', 'poly', 'rbf']),
    'gamma': hp.uniform('gamma', 0, 20)
}

In [None]:
def Objective4svm(params):
    clf = SVC(**params)
    accuracy = cross_val_score(clf, X_standard, y, cv = 10).mean()
    return {'loss': -accuracy, 'status': STATUS_OK}  


In [None]:
trials4svm = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4svm, 
    space=space4svm,
    algo=algo,
    max_evals=20,
    trials=trials4svm)

In [None]:
space4svmWithScaling = {
    'C': hp.uniform('C', 0, 20),
    'kernel': hp.choice('kernel', ['linear', 'sigmoid', 'poly', 'rbf']),
    'gamma': hp.uniform('gamma', 0, 20),
    'scale': hp.choice('scale', [0, 1]),
    'normalize': hp.choice('normalize', [0, 1])
}

In [None]:
def Objective4svmWithScale(params):
    X_ = X
    if 'normalize' in params:
        if params['normalize'] == 1:
            X_ = preprocessing.normalize(X_)
        del params['normalize']      
    if 'scale' in params:
        if params['scale'] == 1:
            X_ = preprocessing.scale(X_)       
        del params['scale']
    clf = SVC(**params)
    accuracy = cross_val_score(clf, X_, y, cv = 10).mean()
    return {'loss': -accuracy, 'status': STATUS_OK}  

In [None]:
trials4svmScale = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4svmWithScale, 
    space=space4svmWithScaling,
    algo=algo,
    max_evals=20,
    trials=trials4svmScale)

In [5]:
Ridge = RidgeClassifier()
scores = cross_val_score(Ridge, X, y, cv=10)

In [6]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.84 (+/- 0.09)


In [10]:
def Objective4Ringe(params):
    clf = RidgeClassifier(**params)
    accuracy = cross_val_score(clf, X, y,cv = 10).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}

In [8]:
space4Ridge = {
    'alpha': hp.uniform('alpha', 0.0, 2.0)
}

In [11]:
trials4Ridge = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4Ringe, 
    space=space4Ridge,
    algo=algo,
    max_evals=20,
    trials=trials4Ridge)

100%|██████████| 20/20 [00:01<00:00, 10.47trial/s, best loss: -0.8413978494623656]


In [12]:
normalizer = preprocessing.Normalizer()
X_normalize = normalizer.fit_transform(X)

In [13]:
RF = RandomForestClassifier()
scores = cross_val_score(Ridge, X_normalize, y, cv=10)


In [14]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.68 (+/- 0.17)


In [18]:
RF = RandomForestClassifier()
scores = cross_val_score(Ridge, X_standard, y, cv=10)

In [20]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.84 (+/- 0.09)


In [21]:
space4Forest = {
    'max_depth': hp.choice('max_depth', range(1,20)),
        'max_features': hp.choice('max_features', range(1,5)),
        'n_estimators': hp.choice('n_estimators', range(1,20)),
        'criterion': hp.choice('criterion', ["gini", "entropy"])
}

In [22]:
def Objective4Forest(params):
    clf = RandomForestClassifier(**params)
    accuracy = cross_val_score(clf, X, y,cv = 10).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}

In [23]:
trials4Forest = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4Forest, 
    space=space4Forest,
    algo=algo,
    max_evals=20,
    trials=trials4Forest)

100%|██████████| 20/20 [00:04<00:00,  4.06trial/s, best loss: -0.8417204301075267]


In [24]:
minMax_scaler = preprocessing.MinMaxScaler()
X_minMax  = minMax_scaler.fit_transform(X)

In [25]:
 knn = KNeighborsClassifier()
 scores = cross_val_score(Ridge, X_minMax, y, cv=10)

In [26]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.84 (+/- 0.09)


In [27]:
def Objective4knn(params):
    clf = KNeighborsClassifier(**params)
    accuracy = cross_val_score(clf, X_minMax, y,cv = 10).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}

In [28]:
space4knn = {
  'n_neighbors': hp.choice('knn_n_neighbors', range(1,50)),
        'weights' :hp.choice('weights', ['uniform','distance']),
        'metric': hp.choice('metric',['euclidean','manhattan', 'minkowski'])
}

In [29]:
trials4knn = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4knn, 
    space=space4knn,
    algo=algo,
    max_evals=20,
    trials=trials4knn)

100%|██████████| 20/20 [00:01<00:00, 13.98trial/s, best loss: -0.8450537634408603]


In [30]:
quantile_Tranformer_uniform = preprocessing.QuantileTransformer()
X_quantile = quantile_Tranformer_uniform.fit_transform(X)

In [33]:
 logistic = LogisticRegression()
 scores = cross_val_score(logistic, X_quantile, y, cv=10)

In [34]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.84 (+/- 0.10)


In [35]:
space4Logistic = {
        'penalty': hp.choice('penalty', ['l1', 'l2', 'elasticnet', 'none']),
          'C': hp.uniform('C', 0, 2.0)
}

In [36]:
def Objective4Logistic(params):
    clf = LogisticRegression(**params)
    accuracy = cross_val_score(clf, X_quantile, y,cv = 10).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}

In [37]:
trials4Logistic = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=Objective4Logistic, 
    space=space4Logistic,
    algo=algo,
    max_evals=20,
    trials=trials4Logistic)

100%|██████████| 20/20 [00:01<00:00, 12.39trial/s, best loss: -0.8447311827956989]


In [38]:
power_Transformer = preprocessing.PowerTransformer()
X_power = power_Transformer.fit_transform(X)

In [39]:
 Bayes = BernoulliNB()
 scores = cross_val_score(Bayes, X_power, y, cv=10)

In [40]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.84 (+/- 0.11)


In [41]:
scores = cross_val_score(Bayes, X, y, cv=10)

In [42]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.81 (+/- 0.10)


In [43]:
scores = cross_val_score(Bayes, X_standard, y, cv=10)

In [44]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.85 (+/- 0.09)


In [45]:
scores = cross_val_score(Bayes, X_minMax, y, cv=10)

In [46]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.81 (+/- 0.10)
