In [1]:
# loading packages
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_val_score, train_test_split 
from sklearn import preprocessing

In [8]:
from hyperopt import fmin, tpe, hp, SparkTrials, STATUS_OK, Trials

In [9]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [2]:
# loading data
df = pd.read_csv("heart.csv", sep=',')

In [None]:
# checking for null
df.isnull().sum(axis = 0)

In [3]:
# split
X = df.drop("target",axis=1)
y = df["target"].values


In [4]:
# create dummies
#TODO: Try this way
X = pd.get_dummies(X, columns = ['cp','thal','slope'])

In [None]:
X.head(10)

In [10]:
standard_scaler = preprocessing.StandardScaler()
minMax_scaler = preprocessing.MinMaxScaler()
quantile_Tranformer_uniform = preprocessing.QuantileTransformer()
normalizer = preprocessing.Normalizer()
power_Transformer = preprocessing.PowerTransformer()

X_standard = standard_scaler.fit_transform(X)
X_minMax  = minMax_scaler.fit_transform(X)
X_quantile = quantile_Tranformer_uniform.fit_transform(X)
X_normalize = normalizer.fit_transform(X)
X_power = power_Transformer.fit_transform(X)

In [6]:
svm = SVC()
scores = cross_val_score(svm, X_standard, y, cv=5)

In [7]:
 print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.83 (+/- 0.06)


In [11]:
def objective(params):
    classifier_type = params['type']
    del params['type']
    if classifier_type == 'naive_bayes':
        clf = BernoulliNB(**params)
        data = X_power
    elif classifier_type == 'ridge_Classifier':
        clf = RidgeClassifier(**params)
        data = X
    elif classifier_type == 'logistic_regression':
        clf = LogisticRegression(**params)
        data = X_quantile
    elif classifier_type == 'svm':
        clf = SVC(**params)
        data = X_standard
    elif classifier_type == 'knn':
        clf = KNeighborsClassifier(**params)
        data = X_minMax  
    elif classifier_type == 'randomforest':
        clf = RandomForestClassifier(**params)
        data = X_normalize                
    else:
        return 0
    accuracy = cross_val_score(clf, data, y).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}


In [12]:
search_space = hp.choice('classifier_type', [
    {
        'type': 'naive_bayes',
    },
    {
        'type':'ridge_Classifier',
        'alpha': hp.uniform('alpha', 0.0, 2.0)
    },
    {
        'type': 'logistic_regression',
        'penalty': hp.choice('penalty', ['l1', 'l2', 'elasticnet', 'none'])
    },
    {
        'type': 'svm',
        'C': hp.lognormal('C', 0, 1.0),
        'kernel': hp.choice('kernel', ['linear', 'rbf','poly']),
        'gamma': hp.uniform('gamma', 0, 20.0)

    },
    {
        'type': 'knn',
        'n_neighbors': hp.choice('knn_n_neighbors', range(1,50)),
        'weights' :hp.choice('weights', ['uniform','distance']),
        'metric': hp.choice('metric',['euclidean','manhattan', 'minkowski'])
        

    },
    { 'type': 'randomforest',
        'max_depth': hp.choice('max_depth', range(1,20)),
        'max_features': hp.choice('max_features', range(1,5)),
        'n_estimators': hp.choice('n_estimators', range(1,20)),
        'criterion': hp.choice('criterion', ["gini", "entropy"])
    }
])

In [13]:
trials = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=objective, 
    space=search_space,
    algo=algo,
    max_evals=100,
    trials=trials)

100%|██████████| 100/100 [00:04<00:00, 20.33trial/s, best loss: -0.838360655737705]


In [14]:
print(best_result)

{'classifier_type': 0}


In [12]:
def objectiveWithoutScaling(params):
    classifier_type = params['type']
    del params['type']
    if classifier_type == 'naive_bayes':
        clf = BernoulliNB(**params)
    elif classifier_type == 'ridge_Classifier':
        clf = RidgeClassifier(**params)
    elif classifier_type == 'logistic_regression':
        clf = LogisticRegression(**params)
    elif classifier_type == 'svm':
        clf = SVC(**params)
    elif classifier_type == 'knn':
        clf = KNeighborsClassifier(**params)  
    elif classifier_type == 'randomforest':
        clf = RandomForestClassifier(**params)                
    else:
        return 0
    accuracy = cross_val_score(clf, X_standard, y).mean()
    
    return {'loss': -accuracy, 'status': STATUS_OK}

In [16]:
trials2 = Trials()
algo=tpe.suggest
best_result = fmin(
    fn=objectiveWithoutScaling, 
    space=search_space,
    algo=algo,
    max_evals=100,
    trials=trials2)

100%|██████████| 100/100 [00:04<00:00, 23.39trial/s, best loss: -0.8415846994535519]


In [17]:
print(best_result)

{'classifier_type': 0}
