In [None]:
# loading packages
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_val_score, train_test_split 

In [None]:
# Read the heart-disease dataset (comma-separated, pandas' default delimiter).
df = pd.read_csv("heart.csv")

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Separate the label column ("target") from the feature columns.
y = df["target"].values
X = df.drop(columns="target")


In [None]:
# create dummies
#TODO: Try this way
# Encode from df (minus the label) rather than from X itself: the cell is then
# idempotent, whereas re-running `X = get_dummies(X, ...)` fails because the
# 'cp', 'thal' and 'slope' columns no longer exist after the first run.
X = pd.get_dummies(df.drop(columns="target"), columns=['cp', 'thal', 'slope'])

In [None]:
# Preview the first ten rows of the encoded feature frame.
X.head(n=10)

In [None]:
# Standardise the features for the SVM.
# TODO: check whether all columns should be scaled.

from sklearn import preprocessing
standard_scaler = preprocessing.StandardScaler()
# Fit the scaler on X, then apply it to the same data.
standard_scaler.fit(X)
X_standard = standard_scaler.transform(X)

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Baseline: an SVC with default hyper-parameters, cross-validated (cv=5)
# on the standardised features.
svm = SVC()
scores = cross_val_score(estimator=svm, X=X_standard, y=y, cv=5)

In [None]:
 print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

In [None]:
from hyperopt import fmin, tpe, hp, SparkTrials, STATUS_OK, Trials

In [None]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def objective(params):
    """Hyperopt objective: cross-validated accuracy of one sampled classifier.

    Parameters
    ----------
    params : dict
        One sample drawn from the search space. The ``'type'`` entry selects
        the estimator; every other entry is forwarded unchanged to that
        estimator's constructor as a keyword argument. The dict is not
        modified.

    Returns
    -------
    dict
        ``{'loss': -mean_accuracy, 'status': STATUS_OK}``. The accuracy is
        negated so that a lower loss corresponds to a higher accuracy.

    Raises
    ------
    KeyError
        If ``params`` has no ``'type'`` entry.
    ValueError
        If ``params['type']`` is not one of the supported classifier names.
    """
    # Work on a copy so the caller's dict still contains 'type' afterwards.
    hyperparams = dict(params)
    classifier_type = hyperparams.pop('type')

    if classifier_type == 'naive_bayes':
        clf = BernoulliNB(**hyperparams)
    elif classifier_type == 'ridge_Classifier':
        clf = RidgeClassifier(**hyperparams)
    elif classifier_type == 'logistic_regression':
        clf = LogisticRegression(**hyperparams)
    elif classifier_type == 'svm':
        clf = SVC(**hyperparams)
    elif classifier_type == 'knn':
        clf = KNeighborsClassifier(**hyperparams)
    elif classifier_type == 'randomforest':
        clf = RandomForestClassifier(**hyperparams)
    else:
        # Fail loudly: silently returning a loss of 0 would hide a typo in
        # the search space behind a plausible-looking trial result.
        raise ValueError("Unknown classifier type: %r" % (classifier_type,))

    # Scored on the notebook-level X and y with cross_val_score's default cv.
    # NOTE(review): this uses the unscaled X, not X_standard -- confirm that
    # is intended for the scale-sensitive models (SVM, kNN, logistic regression).
    accuracy = cross_val_score(clf, X, y).mean()

    return {'loss': -accuracy, 'status': STATUS_OK}


In [None]:
# Hyperopt search space: first choose a classifier family, then sample that
# family's hyper-parameters. Each dict is passed to `objective`, which uses
# 'type' to pick the estimator and forwards the remaining keys to its
# constructor. Every hp.* label must be unique across the whole space, which
# is why the two regularisation strengths are labelled 'lr_C' and 'svm_C'.
search_space = hp.choice('classifier_type', [
    {
        'type': 'naive_bayes',
    },
    {
        'type': 'ridge_Classifier',
        'alpha': hp.uniform('alpha', 0.0, 2.0),
    },
    {
        'type': 'logistic_regression',
        # NOTE(review): the string 'none' is only accepted by older
        # scikit-learn releases; newer ones expect None -- confirm against
        # the installed version.
        'penalty': hp.choice('penalty', ['l1', 'l2', 'elasticnet', 'none']),
        'C': hp.uniform('lr_C', 0, 2),
        # 'saga' is the solver that supports every penalty listed above; the
        # default solver rejects 'l1' and 'elasticnet'.
        'solver': 'saga',
        # Required by penalty='elasticnet'; ignored (with a warning) otherwise.
        'l1_ratio': hp.uniform('lr_l1_ratio', 0.0, 1.0),
    },
    {
        'type': 'svm',
        'C': hp.lognormal('svm_C', 0, 1.0),
        'kernel': hp.choice('kernel', ['linear', 'rbf', 'poly']),
        'gamma': hp.uniform('gamma', 0, 20.0),
    },
    {
        'type': 'knn',
        'n_neighbors': hp.choice('knn_n_neighbors', range(1, 50)),
        'weights': hp.choice('weights', ['uniform', 'distance']),
        'metric': hp.choice('metric', ['euclidean', 'manhattan', 'minkowski']),
    },
    {
        'type': 'randomforest',
        'max_depth': hp.choice('max_depth', range(1, 20)),
        'max_features': hp.choice('max_features', range(1, 5)),
        'n_estimators': hp.choice('n_estimators', range(1, 20)),
        'criterion': hp.choice('criterion', ["gini", "entropy"]),
    },
])

In [None]:
# Run 16 evaluations of `objective` over `search_space` using the TPE
# algorithm. Every evaluation is recorded in `trials` for later inspection.
trials = Trials()
algo = tpe.suggest
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=algo,
    max_evals=16,
    # Pass the Trials instance itself; calling it (`trials()`) raises
    # TypeError because Trials objects are not callable.
    trials=trials,
)

In [None]:
# fmin reports every hp.choice parameter as the *index* of the selected
# option, so the raw dict is shown first and then decoded with space_eval
# into the actual classifier type and hyper-parameter values.
from hyperopt import space_eval

print(best_result)
print(space_eval(search_space, best_result))