In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold, RandomizedSearchCV


In [None]:
def model_list():
    """
    Build the candidate classifiers together with their search spaces.

    Returns a list with one ``[name, estimator, param_grids]`` entry per
    model, in this order: gradient boosting, random forest, bagging,
    AdaBoost. ``param_grids`` is a one-element list wrapping the dict of
    hyper-parameter values to sample from.

    To try another model (e.g. ``KNeighborsClassifier``), add a further
    entry of the same shape; a disabled kNN example is kept below.
    """
    def estimator_counts():
        # Every even number from 100 to 398. A new list is built on each
        # call so that no two grids share the same list object.
        return list(range(100, 400, 2))

    gradient_grid = {
        "n_estimators": estimator_counts(),
        'min_samples_split': [2, 3, 4],
        'max_depth': [3],
    }
    forest_grid = {
        'n_estimators': estimator_counts(),
        'min_samples_split': [2, 3, 4],
    }
    bagging_grid = {"n_estimators": estimator_counts()}
    ada_grid = {"n_estimators": estimator_counts()}

    # Disabled kNN candidate, kept for reference:
    #   knn_grid = {"n_neighbors": [1, 3, 5, 10, 20],
    #               'weights': ['uniform', 'distance'],
    #               'metric': ['euclidean', 'manhattan']}
    #   ["kNN", KNeighborsClassifier(), [knn_grid]]

    return [
        ['GradientBoosted', GradientBoostingClassifier(), [gradient_grid]],
        # The forest is the only estimator asked to use all cores itself.
        ["RandomForest", RandomForestClassifier(n_jobs=-1), [forest_grid]],
        ['Bagging', BaggingClassifier(), [bagging_grid]],
        ['AdaBoost', AdaBoostClassifier(), [ada_grid]],
    ]

def best_model(model_list, X_train, y_train, cv=StratifiedKFold(), scoring='roc_auc', best=False):
    """
    Tune every candidate model with a randomized search and report the results.

    Each entry of ``model_list`` is unpacked as ``(name, estimator,
    param_grids)``; only the first dict of ``param_grids`` is searched.
    Every search samples 4 parameter settings (``n_iter=4``), is scored with
    ``scoring`` under the ``cv`` splitter, and runs with ``n_jobs=2``.

    Arguments:
        model_list: iterable of ``[name, estimator, param_grids]`` entries.
            NOTE: inside this function the parameter hides the module-level
            ``model_list()`` factory; pass that function's result here.
        X_train, y_train: training data, forwarded unchanged to
            ``RandomizedSearchCV.fit``.
        cv: cross-validation splitter handed to the search. The default is a
            single ``StratifiedKFold()`` instance created when the function
            is defined.
        scoring: scorer forwarded to the search (default ``'roc_auc'``).
        best: selects the return value, see below.

    If best is false
        Returns a list with one ``[str(best_params_), best_score_,
        best_estimator_]`` row per model, in input order.

    If best is true
        Returns only the ``best_estimator_`` whose ``best_score_`` is the
        highest (the earliest model wins ties). Non-positive scores, as
        produced by the ``neg_*`` scorers, are compared like any other.

    Raises:
        ValueError: ``best`` is true but no model could be selected, i.e.
            ``model_list`` was empty or no score was comparable (NaN).
    """
    classifiers = []
    for name, model, parameters in model_list:
        print('modeling {}'.format(name))
        clf = RandomizedSearchCV(model, parameters[0],
                                 scoring=scoring, cv=cv,
                                 n_iter=4, verbose=1, n_jobs=2)

        clf.fit(X_train, y_train)
        classifiers.append([str(clf.best_params_), clf.best_score_, clf.best_estimator_])
        print('best score for {} : {}'.format(name, clf.best_score_))

    if not best:
        return classifiers

    # Start below every real score: a 0.0 baseline would reject all models
    # whenever the scorer only yields values <= 0.
    best_score = float('-inf')
    best_classifier = None
    for _params, score, classifier in classifiers:
        # Strict '>' keeps the first of several equal scores; NaN never wins.
        if score > best_score:
            best_score = score
            best_classifier = classifier

    if best_classifier is None:
        raise ValueError(
            'best_model could not select a model: model_list is empty '
            'or no search produced a comparable score'
        )
    return best_classifier