### Random Forest

In [None]:
def find_best_parameters(clf, par, x_train, y_train):
    """Run a 5-fold cross-validated grid search and print a summary.

    Parameters
    ----------
    clf : estimator
        Estimator handed to ``GridSearchCV``.
    par : dict or list of dict
        Parameter grid handed to ``GridSearchCV``.
    x_train, y_train
        Training features and targets passed to ``GridSearchCV.fit``.

    Returns
    -------
    None
        Results are only printed: the best parameter set, the best mean
        cross-validated score, and the mean test score of every grid point.
    """
    # NOTE: the former ``iid=True`` argument is intentionally not passed.
    # It was deprecated in scikit-learn 0.22 and removed in 0.24, where
    # supplying it makes the constructor raise TypeError.
    grid_classifiers = GridSearchCV(clf, par, n_jobs=-1, cv=5)
    grid_classifiers.fit(x_train, y_train)

    print("Best parameters set found:")
    print(grid_classifiers.best_params_)

    print("Score with best parameters:")
    print(grid_classifiers.best_score_)

    print("All scores on the grid:")
    results = grid_classifiers.cv_results_
    for score, params in zip(results['mean_test_score'], results['params']):
        print(score, '\t\t', params)

In [None]:
class ensemble_random_forest():
    """Ensemble of random forests, each fitted on a fresh background subsample.

    Every member is trained on a random subset of the background frame
    combined with the full signal training frame; predictions are the
    unweighted average over all members.

    Parameters
    ----------
    n_classifiers : int
        Number of random forests to train.
    useful : bool, default True
        Selects the ``max_features`` setting of each forest: 3 when true,
        5 otherwise.
    n_bkg_samples : int, default 900
        Maximum number of background rows drawn for each member.
    """

    def __init__(self, n_classifiers, useful=True, n_bkg_samples=900):
        self.n_classifiers = n_classifiers
        self.useful = useful
        self.n_bkg_samples = n_bkg_samples

    def _make_training_frame(self, data_bkg, data_sig_train):
        """Return a shuffled frame of a random background subset plus all signal rows."""
        # Function-scope import so the class does not depend on a
        # module-level ``pd`` alias being present.
        import pandas as pd

        shuffled_bkg = data_bkg.sample(frac=1).reset_index(drop=True)
        bkg_subset = shuffled_bkg[:self.n_bkg_samples]
        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
        # documented replacement and yields the same rows.
        combined = pd.concat([bkg_subset, data_sig_train], ignore_index=True)
        # Shuffle again so signal rows are not all at the end of the frame.
        return combined.sample(frac=1).reset_index(drop=True)

    def fit(self, data_bkg, data_sig_train):
        """Train ``n_classifiers`` forests and store them in ``self.classifiers``.

        Both frames must share the same columns; the last column is used as
        the target and all preceding columns as features.
        """
        max_features = 3 if self.useful else 5
        classifiers = []
        for _ in range(self.n_classifiers):
            data_tot = self._make_training_frame(data_bkg, data_sig_train)
            forest_clf = RandomForestClassifier(
                n_estimators=300,
                max_features=max_features,
                bootstrap=True,
                n_jobs=-1,
            )
            forest_clf.fit(data_tot.iloc[:, :-1], data_tot.iloc[:, -1])
            classifiers.append(forest_clf)
        self.classifiers = classifiers

    def predict(self, data):
        """Return the rounded mean of the members' predicted labels.

        The result is a float array of length ``data.shape[0]``. Rounding
        uses ``np.round``, so an exact 0.5 average rounds to 0.0.
        """
        voting = np.zeros(data.shape[0])
        for classifier in self.classifiers:
            voting += np.array(classifier.predict(data))
        return np.round(voting / self.n_classifiers)

    def predict_proba(self, data):
        """Return the members' averaged probabilities, shape ``(n_rows, 2)``."""
        # The accumulator is fixed at two columns, i.e. two classes.
        voting = np.zeros((data.shape[0], 2))
        for classifier in self.classifiers:
            voting += np.array(classifier.predict_proba(data))
        return voting / self.n_classifiers

In [None]:
def ensemble_give_test(data_sig_test, data_bkg_test):
    """Build a shuffled test set with as many background rows as signal rows.

    The first ``len(data_sig_test)`` rows of ``data_bkg_test`` are combined
    with all of ``data_sig_test`` and the rows are shuffled.

    Returns
    -------
    list
        ``[features, target]`` where ``features`` holds every column except
        the last and ``target`` is the last column.
    """
    # Function-scope import so the function does not depend on a
    # module-level ``pd`` alias being present.
    import pandas as pd

    bkg_subset = data_bkg_test[:len(data_sig_test)]
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # documented replacement and yields the same rows.
    data_tot = pd.concat([data_sig_test, bkg_subset], ignore_index=True)
    data_tot = data_tot.sample(frac=1).reset_index(drop=True)
    return [data_tot.iloc[:, :-1], data_tot.iloc[:, -1]]

## Join Classifiers

In [None]:
class JoinClassifiers():
    """Weighted average of several already-fitted classifiers.

    Parameters
    ----------
    classifiers : sequence
        Objects exposing ``predict`` and ``predict_proba``.
    weights : sequence of float, optional
        One weight per classifier. Defaults to equal weights of 1.

    Raises
    ------
    ValueError
        If ``weights`` is given and its length differs from the number of
        classifiers.
    """

    def __init__(self, classifiers, weights=None):
        self.classifiers = classifiers
        # Identity check: ``weights == None`` is evaluated element-wise for a
        # numpy array and cannot be used as a truth value.
        if weights is None:
            self.weights = np.ones(len(self.classifiers))
        else:
            # zip() would silently drop the surplus while the normalisation
            # still summed every weight, so reject mismatched lengths.
            if len(weights) != len(self.classifiers):
                raise ValueError(
                    "weights must have one entry per classifier "
                    f"(got {len(weights)} weights for "
                    f"{len(self.classifiers)} classifiers)"
                )
            self.weights = weights

    def predict(self, data):
        """Return the rounded weighted mean of the classifiers' labels.

        The result is a float array of length ``data.shape[0]``. Rounding
        uses ``np.round``, so an exact 0.5 average rounds to 0.0.
        """
        voting = np.zeros(data.shape[0])
        for classifier, weight in zip(self.classifiers, self.weights):
            voting += np.array(classifier.predict(data)) * weight
        return np.round(voting / np.sum(self.weights))

    def predict_proba(self, data):
        """Return the weighted mean probabilities, shape ``(n_rows, 2)``."""
        # The accumulator is fixed at two columns, i.e. two classes.
        voting = np.zeros((data.shape[0], 2))
        for classifier, weight in zip(self.classifiers, self.weights):
            voting += np.array(classifier.predict_proba(data)) * weight
        return voting / np.sum(self.weights)