# Ensemble methods. Exercises


In this section we have only one exercise:

1. Find the best three classifier in the stacking method using the classifiers from scikit-learn package, such as:


* Linear regression,
* Nearest Neighbors,
* Linear SVM,
* Decision Tree,
* Naive Bayes,
* QDA.

In [1]:
%store -r data_set
%store -r labels
%store -r test_data_set
%store -r test_labels
%store -r unique_labels

## Exercise 1: Find the best three classifier in the stacking method

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [24]:
def build_classifiers():
    
    # fill this part

    classifiers = []
    
    neighbors = KNeighborsClassifier()
    neighbors.fit(data_set, labels)

    linear_regression = LinearRegression()
    linear_regression.fit(data_set, labels)

    qda = QuadraticDiscriminantAnalysis()
    qda.fit(data_set, labels)

    svc = SVC()
    svc.fit(data_set, labels)
    
    gaussianNB = GaussianNB()
    gaussianNB.fit(data_set, labels)
    
    classifiers.append(neighbors)
    classifiers.append(linear_regression)
    classifiers.append(qda)
    classifiers.append(svc)
    classifiers.append(gaussianNB)
    
    return classifiers # and here

In [73]:
def get_all_classifiers():
    
    # fill this part

    classifiers = []
    
    neighbors = KNeighborsClassifier()
    neighbors.fit(data_set, labels)

    linear_regression = LinearRegression()
    linear_regression.fit(data_set, labels)

    qda = QuadraticDiscriminantAnalysis()
    qda.fit(data_set, labels)

    svc = SVC()
    svc.fit(data_set, labels)
    
    gaussianNB = GaussianNB()
    gaussianNB.fit(data_set, labels)
    
    tree = DecisionTreeClassifier()
    tree.fit(data_set, labels)
    
    classifiers.append(neighbors)
    classifiers.append(linear_regression)
    classifiers.append(qda)
    classifiers.append(svc)
    classifiers.append(gaussianNB)
    classifiers.append(tree)
    
    return classifiers # and here

In [35]:
def build_stacked_classifier(classifiers):
    output = []
    for classifier in classifiers:
        output.append(classifier.predict(data_set))
    output = np.array(output).reshape((130,3))
    
    # stacked classifier part:
    stacked_classifier = DecisionTreeClassifier() # set here
    stacked_classifier.fit(output.reshape((130,3)), labels.reshape((130,)))
    test_set = []
    for classifier in classifiers:
        test_set.append(classifier.predict(test_data_set))
    test_set = np.array(test_set).reshape((len(test_set[0]),3))
    predicted = stacked_classifier.predict(test_set)
    return predicted

In [85]:
import itertools

all_classifiers = get_all_classifiers()
combinations = list(itertools.combinations(range(len(all_classifiers)), 3))
print(len(combinations), combinations)

def make_classifier(id):
    
    if id == 0:
        neighbors = KNeighborsClassifier()
        neighbors.fit(data_set, labels)
        return neighbors

    if id == 1:
        linear_regression = LinearRegression()
        linear_regression.fit(data_set, labels)
        return linear_regression

    if id == 2:
        qda = QuadraticDiscriminantAnalysis()
        qda.fit(data_set, labels)
        return qda

    if id == 3:
        svc = SVC()
        svc.fit(data_set, labels)
        return svc
    
    if id == 4:
        gaussianNB = GaussianNB()
        gaussianNB.fit(data_set, labels)
        return gaussianNB
    
    if id == 5:
        tree = DecisionTreeClassifier()
        tree.fit(data_set, labels)
        return tree
    

combinations = [ (make_classifier(x[0]), make_classifier(x[1]), make_classifier(x[2])) for x in combinations ]

results = []

for combination in combinations:
    predicted = build_stacked_classifier(combination)
    accuracy = accuracy_score(test_labels, predicted)
    results.append((accuracy, combination))

max_val = max(results, key = lambda tup : tup[0])
bests = [(x, cl) for i, (x,cl) in enumerate(results) if x == max_val[0]]

# print(bests)
for best in bests:
    print(best)
    

# classifiers = build_classifiers()
# predicted = build_stacked_classifier(classifiers)
# accuracy = accuracy_score(test_labels, predicted)
# print(accuracy)

20 [(0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (0, 2, 3), (0, 2, 4), (0, 2, 5), (0, 3, 4), (0, 3, 5), (0, 4, 5), (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 3, 4), (1, 3, 5), (1, 4, 5), (2, 3, 4), (2, 3, 5), (2, 4, 5), (3, 4, 5)]
(0.9, (KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'), QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
               store_covariance=False, store_covariances=None, tol=0.0001), DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')))


