# Ensemble methods. Exercises


In this section we have only one exercise:

1. Find the best three classifier in the stacking method using the classifiers from scikit-learn package, such as:


* Linear regression,
* Nearest Neighbors,
* Linear SVM,
* Decision Tree,
* Naive Bayes,
* QDA.

In [1]:
%store -r data_set
%store -r labels
%store -r test_data_set
%store -r test_labels
%store -r unique_labels

## Exercise 1: Find the best three classifier in the stacking method

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import itertools as it

In [37]:
def build_classifiers(): 
      
    neighbors = KNeighborsClassifier()
    neighbors.fit(data_set, labels)

    linear_regression = LinearRegression()
    linear_regression.fit(data_set, labels)

    qda = QuadraticDiscriminantAnalysis()
    qda.fit(data_set, labels)
    
    gauss = GaussianNB()
    gauss.fit(data_set, labels)

    svc = SVC()
    svc.fit(data_set, labels)

    return neighbors, linear_regression, qda, gauss, svc

In [74]:
def build_stacked_classifier(classifiers):
    output = [] 
    classifiers_list = []
    
    classifiers_combinations = it.combinations(classifiers, 3)
    
    
    for model in classifiers_combinations:
        tmp_output = []
        tmp_model = []
        for classifier in model:
            tmp_output.append(classifier.predict(data_set)) 
            tmp_model.append(classifier)
        tmp_output = np.array(tmp_output).reshape((130,3))
        output.append(tmp_output)  # ten set of data, for each of 10 combination 3-elements from 5 elements set
        classifiers_list.append(tmp_model)
        
    model_fit = []
    for model_output in output:
        stacked_classifier = DecisionTreeClassifier()
        model_fit.append(stacked_classifier.fit(model_output.reshape((130,3)), labels.reshape((130,))))
 

    test_predictions_list = []   
    for i in range(len(model_fit)):
        classifiers = classifiers_list[i]
        stacked_classifier = model_fit[i]
        test_set = []
        
        for classifier in classifiers:
            test_set.append(classifier.predict(test_data_set))
        test_set = np.array(test_set).reshape((len(test_set[0]),3))
        predicted = stacked_classifier.predict(test_set)
        test_predictions_list.append(predicted)
    
    accuracy_list = []
    for prediction in test_predictions_list:
        accuracy_list.append(accuracy_score(test_labels, prediction))
        accuracy = accuracy_score(test_labels, prediction) ##
        #print(accuracy)
    
    best_prediction_index = np.argmax(accuracy_list)
    
    best_classifiers = classifiers_list[best_prediction_index]
    best_stacked_classifier = model_fit[best_prediction_index]
    best_predicted = best_stacked_classifier.predict(test_set)
    
    return best_classifiers, best_predicted


In [75]:
raw_list = [KNeighborsClassifier(),LinearRegression(), QuadraticDiscriminantAnalysis(),GaussianNB(),SVC()]  

classifiers = build_classifiers()
best_classifiers, predicted = build_stacked_classifier(classifiers)
accuracy = accuracy_score(test_labels, predicted)
print("\nBest set of classifiers:", best_classifiers)
print("\nBest accuracy:",accuracy)


Best set of classifiers: [KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'), GaussianNB(priors=None, var_smoothing=1e-09), SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)]

Best accuracy: 0.75


