In [None]:
import numpy as np
import chainer
from itertools import product
import matplotlib.pyplot as plt
import scipy
from pprint import pprint
import pickle 
from scipy import io

from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import GradientBoostingClassifier


from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score



%matplotlib inline

In [None]:
# Number of samples taken from the start of each MNIST split.
n_train = 5000
n_test = 1000

# Epoch whose saved feature files are loaded below.
selected_epoch = 15

train, test = chainer.datasets.get_mnist()

# Feature matrices saved under ./results; both are truncated to the first
# n_train / n_test rows so they line up with the labels built below.
X_train = np.load(f"./results/train_features{selected_epoch}.npy")[:n_train]
X_test = np.load(f"./results/features_test_epoch{selected_epoch}.npy")[:n_test]

# Labels are the second element of each (image, label) pair in the MNIST splits.
# NOTE(review): assumes the saved feature rows follow the same order as the
# chainer dataset — confirm against the code that produced the .npy files.
y_train = np.array([train[i][1] for i in range(n_train)])
y_test = np.array([test[i][1] for i in range(n_test)])

# Fail early (instead of deep inside a classifier's fit) if a feature file
# holds fewer rows than requested.
assert X_train.shape[0] == y_train.shape[0], (X_train.shape, y_train.shape)
assert X_test.shape[0] == y_test.shape[0], (X_test.shape, y_test.shape)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


In [None]:
# Base learners. Every hyper-parameter below is spelled out explicitly;
# anything not listed is left at the library default.

# Linear SVC without probability estimates (used for the stand-alone benchmark).
clf_svc = SVC(C=1, kernel='linear', gamma='scale', random_state=42)

# Same SVC configuration, but with probability=True so that predict_proba
# is available for the decision-templates ensemble.
clf_svc_p = SVC(C=1, kernel='linear', gamma='scale', random_state=42, probability=True)

clf_xgb = XGBClassifier(max_depth=3, learning_rate=0.2, n_estimators=200, eval_metric="mlogloss")

clf_knn = KNeighborsClassifier(n_neighbors=7, weights='uniform', metric='manhattan')

clf_rf = RandomForestClassifier(
    n_estimators=50, max_depth=None, min_samples_split=5, min_samples_leaf=2, random_state=42
)

# Bagged decision trees: every tree sees all samples (no sample bootstrap)
# and a bootstrapped half of the features.
clf_bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(random_state=42),
    n_estimators=100,
    max_samples=1.0,
    max_features=0.5,
    bootstrap=False,
    bootstrap_features=True,
    random_state=42,
)

# These three run with library defaults.
clf_lda = LinearDiscriminantAnalysis()
clf_mnb = MultinomialNB()
clf_gb = GradientBoostingClassifier()


# Pool evaluated one classifier at a time.
classifiers = dict(
    svc=clf_svc,
    xgb=clf_xgb,
    knn=clf_knn,
    rf=clf_rf,
    bag=clf_bag,
    lda=clf_lda,
    mnb=clf_mnb,
    gb=clf_gb,
)

# Pool with the probability-enabled SVC in place of the plain one.
# Apart from the SVC, the entries are the very same objects as in
# `classifiers`, not copies.
classifiers_p = dict(
    svc_p=clf_svc_p,
    xgb=clf_xgb,
    knn=clf_knn,
    rf=clf_rf,
    bag=clf_bag,
    lda=clf_lda,
    mnb=clf_mnb,
    gb=clf_gb,
)


In [None]:
# Text report per classifier, keyed by the classifier's short name.
results = {}

# Fit each base classifier on the non-normalized features, then score it
# on the held-out test arrays.
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    rep = classification_report(y_test, predictions)

    # First line: "<name> <accuracy>", followed by the per-class report.
    results[name] = f"{name} {accuracy!s}\n{rep}"

    # Optional persistence of the report (disabled):
    # with open(f"./reports/rep_{name}.txt", "w") as f:
    #     f.write(results[name])
    print(name, accuracy, " DONE!")

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels


def make_decision_profiles(X, classifiers_pool):
    """Build the decision profile of every sample in ``X``.

    Each classifier's ``predict_proba(X)`` output is placed side by side, so
    row ``i`` of the result holds the probability vectors of all classifiers
    for sample ``i``, in pool order.

    Parameters
    ----------
    X : array-like
        Samples, passed unchanged to each classifier's ``predict_proba``.
    classifiers_pool : iterable
        Fitted classifiers exposing ``predict_proba``.

    Returns
    -------
    numpy.ndarray
        2-D array whose column count is the sum of the column counts
        returned by the individual classifiers.

    Raises
    ------
    ValueError
        If ``classifiers_pool`` is empty (raised by ``np.concatenate``).
    """
    # Concatenate the per-classifier outputs directly. Stacking them into a
    # single ndarray first makes an extra copy and fails when the classifiers
    # return different numbers of probability columns.
    probabilities = [clf.predict_proba(X) for clf in classifiers_pool]
    return np.concatenate(probabilities, axis=1)


def make_decision_templates(decision_profiles, y):
    """Average the decision profiles of each class into one template per class.

    Parameters
    ----------
    decision_profiles : numpy.ndarray
        One decision profile per row, aligned with ``y``.
    y : numpy.ndarray
        Class label of each row of ``decision_profiles``.

    Returns
    -------
    tuple
        ``(decision_templates, labels)``: ``labels`` is ``np.unique(y)`` and
        row ``k`` of ``decision_templates`` is the mean profile of the samples
        labelled ``labels[k]``.
    """
    labels = np.unique(y)

    class_means = []
    for label in labels:
        # Boolean mask selects the profiles belonging to this class.
        members = decision_profiles[y == label]
        class_means.append(members.mean(axis=0))

    return np.array(class_means), labels


class DecisionTemplatesClassifier(BaseEstimator, ClassifierMixin):
    """Ensemble classifier that predicts via nearest decision template.

    ``fit`` trains every base estimator, builds the decision profile of each
    training sample and averages those profiles per class into templates.
    ``predict`` assigns each sample the class whose template is closest
    (Euclidean norm) to the sample's decision profile.

    Parameters
    ----------
    estimators : iterable
        Base classifiers; each must provide ``fit`` and ``predict_proba``.
    """

    def __init__(self, estimators):
        self.estimators = estimators

    def fit(self, X, y):
        """Fit the base estimators and compute the per-class templates.

        The estimators passed to the constructor are fitted in place; no
        copies are made. Returns ``self``.
        """
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)

        # The pool is the same collection object given to __init__.
        self.classifiers_pool_ = self.estimators
        for member in self.classifiers_pool_:
            member.fit(X, y)

        # Templates are computed from the profiles of the training data itself.
        profiles = make_decision_profiles(X, self.classifiers_pool_)
        templates, template_labels = make_decision_templates(profiles, y)
        self.decision_templates_ = templates
        self.decision_templates_classes_ = template_labels

        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label of the nearest template."""
        fitted_attributes = [
            "classes_",
            "classifiers_pool_",
            "decision_templates_",
            "decision_templates_classes_",
        ]
        check_is_fitted(self, fitted_attributes)
        X = check_array(X)

        profiles = make_decision_profiles(X, self.classifiers_pool_)

        # distances[k, i] is the distance from template k to the profile of sample i.
        distances = np.array(
            [
                np.linalg.norm(template - profiles, axis=1)
                for template in self.decision_templates_
            ]
        )
        nearest = np.argmin(distances, axis=0)

        return self.decision_templates_classes_.take(nearest)

In [None]:
# Base classifiers for the ensemble, in the order they appear in classifiers_p.
estimatores = list(classifiers_p.values())

dt_clf = DecisionTemplatesClassifier(estimatores)

dt_clf.fit(X_train, y_train)

# Predict once and reuse the result: every predict() call re-runs
# predict_proba on all base classifiers.
dt_predictions = dt_clf.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)

# Same value dt_clf.score(X_test, y_test) reports (mean accuracy).
print(dt_accuracy)
rep = classification_report(y_test, dt_predictions)
name = "DecisionTemplatesClassifier" + " " + str(dt_accuracy) + "\n" + rep
# Optional persistence of the report (disabled):
# with open(f"./reports/rep_DecisionTemplatesClassifier.txt", "w") as f:
#     f.write(name)
