In [14]:
import numpy as np
import torch
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import Pipeline
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text

from src.base.training.models.architectures.lenet import LeNet
from src.base.training.models.architectures.lenet_light import LeNetLight

In [15]:
DATA_PATH = "C:\\Users\\micdu\\Code\\pythonProject\\dmtl\\data"

def load_samples(dataset_fn, n_samples, train=True):
    dataset = dataset_fn(
        DATA_PATH,
        train=train,
        download=True,
        transform=ToTensor()
    )
    loader = DataLoader(dataset, batch_size=n_samples)
    return next(iter(loader))

def shuffle(x, y):
    shuffle_index = torch.randperm(x.shape[0])
    return x[shuffle_index], y[shuffle_index]

def load_model(model_fn, path):
    model = model_fn()
    model.load_state_dict(torch.load(path))
    model.eval()
    return model

def use_models(x, model_fn, paths):
    x_out = load_model(model_fn, paths[0])(x)
    for path in paths[1:]:
        model = load_model(model_fn, path)
        x_out = torch.cat((x_out, model(x)), dim=1)
    return x_out

def load_and_prepare(n_samples=100, train=True, model_fn=LeNet):
    fmnist_x, fmnist_y = load_samples(datasets.FashionMNIST, int(n_samples/2), train=train)
    mnist_x, mnist_y = load_samples(datasets.MNIST, int(n_samples/2), train=train)
    # Off setting the fmnist labels
    fmnist_y = fmnist_y + 10
    x, y = shuffle(
        torch.cat((mnist_x, fmnist_x), dim=0),
        torch.cat((mnist_y, fmnist_y), dim=0)
    )
    x_out = use_models(x, model_fn, [
        "C:\\Users\\micdu\\Code\\pythonProject\\dmtl\\notebooks\\models\\daeclust_15\\5aa285fe2dad84e59107a2652432eeac66db9c709fe2719ba74bd80caa7f493a\\final_model.state",
        "C:\\Users\\micdu\\Code\\pythonProject\\dmtl\\notebooks\\models\\daeclust_15\\e5307874a84923007d15c8c019aa67d7756478bd3466d17a14b856a76e6ee29d\\final_model.state"
    ])
    return x_out.detach().numpy(), y.detach().numpy()

In [16]:
x_train, y_train = load_and_prepare(model_fn=LeNetLight, n_samples=400, train=True)
x_test, y_test = load_and_prepare(model_fn=LeNetLight, n_samples=800, train=False)

In [17]:
np.bincount(y_train), np.bincount(y_test)

(array([21, 26, 20, 21, 21, 13, 19, 21, 15, 23, 24, 26, 18, 17, 18, 20, 21,
        21, 16, 19], dtype=int64),
 array([33, 57, 44, 35, 46, 42, 34, 41, 27, 41, 42, 44, 54, 36, 43, 32, 39,
        40, 34, 36], dtype=int64))

In [5]:
pca = PCA(n_components=10)
x_pca_train = pca.fit_transform(x_train)
x_pca_test = pca.transform(x_test)

In [20]:
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=20)
decision_tree = decision_tree.fit(x_train, y_train)

In [21]:
tree_pred = decision_tree.predict(x_test)
classification_report(tree_pred, y_test, output_dict=True)

{'0': {'precision': 0.9696969696969697,
  'recall': 1.0,
  'f1-score': 0.9846153846153847,
  'support': 32},
 '1': {'precision': 0.9824561403508771,
  'recall': 0.9655172413793104,
  'f1-score': 0.9739130434782608,
  'support': 58},
 '2': {'precision': 0.9090909090909091,
  'recall': 0.7407407407407407,
  'f1-score': 0.8163265306122449,
  'support': 54},
 '3': {'precision': 0.7142857142857143,
  'recall': 1.0,
  'f1-score': 0.8333333333333333,
  'support': 25},
 '4': {'precision': 0.8913043478260869,
  'recall': 0.8723404255319149,
  'f1-score': 0.8817204301075269,
  'support': 47},
 '5': {'precision': 0.9285714285714286,
  'recall': 0.9069767441860465,
  'f1-score': 0.9176470588235294,
  'support': 43},
 '6': {'precision': 0.7647058823529411,
  'recall': 0.8666666666666667,
  'f1-score': 0.8125,
  'support': 30},
 '7': {'precision': 0.975609756097561,
  'recall': 0.975609756097561,
  'f1-score': 0.975609756097561,
  'support': 41},
 '8': {'precision': 0.7407407407407407,
  'recall': 0

In [8]:
# LinearSVC, ovo, ovr
svm_clf = svm.SVC()
svm_clf.fit(x_train, y_train)

In [9]:
svm_pred = svm_clf.predict(x_test)
classification_report(svm_pred, y_test, output_dict=True)

{'0': {'precision': 0.9696969696969697,
  'recall': 0.9696969696969697,
  'f1-score': 0.9696969696969697,
  'support': 33},
 '1': {'precision': 1.0,
  'recall': 0.9827586206896551,
  'f1-score': 0.9913043478260869,
  'support': 58},
 '2': {'precision': 0.9772727272727273,
  'recall': 0.9555555555555556,
  'f1-score': 0.9662921348314608,
  'support': 45},
 '3': {'precision': 0.9714285714285714,
  'recall': 1.0,
  'f1-score': 0.9855072463768115,
  'support': 34},
 '4': {'precision': 0.9565217391304348,
  'recall': 1.0,
  'f1-score': 0.9777777777777777,
  'support': 44},
 '5': {'precision': 0.9761904761904762,
  'recall': 0.9761904761904762,
  'f1-score': 0.9761904761904762,
  'support': 42},
 '6': {'precision': 0.9411764705882353,
  'recall': 0.9696969696969697,
  'f1-score': 0.955223880597015,
  'support': 33},
 '7': {'precision': 0.975609756097561,
  'recall': 1.0,
  'f1-score': 0.9876543209876543,
  'support': 40},
 '8': {'precision': 0.9259259259259259,
  'recall': 0.6944444444444444

In [10]:
# https://scikit-learn.org/stable/modules/naive_bayes.html
gnb = GaussianNB()
gnb = gnb.fit(x_train, y_train)
gnb_pred = gnb.predict(x_test)
classification_report(gnb_pred, y_test, output_dict=True)

{'0': {'precision': 0.9696969696969697,
  'recall': 1.0,
  'f1-score': 0.9846153846153847,
  'support': 32},
 '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 57},
 '2': {'precision': 1.0,
  'recall': 0.8627450980392157,
  'f1-score': 0.9263157894736842,
  'support': 51},
 '3': {'precision': 0.8571428571428571,
  'recall': 1.0,
  'f1-score': 0.923076923076923,
  'support': 30},
 '4': {'precision': 0.9130434782608695,
  'recall': 0.9767441860465116,
  'f1-score': 0.9438202247191011,
  'support': 43},
 '5': {'precision': 0.9047619047619048,
  'recall': 0.9047619047619048,
  'f1-score': 0.9047619047619048,
  'support': 42},
 '6': {'precision': 0.9117647058823529,
  'recall': 0.96875,
  'f1-score': 0.9393939393939394,
  'support': 32},
 '7': {'precision': 0.9512195121951219,
  'recall': 1.0,
  'f1-score': 0.975,
  'support': 39},
 '8': {'precision': 0.7777777777777778,
  'recall': 0.7,
  'f1-score': 0.7368421052631577,
  'support': 30},
 '9': {'precision': 0.9512195121951

In [11]:
# https://scikit-learn.org/stable/modules/neighbors.html
nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(x_train)
distances, indices = nbrs.kneighbors(x_test)

In [12]:
nca = NeighborhoodComponentsAnalysis(random_state=42)
knn = KNeighborsClassifier(n_neighbors=3)
nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
nca_pipe.fit(x_train, y_train)
nca_knn_preds = nca_pipe.predict(x_test)
classification_report(nca_knn_preds, y_test, output_dict=True)

{'0': {'precision': 0.9696969696969697,
  'recall': 0.9696969696969697,
  'f1-score': 0.9696969696969697,
  'support': 33},
 '1': {'precision': 1.0,
  'recall': 0.95,
  'f1-score': 0.9743589743589743,
  'support': 60},
 '2': {'precision': 0.9772727272727273,
  'recall': 0.8775510204081632,
  'f1-score': 0.9247311827956989,
  'support': 49},
 '3': {'precision': 0.9714285714285714,
  'recall': 0.918918918918919,
  'f1-score': 0.9444444444444445,
  'support': 37},
 '4': {'precision': 0.9130434782608695,
  'recall': 0.9767441860465116,
  'f1-score': 0.9438202247191011,
  'support': 43},
 '5': {'precision': 0.9285714285714286,
  'recall': 0.9069767441860465,
  'f1-score': 0.9176470588235294,
  'support': 43},
 '6': {'precision': 0.9411764705882353,
  'recall': 0.9696969696969697,
  'f1-score': 0.955223880597015,
  'support': 33},
 '7': {'precision': 0.9512195121951219,
  'recall': 0.9285714285714286,
  'f1-score': 0.9397590361445782,
  'support': 42},
 '8': {'precision': 0.8518518518518519,

In [13]:
rnd_forest = RandomForestClassifier(max_depth=10, random_state=0)
rnd_forest.fit(x_train, y_train)
forest_pred = rnd_forest.predict(x_test)
classification_report(forest_pred, y_test, output_dict=True)

{'0': {'precision': 0.9696969696969697,
  'recall': 0.9696969696969697,
  'f1-score': 0.9696969696969697,
  'support': 33},
 '1': {'precision': 1.0,
  'recall': 0.9827586206896551,
  'f1-score': 0.9913043478260869,
  'support': 58},
 '2': {'precision': 0.9772727272727273,
  'recall': 0.9148936170212766,
  'f1-score': 0.945054945054945,
  'support': 47},
 '3': {'precision': 0.9714285714285714,
  'recall': 1.0,
  'f1-score': 0.9855072463768115,
  'support': 34},
 '4': {'precision': 0.9782608695652174,
  'recall': 1.0,
  'f1-score': 0.989010989010989,
  'support': 45},
 '5': {'precision': 0.9761904761904762,
  'recall': 0.9534883720930233,
  'f1-score': 0.9647058823529412,
  'support': 43},
 '6': {'precision': 0.9411764705882353,
  'recall': 0.9411764705882353,
  'f1-score': 0.9411764705882353,
  'support': 34},
 '7': {'precision': 0.9512195121951219,
  'recall': 0.975,
  'f1-score': 0.9629629629629629,
  'support': 40},
 '8': {'precision': 0.9259259259259259,
  'recall': 0.92592592592592