In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from scikeras.wrappers import KerasClassifier
import time
import matplotlib.pyplot as plt

2024-06-23 01:28:40.204133: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-23 01:28:40.251465: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-23 01:28:40.408243: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Restrict TensorFlow to one inter-op and one intra-op thread.
# NOTE(review): presumably done so the timings collected below are comparable
# between methods and runs — confirm.
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)

In [3]:
# Single seed for the whole notebook: applied here to NumPy and Keras/TensorFlow,
# and passed as `random_state` to the scikit-learn estimators and CV splitters
# defined in later cells.
seed = 1234
np.random.seed(seed)
tf.keras.utils.set_random_seed(seed)
tf.random.set_seed(seed)

In [4]:
def rgb_to_gray(rgb_images):
    """Collapse the colour channels of an image array into a single value.

    Only the first three entries of the last axis are used (anything beyond
    them, e.g. an alpha channel, is ignored). They are combined as a weighted
    sum with the weights 0.299, 0.587 and 0.114.

    Args:
        rgb_images: array whose last axis holds the colour channels
            (presumably in R, G, B order, as the name suggests).

    Returns:
        Array with the last axis removed, containing the weighted channel sum.
    """
    luma_weights = [0.299, 0.587, 0.114]
    colour_channels = rgb_images[..., :3]
    return np.dot(colour_channels, luma_weights)

In [5]:
def get_dataset(directory):
    """Load an image dataset from an ``.npz`` archive as scaled grayscale arrays.

    Args:
        directory: path handed to ``np.load``. Despite the parameter name this
            is the ``.npz`` file itself; the archive must contain the arrays
            ``'images'`` and ``'labels'``.

    Returns:
        ``(gray_images, y)``: the images converted with ``rgb_to_gray`` and
        divided by 255.0, and the label array exactly as stored.
    """
    # np.load returns an NpzFile that holds the archive open; read both arrays
    # inside the context manager so the file handle is released afterwards.
    with np.load(directory) as raw_npz:
        X = raw_npz['images']
        y = raw_npz['labels']
    gray_images = rgb_to_gray(X)
    # NOTE(review): assumes pixel values are stored on a 0-255 scale — confirm.
    gray_images /= 255.0
    return gray_images, y

In [6]:
# Keyword arguments for each RandomForestClassifier built inside RK.
rf_hyp = dict(
    n_estimators=10,
    criterion='log_loss',
    max_depth=5,
    random_state=seed,
)

# NOTE(review): no use of xgb_hyp is visible in the cells shown here — confirm
# whether it is still needed.
xgb_hyp = dict(
    n_estimators=10,
    max_depth=5,
    random_state=seed,
)

# Keyword arguments for the SVC built inside RK.
svm_hyp = dict(random_state=seed)

In [7]:
# Each entry is (path to the .npz archive, human-readable dataset name).
datasets = [
    ('Dataset/car_bike_raw.npz', 'Car Bike Dataset'),
    ('Dataset/cifar10_2_500.npz', 'CIFAR10 Dataset'),
    ('Dataset/pizza_raw_32.npz', 'Pizza Dataset'),
    ('Dataset/corals.npz', 'Corals Dataset'),
    ('Dataset/eggs.npz', 'Eggs Dataset'),
    ('Dataset/xray.npz', 'Xray Dataset'),
    ('Dataset/covid.npz', 'Covid19 Dataset'),
]

In [8]:
class RK:
    """Stacked classifier: conv feature maps -> one random forest per filter -> SVM.

    The wrapped Keras model turns each image into a stack of feature maps. For
    every filter (last axis of the model output) a separate
    ``RandomForestClassifier`` is trained on that filter's flattened map, and
    an ``SVC`` is trained on the vector of per-forest class predictions.

    NOTE(review): the SVC is fitted on predictions the forests make for the
    very samples they were trained on; out-of-fold predictions would avoid the
    resulting optimistic bias. Left unchanged so results stay comparable.
    """

    def __init__(self, model, rf_hyp, svm_hyp):
        """Store the feature extractor and the hyperparameters.

        Args:
            model: Keras model exposing ``input`` and ``output``. Its
                prediction is indexed as ``[:, :, :, i]``, so it must yield a
                4-D array with the filters on the last axis.
            rf_hyp: keyword arguments for every ``RandomForestClassifier``.
            svm_hyp: keyword arguments for the final ``SVC``.
        """
        # Build the wrapper from the model that was passed in rather than from
        # a notebook-level tensor, so the class does not depend on a global
        # that happens to be defined in another cell.
        self.model = Model(inputs=model.input, outputs=model.output)
        self.rf_hyp = rf_hyp
        self.svm = SVC(**svm_hyp)

    def _iter_filter_features(self, X):
        """Yield, filter by filter, the flattened ``(n_images, -1)`` feature map of X."""
        num_images = X.shape[0]
        feature_maps = self.model.predict(X, verbose=0)
        for i in range(feature_maps.shape[-1]):
            # Generated lazily so only one flattened copy is alive at a time.
            yield feature_maps[:, :, :, i].reshape(num_images, -1)

    def fit(self, X_train, y_train):
        """Train one forest per filter, then the SVM on the forests' predictions.

        Args:
            X_train: images accepted by the wrapped model; ``shape[0]`` is the
                number of images.
            y_train: labels, one per image.

        Returns:
            The SVM's predictions for ``X_train`` (training-set predictions).
        """
        self.trained_rf = []
        rf_predictions = []
        for features in self._iter_filter_features(X_train):
            rf = RandomForestClassifier(**self.rf_hyp)
            rf.fit(features, y_train)
            self.trained_rf.append(rf)
            rf_predictions.append(rf.predict(features))
        # One column per forest: shape (n_images, n_filters).
        stacked_predictions = np.stack(rf_predictions, axis=1)
        self.svm.fit(stacked_predictions, y_train)
        return self.svm.predict(stacked_predictions)

    def predict(self, X):
        """Predict labels for X with the forests and SVM trained by ``fit``.

        Args:
            X: images accepted by the wrapped model.

        Returns:
            The SVM's predictions, one per image.
        """
        rf_predictions = [
            self.trained_rf[i].predict(features)
            for i, features in enumerate(self._iter_filter_features(X))
        ]
        stacked_predictions = np.stack(rf_predictions, axis=1)
        return self.svm.predict(stacked_predictions)

In [9]:
# Feature extractor handed to RK by cv(): a single 3x3 convolution with 100
# ReLU filters followed by 2x2 max pooling, on 32x32 single-channel input.
input_layer = tf.keras.Input(shape=(32, 32, 1))
conv_layer = layers.Conv2D(100, (3, 3), activation='relu')(input_layer)
# `x` is the pooled feature-map tensor that becomes the model output.
x = layers.MaxPooling2D((2, 2))(conv_layer)
model = Model(inputs=input_layer, outputs=x)
# The model is marked non-trainable; no compile()/fit() call on it appears in
# the visible cells, so the filters presumably keep their initial weights.
model.trainable = False

In [10]:
def cv(directory):
    """Cross-validate the RK classifier on one dataset (5 stratified folds).

    For each fold a new ``RK`` is built from the notebook-level ``model``,
    ``rf_hyp`` and ``svm_hyp``, trained on the training split and evaluated on
    both splits. Accuracy, precision and recall use scikit-learn's default
    arguments.

    Args:
        directory: dataset path forwarded to ``get_dataset``.

    Returns:
        Tuple of eight per-fold lists, in this order: train accuracies, test
        accuracies, train precisions, test precisions, train recalls, test
        recalls, training times, test times (times in seconds).
    """
    X, y = get_dataset(directory)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    train_accs = []
    test_accs = []

    train_prec = []
    test_prec = []

    train_recall = []
    test_recall = []

    train_times = []
    test_times = []

    for train_indices, test_indices in kf.split(X, y):
        X_train, X_test, y_train, y_test = X[train_indices], X[test_indices], y[train_indices], y[test_indices]

        # Training time covers building RK and fit(), which also produces the
        # training-set predictions. perf_counter is monotonic, so the interval
        # cannot be distorted by system clock adjustments.
        start_train_time = time.perf_counter()
        clf = RK(model, rf_hyp, svm_hyp)
        pred_train = clf.fit(X_train, y_train)
        train_times.append(time.perf_counter() - start_train_time)

        # Test time covers prediction on the held-out split only.
        start_test_time = time.perf_counter()
        pred_test = clf.predict(X_test)
        test_times.append(time.perf_counter() - start_test_time)

        train_accs.append(accuracy_score(y_train, pred_train))
        test_accs.append(accuracy_score(y_test, pred_test))

        train_prec.append(precision_score(y_train, pred_train))
        test_prec.append(precision_score(y_test, pred_test))

        train_recall.append(recall_score(y_train, pred_train))
        test_recall.append(recall_score(y_test, pred_test))

    print("Train Accuracies: " + str(train_accs))
    print("Test Accuracies: " + str(test_accs))
    print("Train Precisions: " + str(train_prec))
    print("Test Precisions: " + str(test_prec))
    print("Train Recalls: " + str(train_recall))
    print("Test Recalls: " + str(test_recall))
    print("Train Times: " + str(train_times))
    print("Test Times: " + str(test_times))

    return train_accs, test_accs, train_prec, test_prec, train_recall, test_recall, train_times, test_times

In [11]:
# Run the RK cross-validation on every dataset. The path is printed first so
# each block of metrics in the output can be attributed to its dataset; the
# display name (second tuple element) is not used here.
for dataset, _ in datasets:
    print(dataset)
    cv(dataset)

Dataset/car_bike_raw.npz
Train Accuracies: [0.9671875, 0.96625, 0.97125, 0.96875, 0.9684375]
Test Accuracies: [0.86875, 0.88625, 0.84, 0.85375, 0.85625]
Train Precisions: [0.9716088328075709, 0.9715549936788875, 0.9748110831234257, 0.9740834386852086, 0.9734680985470625]
Test Precisions: [0.8753180661577609, 0.87409200968523, 0.8451776649746193, 0.8528678304239401, 0.8589420654911839]
Train Recalls: [0.9625, 0.960625, 0.9675, 0.963125, 0.963125]
Test Recalls: [0.86, 0.9025, 0.8325, 0.855, 0.8525]
Train Times: [13.73506236076355, 13.586729526519775, 13.491044521331787, 13.467337846755981, 13.449049472808838]
Test Times: [0.44195103645324707, 0.3540201187133789, 0.34664297103881836, 0.35665321350097656, 0.3513641357421875]
Dataset/cifar10_2_500.npz
Train Accuracies: [0.9925, 0.9925, 0.99375, 0.99, 0.9925]
Test Accuracies: [0.865, 0.825, 0.83, 0.875, 0.885]
Train Precisions: [1.0, 0.9974747474747475, 1.0, 1.0, 0.9949748743718593]
Test Precisions: [0.8543689320388349, 0.7777777777777778, 0

In [12]:
def create_model():
    """Build and compile the baseline CNN used for comparison.

    Architecture: three 3x3 ReLU convolutions (32, 64, 64 filters) with 2x2
    max pooling after the first two, then a 64-unit ReLU dense layer and a
    2-unit output layer producing logits. Input is 32x32 with one channel.

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy on logits, accuracy metric).
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # `input_shape` to the first layer, which Keras warns against for
        # Sequential models.
        tf.keras.Input(shape=(32, 32, 1)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(2)  # Output layer: one logit per class (two classes)
    ])
    # from_logits=True matches the activation-free output layer above.
    model.compile(optimizer='adam',
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])
    return model

In [13]:
def nn_cv(directory):
    """Cross-validate the baseline CNN on one dataset and print the results.

    Wraps ``create_model`` in a ``KerasClassifier`` (20 epochs, batch size 64)
    and runs ``cross_validate`` over 5 stratified folds. Prints the dataset
    path followed by the per-fold train/test accuracy, precision and recall,
    then fit and score times.

    Args:
        directory: dataset path forwarded to ``get_dataset``.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    simpleCNN = KerasClassifier(model=create_model, epochs=20, batch_size=64, verbose=0)
    images, labels = get_dataset(directory)
    results = cross_validate(
        simpleCNN,
        images,
        labels,
        scoring=['accuracy', 'precision', 'recall'],
        cv=splitter,
        return_train_score=True,
    )
    print(directory)
    # Same reporting order as the other *_cv helpers.
    report_keys = (
        'train_accuracy', 'test_accuracy',
        'train_precision', 'test_precision',
        'train_recall', 'test_recall',
        'fit_time', 'score_time',
    )
    for key in report_keys:
        print(results[key])

In [14]:
# Run the CNN baseline on every dataset; nn_cv prints the dataset path itself.
for dataset, _ in datasets:
    nn_cv(dataset)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/car_bike_raw.npz
[0.9609375 0.98875   0.99875   0.999375  0.9703125]
[0.87625 0.89375 0.91125 0.9275  0.89625]
[0.9280325  0.99120603 0.99750623 0.999375   0.99867462]
[0.82646421 0.89473684 0.90024331 0.93181818 0.95942029]
[0.999375 0.98625  1.       0.999375 0.941875]
[0.9525 0.8925 0.925  0.9225 0.8275]
[36.80512881 37.22847247 36.97008657 37.38165283 37.62366438]
[0.21826816 0.23226643 0.21938205 0.22124624 0.22582817]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/cifar10_2_500.npz
[0.94625 0.97875 0.9575  0.96875 0.975  ]
[0.85  0.83  0.87  0.87  0.855]
[1.         0.96368039 0.92757009 0.94964029 0.97979798]
[0.92682927 0.8        0.81896552 0.81896552 0.90804598]
[0.8925 0.995  0.9925 0.99   0.97  ]
[0.76 0.88 0.95 0.95 0.79]
[10.0961647  10.05270696 10.3474791  10.1334269   9.96807146]
[0.12784433 0.12463379 0.1292181  0.12643981 0.12912345]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/pizza_raw_32.npz
[0.92175573 0.93579148 0.83534647 0.87730451 0.91163382]
[0.7715736  0.70229008 0.72773537 0.69720102 0.73791349]
[0.93561104 0.99281609 0.75581395 0.84356895 0.86719637]
[0.79888268 0.7615894  0.66300366 0.66810345 0.69262295]
[0.90585242 0.87801779 0.99110546 0.92620865 0.97201018]
[0.72588832 0.58673469 0.92346939 0.78680203 0.85786802]
[18.73720932 18.60492229 18.92166805 18.81238031 19.15651059]
[0.15422082 0.17640352 0.15357661 0.15288091 0.16455269]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/corals.npz
[0.98102981 0.8902439  0.91598916 0.89580514 0.79837618]
[0.59459459 0.59459459 0.60540541 0.64673913 0.625     ]
[0.97948718 0.86288416 0.92670157 0.90180879 0.74044266]
[0.61458333 0.59482759 0.62244898 0.66       0.60447761]
[0.98453608 0.94072165 0.91237113 0.89948454 0.94845361]
[0.60824742 0.71134021 0.62886598 0.68041237 0.83505155]
[9.53719306 9.43466949 9.35602498 9.36977506 9.33894038]
[0.12814879 0.1256485  0.12341833 0.12417054 0.12293673]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/eggs.npz
[0.91653543 0.8992126  0.91496063 0.91181102 0.93081761]
[0.83647799 0.81132075 0.87421384 0.8490566  0.87341772]
[0.92336449 0.925      0.91528545 0.93762183 0.94252874]
[0.86764706 0.88709677 0.8962963  0.88148148 0.88970588]
[0.97628458 0.95059289 0.98415842 0.95247525 0.97233202]
[0.93650794 0.87301587 0.95275591 0.93700787 0.96031746]
[8.17375875 8.0784061  8.5738039  8.14954329 8.28915787]
[0.12007904 0.12319756 0.12404823 0.11994553 0.12092972]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/xray.npz
[0.99231426 0.96115261 0.98399146 0.98890075 0.99039488]
[0.95221843 0.94961571 0.95046968 0.95559351 0.95388557]
[0.98986095 0.95019477 0.98859982 0.98782961 0.98898551]
[0.95449374 0.94026549 0.97384067 0.96091954 0.95351474]
[0.99970743 0.99912229 0.98946752 0.99707517 0.99795262]
[0.98128655 0.99415205 0.95789474 0.97892272 0.98477752]
[54.56553841 54.17141271 54.73359704 53.96796131 54.91654301]
[0.27916455 0.28050375 0.26810408 0.26918387 0.27316976]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Dataset/covid.npz
[0.99447514 1.         1.         0.99450549 0.99450549]
[0.95652174 0.97826087 0.97777778 0.97777778 1.        ]
[0.98630137 1.         1.         1.         1.        ]
[0.9        0.94736842 0.94736842 1.         1.        ]
[1.         1.         1.         0.98611111 0.98611111]
[1.         1.         1.         0.94444444 1.        ]
[3.1301775  3.58663201 3.14868474 3.16182923 3.17949271]
[0.07051206 0.07598424 0.07217169 0.06921387 0.06945086]


In [15]:
def rf_cv(directory):
    """Cross-validate a plain random forest on raw pixels and print the results.

    Images are flattened to one row per sample and passed to ``cross_validate``
    with 5 stratified folds. Prints the dataset path followed by the per-fold
    train/test accuracy, precision and recall, then fit and score times.

    Args:
        directory: dataset path forwarded to ``get_dataset``.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    forest = RandomForestClassifier(random_state=seed)
    images, labels = get_dataset(directory)
    flat_images = images.reshape(images.shape[0], -1)
    results = cross_validate(
        forest,
        flat_images,
        labels,
        scoring=['accuracy', 'precision', 'recall'],
        cv=splitter,
        return_train_score=True,
    )
    print(directory)
    report_keys = (
        'train_accuracy', 'test_accuracy',
        'train_precision', 'test_precision',
        'train_recall', 'test_recall',
        'fit_time', 'score_time',
    )
    for key in report_keys:
        print(results[key])

In [16]:
# Run the raw-pixel random forest baseline on every dataset; rf_cv prints the
# dataset path itself.
for dataset, _ in datasets:
    rf_cv(dataset)

Dataset/car_bike_raw.npz
[1. 1. 1. 1. 1.]
[0.8125  0.8125  0.80875 0.83125 0.8225 ]
[1. 1. 1. 1. 1.]
[0.84530387 0.8342246  0.82077922 0.84415584 0.83247423]
[1. 1. 1. 1. 1.]
[0.765  0.78   0.79   0.8125 0.8075]
[5.52658916 5.36542726 5.36672902 5.42429924 5.38562894]
[0.01619267 0.01611876 0.01568985 0.0157516  0.01523423]
Dataset/cifar10_2_500.npz
[1. 1. 1. 1. 1.]
[0.805 0.78  0.83  0.825 0.855]
[1. 1. 1. 1. 1.]
[0.79047619 0.75       0.81730769 0.77310924 0.82568807]
[1. 1. 1. 1. 1.]
[0.83 0.84 0.85 0.92 0.9 ]
[1.06340861 1.08918571 1.08197951 1.06695867 1.04970551]
[0.00843263 0.00844359 0.00863981 0.0080483  0.00792575]
Dataset/pizza_raw_32.npz
[1. 1. 1. 1. 1.]
[0.63959391 0.66666667 0.66666667 0.6692112  0.63613232]
[1. 1. 1. 1. 1.]
[0.64705882 0.66666667 0.67567568 0.66834171 0.64673913]
[1. 1. 1. 1. 1.]
[0.6142132  0.66326531 0.6377551  0.6751269  0.60406091]
[2.73352146 2.80839634 2.74456286 2.75859356 2.59468985]
[0.01068115 0.01060915 0.01100707 0.01063395 0.01087046]
Datase

In [17]:
def svm_cv(directory):
    """Cross-validate a plain SVC on raw pixels and print the results.

    Images are flattened to one row per sample and passed to ``cross_validate``
    with 5 stratified folds. Prints the dataset path followed by the per-fold
    train/test accuracy, precision and recall, then fit and score times.

    Args:
        directory: dataset path forwarded to ``get_dataset``.
    """
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    svc = SVC(random_state=seed)
    images, labels = get_dataset(directory)
    flat_images = images.reshape(images.shape[0], -1)
    results = cross_validate(
        svc,
        flat_images,
        labels,
        scoring=['accuracy', 'precision', 'recall'],
        cv=splitter,
        return_train_score=True,
    )
    print(directory)
    report_keys = (
        'train_accuracy', 'test_accuracy',
        'train_precision', 'test_precision',
        'train_recall', 'test_recall',
        'fit_time', 'score_time',
    )
    for key in report_keys:
        print(results[key])

In [18]:
# Run the raw-pixel SVC baseline on every dataset; svm_cv prints the dataset
# path itself.
for dataset, _ in datasets:
    svm_cv(dataset)

Dataset/car_bike_raw.npz
[0.9215625 0.9209375 0.9240625 0.925     0.9203125]
[0.805   0.82375 0.8175  0.81875 0.82   ]
[0.91920447 0.91858297 0.91960421 0.92288557 0.91181874]
[0.81282051 0.82294264 0.82070707 0.82442748 0.80331754]
[0.924375 0.92375  0.929375 0.9275   0.930625]
[0.7925 0.825  0.8125 0.81   0.8475]
[2.42605352 2.39635849 2.38328385 2.39845538 2.39612126]
[0.82994509 0.80815697 0.82573986 0.84168339 0.81868529]
Dataset/cifar10_2_500.npz
[0.93125 0.93375 0.9375  0.935   0.9375 ]
[0.785 0.78  0.815 0.81  0.855]
[0.90973872 0.91211401 0.91273585 0.92028986 0.91666667]
[0.77142857 0.74137931 0.82474227 0.76271186 0.8317757 ]
[0.9575 0.96   0.9675 0.9525 0.9625]
[0.81 0.86 0.8  0.9  0.89]
[0.07622099 0.07469702 0.07737947 0.07600284 0.07727265]
[0.03808951 0.03701735 0.03782439 0.03735328 0.03949332]
Dataset/pizza_raw_32.npz
[0.89185751 0.89383344 0.89574062 0.90336936 0.89637635]
[0.65482234 0.68447837 0.69465649 0.69211196 0.63613232]
[0.93258427 0.9454023  0.943101   0.94

In [19]:
def embedding_rf(directory):
    """Cross-validate a random forest on flattened conv features (5 folds).

    A new feature extractor (3x3 conv with 100 ReLU filters, 2x2 max pooling,
    flatten) is created on every call and never fitted, so it is used with its
    initial weights. For each stratified fold, a ``RandomForestClassifier`` is
    trained on the extracted training features and evaluated on both splits.
    Accuracy, precision and recall use scikit-learn's default arguments.

    Args:
        directory: dataset path forwarded to ``get_dataset``.

    Returns:
        Tuple of eight per-fold lists, in this order: train accuracies, test
        accuracies, train precisions, test precisions, train recalls, test
        recalls, training times, test times (times in seconds).
    """
    X, y = get_dataset(directory)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    feature_extractor = tf.keras.Sequential([
        # Explicit Input object instead of `input_shape=` on the first layer,
        # which Keras warns against for Sequential models.
        tf.keras.Input(shape=(32, 32, 1)),
        layers.Conv2D(100, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
    ])

    train_accs = []
    test_accs = []

    train_prec = []
    test_prec = []

    train_recall = []
    test_recall = []

    train_times = []
    test_times = []

    for train_indices, test_indices in kf.split(X, y):
        X_train, X_test, y_train, y_test = X[train_indices], X[test_indices], y[train_indices], y[test_indices]

        # Training time covers feature extraction, fitting and the prediction
        # on the training split. perf_counter is monotonic, so the interval
        # cannot be distorted by system clock adjustments.
        start_train_time = time.perf_counter()
        train_features = feature_extractor.predict(X_train, verbose=0)
        clf = RandomForestClassifier(random_state=seed)
        clf.fit(train_features, y_train)
        pred_train = clf.predict(train_features)
        train_times.append(time.perf_counter() - start_train_time)

        # Test time covers feature extraction and prediction on the held-out split.
        start_test_time = time.perf_counter()
        test_features = feature_extractor.predict(X_test, verbose=0)
        pred_test = clf.predict(test_features)
        test_times.append(time.perf_counter() - start_test_time)

        train_accs.append(accuracy_score(y_train, pred_train))
        test_accs.append(accuracy_score(y_test, pred_test))

        train_prec.append(precision_score(y_train, pred_train))
        test_prec.append(precision_score(y_test, pred_test))

        train_recall.append(recall_score(y_train, pred_train))
        test_recall.append(recall_score(y_test, pred_test))

    print("Train Accuracies: " + str(train_accs))
    print("Test Accuracies: " + str(test_accs))
    print("Train Precisions: " + str(train_prec))
    print("Test Precisions: " + str(test_prec))
    print("Train Recalls: " + str(train_recall))
    print("Test Recalls: " + str(test_recall))
    print("Train Times: " + str(train_times))
    print("Test Times: " + str(test_times))

    return train_accs, test_accs, train_prec, test_prec, train_recall, test_recall, train_times, test_times

In [20]:
# Run the conv-features + random forest experiment on every dataset. The path
# is printed first so each block of metrics in the output can be attributed to
# its dataset (embedding_rf does not print it).
for dataset, _ in datasets:
    print(dataset)
    embedding_rf(dataset)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Accuracies: [0.85125, 0.8675, 0.84875, 0.87, 0.86375]
Train Precisions: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Precisions: [0.8746666666666667, 0.8712121212121212, 0.8661417322834646, 0.8814432989690721, 0.875968992248062]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [0.82, 0.8625, 0.825, 0.855, 0.8475]
Train Times: [20.90194082260132, 20.98516345024109, 21.023071765899658, 20.713107109069824, 20.86342763900757]
Test Times: [0.2371227741241455, 0.2581017017364502, 0.21907496452331543, 0.21593379974365234, 0.22736501693725586]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Accuracies: [0.865, 0.805, 0.825, 0.865, 0.895]
Train Precisions: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Precisions: [0.8613861386138614, 0.7699115044247787, 0.8037383177570093, 0.811965811965812, 0.898989898989899]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [0.87, 0.87, 0.86, 0.95, 0.89]
Train Times: [3.7263596057891846, 3.6614792346954346, 3.6954641342163086, 3.739593505859375, 3.6081812381744385]
Test Times: [0.0906522274017334, 0.07952094078063965, 0.08064770698547363, 0.07780051231384277, 0.07779741287231445]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Accuracies: [0.7208121827411168, 0.7175572519083969, 0.7251908396946565, 0.7048346055979644, 0.6692111959287532]
Train Precisions: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Precisions: [0.7351351351351352, 0.7093596059113301, 0.7244897959183674, 0.7396449704142012, 0.6892655367231638]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [0.6903553299492385, 0.7346938775510204, 0.7244897959183674, 0.6345177664974619, 0.6192893401015228]
Train Times: [8.576080799102783, 8.638179540634155, 8.509258270263672, 8.372812986373901, 8.339315176010132]
Test Times: [0.12489795684814453, 0.12600111961364746, 0.1252143383026123, 0.12340044975280762, 0.12376832962036133]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.9905149051490515, 0.9932249322493225, 0.9905149051490515, 0.9905277401894452, 0.9945872801082544]
Test Accuracies: [0.6810810810810811, 0.6702702702702703, 0.5891891891891892, 0.6413043478260869, 0.6630434782608695]
Train Precisions: [0.9948051948051948, 0.9922879177377892, 0.9948051948051948, 0.9922480620155039, 0.9948453608247423]
Test Precisions: [0.6862745098039216, 0.6914893617021277, 0.6082474226804123, 0.6396396396396397, 0.6881720430107527]
Train Recalls: [0.9871134020618557, 0.9948453608247423, 0.9871134020618557, 0.9896907216494846, 0.9948453608247423]
Test Recalls: [0.7216494845360825, 0.6701030927835051, 0.6082474226804123, 0.7319587628865979, 0.6597938144329897]
Train Times: [3.098048448562622, 3.079367160797119, 2.9781153202056885, 3.0857112407684326, 2.9271042346954346]
Test Times: [0.07663893699645996, 0.07822299003601074, 0.07861852645874023, 0.07634878158569336, 0.07541584968566895]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.9984251968503937, 1.0, 0.9984251968503937, 0.9968503937007874, 0.9968553459119497]
Test Accuracies: [0.779874213836478, 0.8364779874213837, 0.8238993710691824, 0.8113207547169812, 0.8481012658227848]
Train Precisions: [0.9980276134122288, 1.0, 0.9980237154150198, 0.9960552268244576, 0.9980237154150198]
Test Precisions: [0.8137931034482758, 0.8571428571428571, 0.8278145695364238, 0.8299319727891157, 0.8445945945945946]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 0.9980237154150198]
Test Recalls: [0.9365079365079365, 0.9523809523809523, 0.984251968503937, 0.9606299212598425, 0.9920634920634921]
Train Times: [3.8070507049560547, 3.5516347885131836, 3.66693115234375, 3.874662399291992, 3.6080830097198486]
Test Times: [0.07114410400390625, 0.0697786808013916, 0.06794857978820801, 0.06711149215698242, 0.06752872467041016]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Accuracies: [0.9377133105802048, 0.9436379163108455, 0.9453458582408198, 0.9496157130657558, 0.9299743808710503]
Train Precisions: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Precisions: [0.9473684210526315, 0.9477866061293984, 0.9561707035755479, 0.9595375722543352, 0.9356659142212189]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [0.968421052631579, 0.9766081871345029, 0.9695906432748538, 0.9718969555035128, 0.9707259953161592]
Train Times: [41.06705284118652, 43.825302600860596, 42.66103982925415, 42.92843437194824, 42.2479190826416]
Test Times: [0.3173487186431885, 0.314162015914917, 0.3044731616973877, 0.31594419479370117, 0.3095574378967285]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Accuracies: [0.9130434782608695, 0.9782608695652174, 1.0, 0.9777777777777777, 0.9777777777777777]
Train Precisions: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Precisions: [0.8181818181818182, 1.0, 1.0, 1.0, 1.0]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [1.0, 0.9444444444444444, 1.0, 0.9444444444444444, 0.9444444444444444]
Train Times: [0.3679628372192383, 0.3483891487121582, 0.3499319553375244, 0.3646206855773926, 0.34339094161987305]
Test Times: [0.047281503677368164, 0.04512143135070801, 0.044986724853515625, 0.04628467559814453, 0.04607844352722168]


In [21]:
def embedding_svm(directory):
    """Cross-validate an SVC on flattened conv features (5 folds).

    A new feature extractor (3x3 conv with 100 ReLU filters, 2x2 max pooling,
    flatten) is created on every call and never fitted, so it is used with its
    initial weights. For each stratified fold, an ``SVC`` is trained on the
    extracted training features and evaluated on both splits. Accuracy,
    precision and recall use scikit-learn's default arguments.

    Args:
        directory: dataset path forwarded to ``get_dataset``.

    Returns:
        Tuple of eight per-fold lists, in this order: train accuracies, test
        accuracies, train precisions, test precisions, train recalls, test
        recalls, training times, test times (times in seconds).
    """
    X, y = get_dataset(directory)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    feature_extractor = tf.keras.Sequential([
        # Explicit Input object instead of `input_shape=` on the first layer,
        # which Keras warns against for Sequential models.
        tf.keras.Input(shape=(32, 32, 1)),
        layers.Conv2D(100, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
    ])

    train_accs = []
    test_accs = []

    train_prec = []
    test_prec = []

    train_recall = []
    test_recall = []

    train_times = []
    test_times = []

    for train_indices, test_indices in kf.split(X, y):
        X_train, X_test, y_train, y_test = X[train_indices], X[test_indices], y[train_indices], y[test_indices]

        # Training time covers feature extraction, fitting and the prediction
        # on the training split. perf_counter is monotonic, so the interval
        # cannot be distorted by system clock adjustments.
        start_train_time = time.perf_counter()
        train_features = feature_extractor.predict(X_train, verbose=0)
        clf = SVC(random_state=seed)
        clf.fit(train_features, y_train)
        pred_train = clf.predict(train_features)
        train_times.append(time.perf_counter() - start_train_time)

        # Test time covers feature extraction and prediction on the held-out split.
        start_test_time = time.perf_counter()
        test_features = feature_extractor.predict(X_test, verbose=0)
        pred_test = clf.predict(test_features)
        test_times.append(time.perf_counter() - start_test_time)

        train_accs.append(accuracy_score(y_train, pred_train))
        test_accs.append(accuracy_score(y_test, pred_test))

        train_prec.append(precision_score(y_train, pred_train))
        test_prec.append(precision_score(y_test, pred_test))

        train_recall.append(recall_score(y_train, pred_train))
        test_recall.append(recall_score(y_test, pred_test))

    print("Train Accuracies: " + str(train_accs))
    print("Test Accuracies: " + str(test_accs))
    print("Train Precisions: " + str(train_prec))
    print("Test Precisions: " + str(test_prec))
    print("Train Recalls: " + str(train_recall))
    print("Test Recalls: " + str(test_recall))
    print("Train Times: " + str(train_times))
    print("Test Times: " + str(test_times))

    return train_accs, test_accs, train_prec, test_prec, train_recall, test_recall, train_times, test_times

In [22]:
# Run the conv-features + SVC experiment on every dataset. The path is printed
# first so each block of metrics in the output can be attributed to its
# dataset (embedding_svm does not print it).
for dataset, _ in datasets:
    print(dataset)
    embedding_svm(dataset)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.9075, 0.905625, 0.9075, 0.899375, 0.905]
Test Accuracies: [0.8625, 0.8675, 0.87125, 0.87875, 0.8775]
Train Precisions: [0.9152866242038217, 0.9139030612244898, 0.9142312579415502, 0.9096153846153846, 0.910126582278481]
Test Precisions: [0.868020304568528, 0.893048128342246, 0.8721804511278195, 0.8894601542416453, 0.8682926829268293]
Train Recalls: [0.898125, 0.895625, 0.899375, 0.886875, 0.89875]
Test Recalls: [0.855, 0.835, 0.87, 0.865, 0.89]
Train Times: [122.91563510894775, 121.0625672340393, 122.46501278877258, 119.72188091278076, 121.49348402023315]
Test Times: [21.62704038619995, 22.098002672195435, 22.09329581260681, 21.84291958808899, 22.461766242980957]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.84375, 0.8475, 0.845, 0.84, 0.8225]
Test Accuracies: [0.81, 0.75, 0.8, 0.83, 0.825]
Train Precisions: [0.8117913832199547, 0.8188073394495413, 0.8165137614678899, 0.8192488262910798, 0.7986111111111112]
Test Precisions: [0.81, 0.7192982456140351, 0.7884615384615384, 0.775, 0.8155339805825242]
Train Recalls: [0.895, 0.8925, 0.89, 0.8725, 0.8625]
Test Recalls: [0.81, 0.82, 0.82, 0.93, 0.84]
Train Times: [8.651260614395142, 8.950068473815918, 9.217454195022583, 9.456204891204834, 9.558130025863647]
Test Times: [1.6458098888397217, 1.683002233505249, 1.8530967235565186, 1.7876145839691162, 1.7937114238739014]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.787531806615776, 0.782581055308328, 0.7914812460267006, 0.7800381436745073, 0.7851239669421488]
Test Accuracies: [0.7055837563451777, 0.7302798982188295, 0.7251908396946565, 0.7379134860050891, 0.6819338422391857]
Train Precisions: [0.8284883720930233, 0.8306092124814265, 0.8410104011887073, 0.8133903133903134, 0.8255813953488372]
Test Precisions: [0.7579617834394905, 0.7647058823529411, 0.7682926829268293, 0.7764705882352941, 0.7142857142857143]
Train Recalls: [0.7251908396946565, 0.7102922490470139, 0.7191867852604829, 0.7264631043256997, 0.72264631043257]
Test Recalls: [0.6040609137055838, 0.6632653061224489, 0.6428571428571429, 0.6700507614213198, 0.6091370558375635]
Train Times: [39.6759831905365, 40.212639808654785, 41.51070785522461, 39.93765306472778, 39.759843587875366]
Test Times: [7.233294725418091, 7.298090696334839, 6.850047588348389, 7.340104818344116, 7.172684669494629]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.7425474254742548, 0.7411924119241192, 0.7357723577235772, 0.7239512855209743, 0.7483085250338295]
Test Accuracies: [0.6702702702702703, 0.6162162162162163, 0.6324324324324324, 0.6521739130434783, 0.6739130434782609]
Train Precisions: [0.776536312849162, 0.7871720116618076, 0.7765042979942693, 0.7771084337349398, 0.7700534759358288]
Test Precisions: [0.6956521739130435, 0.6585365853658537, 0.6705882352941176, 0.6896551724137931, 0.7078651685393258]
Train Recalls: [0.7164948453608248, 0.6958762886597938, 0.6984536082474226, 0.6649484536082474, 0.7422680412371134]
Test Recalls: [0.6597938144329897, 0.5567010309278351, 0.5876288659793815, 0.6185567010309279, 0.6494845360824743]
Train Times: [9.303575277328491, 9.056434154510498, 8.9516282081604, 8.519039869308472, 8.694988250732422]
Test Times: [1.6941263675689697, 1.534087896347046, 1.5339951515197754, 1.6122868061065674, 1.6277191638946533]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.8110236220472441, 0.8031496062992126, 0.7968503937007874, 0.8031496062992126, 0.8034591194968553]
Test Accuracies: [0.7987421383647799, 0.7924528301886793, 0.7987421383647799, 0.7987421383647799, 0.7974683544303798]
Train Precisions: [0.8083067092651757, 0.8019017432646592, 0.7965299684542587, 0.8015873015873016, 0.8019017432646592]
Test Precisions: [0.7974683544303798, 0.7924528301886793, 0.7987421383647799, 0.7987421383647799, 0.7974683544303798]
Train Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Test Recalls: [1.0, 1.0, 1.0, 1.0, 1.0]
Train Times: [4.366586923599243, 4.255768775939941, 4.61046576499939, 4.400219678878784, 4.3446455001831055]
Test Times: [0.8214547634124756, 0.7926383018493652, 0.8672618865966797, 0.8422045707702637, 0.7689864635467529]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.9538855678906917, 0.9502668089647812, 0.9543223052294557, 0.9543223052294557, 0.9547491995731057]
Test Accuracies: [0.9505119453924915, 0.9573014517506405, 0.946199829205807, 0.9453458582408198, 0.9393680614859095]
Train Precisions: [0.9582140812821981, 0.9551300371534724, 0.9582379862700229, 0.9585121602288984, 0.9593239759381266]
Test Precisions: [0.9575200918484501, 0.9610538373424972, 0.956221198156682, 0.9468325791855203, 0.9433748584371461]
Train Recalls: [0.9795201872440024, 0.9777647747220597, 0.9801053247513165, 0.9798186604270255, 0.9795261772448084]
Test Recalls: [0.9754385964912281, 0.9812865497076023, 0.9707602339181286, 0.9800936768149883, 0.9754098360655737]
Train Times: [121.00946807861328, 123.27836322784424, 118.70619440078735, 122.19456219673157, 122.267991065979]
Test Times: [20.94132947921753, 22.592478275299072, 21.705448627471924, 21.815021276474, 20.091691255569458]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracies: [0.988950276243094, 0.988950276243094, 0.978021978021978, 0.989010989010989, 0.989010989010989]
Test Accuracies: [0.9565217391304348, 0.9347826086956522, 0.9777777777777777, 0.9777777777777777, 1.0]
Train Precisions: [0.9861111111111112, 0.9861111111111112, 0.9594594594594594, 0.9861111111111112, 0.972972972972973]
Test Precisions: [0.9, 0.8947368421052632, 0.9473684210526315, 1.0, 1.0]
Train Recalls: [0.9861111111111112, 0.9861111111111112, 0.9861111111111112, 0.9861111111111112, 1.0]
Test Recalls: [1.0, 0.9444444444444444, 1.0, 0.9444444444444444, 1.0]
Train Times: [0.35775184631347656, 0.2951338291168213, 0.3045616149902344, 0.37577152252197266, 0.30847787857055664]
Test Times: [0.11256623268127441, 0.08081603050231934, 0.08230137825012207, 0.09107780456542969, 0.08461189270019531]
