In [None]:
import os

# Pin each numerical backend to a single thread. This cell runs before the
# cell that imports numpy / TensorFlow / scikit-learn.
_SINGLE_THREAD_ENV_VARS = (
    "OMP_NUM_THREADS",         # OpenMP
    "OPENBLAS_NUM_THREADS",    # OpenBLAS
    "MKL_NUM_THREADS",         # MKL (Intel Math Kernel Library)
    "VECLIB_MAXIMUM_THREADS",  # Apple's Accelerate library
    "NUMEXPR_NUM_THREADS",     # NumExpr
)
for _env_var in _SINGLE_THREAD_ENV_VARS:
    os.environ[_env_var] = "1"

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from scikeras.wrappers import KerasClassifier
import time
import matplotlib.pyplot as plt

import pickle
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Restrict TensorFlow to one thread for both of its thread pools so that it
# matches the single-thread environment variables set in the first cell.
_tf_threading = tf.config.threading
_tf_threading.set_inter_op_parallelism_threads(1)
_tf_threading.set_intra_op_parallelism_threads(1)

In [None]:
# One notebook-wide seed, applied to NumPy and TensorFlow/Keras and reused as
# `random_state` by the scikit-learn estimators and the CV splitter below.
seed = 1234
for _apply_seed in (np.random.seed, tf.keras.utils.set_random_seed, tf.random.set_seed):
    _apply_seed(seed)

In [None]:
def rgb_to_gray(rgb_images):
    """Collapse the colour axis of an image array into a single gray value.

    The first three entries of the last axis are combined as
    ``0.299 * c0 + 0.587 * c1 + 0.114 * c2``; any further entries on that axis
    (e.g. an alpha channel) are ignored.

    Args:
        rgb_images: Array whose last axis holds at least three colour channels.

    Returns:
        Array with the last axis removed, containing the weighted sums.
    """
    channel_weights = [0.299, 0.587, 0.114]
    colour_channels = rgb_images[..., :3]
    return np.dot(colour_channels, channel_weights)

In [None]:
def get_dataset(directory):
    """Load an ``.npz`` archive and return scaled grayscale images with labels.

    Args:
        directory: Path of the ``.npz`` file (despite the name, this is passed
            straight to ``np.load``). The archive must contain the arrays
            ``'images'`` and ``'labels'``.

    Returns:
        Tuple ``(gray_images, y)``: the images converted with ``rgb_to_gray``
        and divided by 255.0, and the labels array as stored in the archive.
    """
    # np.load on an .npz returns a lazily-read NpzFile that holds the file
    # open; the context manager closes it once both arrays are in memory.
    with np.load(directory) as raw_npz:
        X = raw_npz['images']
        y = raw_npz['labels']
    gray_images = rgb_to_gray(X)
    # assumes pixel values are in 0..255 — TODO confirm for every dataset
    gray_images /= 255.0
    return gray_images, y

In [None]:
# Keyword arguments for the scikit-learn estimators used in the experiment
# cells. `rf_hyp` and `dt_hyp` are passed as weak-learner settings, while
# `svm_hyp`, `lr_hyp` and `rf_stack_hyp` are passed as stack-learner settings.
# Every estimator reuses the notebook-wide `seed`; `n_jobs` is 1 wherever it
# is set, matching the single-thread configuration above.

# Random forest used as a weak learner.
rf_hyp = dict(
    n_estimators=10,
    criterion='log_loss',
    max_depth=5,
    random_state=seed,
    n_jobs=1,
)

# SVC used as a stack learner.
svm_hyp = dict(
    random_state=seed,
)

# Decision tree used as a weak learner.
dt_hyp = dict(
    max_depth=5,
    random_state=seed,
    criterion='log_loss',
)

# Logistic regression used as a stack learner.
lr_hyp = dict(
    random_state=seed,
    n_jobs=1,
)

# Random forest used as a stack learner.
rf_stack_hyp = dict(
    n_jobs=1,
    random_state=seed,
    n_estimators=30,
)

In [None]:
# (path to .npz archive, display name) for every dataset in the study.
# The experiment cells iterate over this list in order and use only the path.
datasets = [
    ('Dataset/car_bike_raw.npz', 'Car Bike Dataset'),
    ('Dataset/cifar10_2_500.npz', 'CIFAR10 Dataset'),
    ('Dataset/pizza_raw_32.npz', 'Pizza Dataset'),
    ('Dataset/corals.npz', 'Corals Dataset'),
    ('Dataset/eggs.npz', 'Eggs Dataset'),
    ('Dataset/xray.npz', 'Xray Dataset'),
    ('Dataset/covid.npz', 'Covid19 Dataset'),
]

In [None]:
class RK:
    """Stacked ensemble trained on the per-filter feature maps of a Keras model.

    The wrapped model turns each image into a stack of feature maps. One weak
    learner is trained per filter (last axis of the model output) on that
    filter's flattened map, and a stack learner is trained on the weak
    learners' predictions.
    """

    def __init__(self, model, weak_hyp, stack_hyp, weak_learner, stack_learner):
        """Set up the feature extractor and the stack learner.

        Args:
            model: Keras model whose ``input`` / ``output`` define the feature
                extractor.
            weak_hyp: Keyword arguments used for every weak learner.
            stack_hyp: Keyword arguments used for the stack learner.
            weak_learner: Estimator class instantiated once per filter.
            stack_learner: Estimator class instantiated once, here.
        """
        # Take the output tensor from the model that was passed in. Reading a
        # notebook-level global here would tie the class to cell execution
        # order and ignore any other model handed to the constructor.
        self.model = Model(inputs=model.input, outputs=model.output)
        self.weak_hyp = weak_hyp
        self.weak_learner = weak_learner
        self.stack_learner = stack_learner(**stack_hyp)
        print(f'stack learner hyp parameter: {self.stack_learner.get_params()}')

    def _iter_filter_features(self, X):
        """Yield one ``(num_images, -1)`` feature matrix per output filter of the model."""
        num_images = X.shape[0]
        feature_maps = self.model.predict(X, verbose=0)
        for filter_index in range(feature_maps.shape[-1]):
            # Select a single filter on the last axis and flatten its map.
            yield feature_maps[..., filter_index].reshape(num_images, -1)

    def fit(self, X_train, y_train):
        """Train one weak learner per filter, then the stack learner.

        Args:
            X_train: Images accepted by the wrapped model's ``predict``.
            y_train: Labels, one per image.

        Returns:
            The stack learner's predictions on the training data.
        """
        predictions = []
        self.trained_weak_learner = []
        for i, features in enumerate(self._iter_filter_features(X_train)):
            weak_learner = self.weak_learner(**self.weak_hyp)
            if i == 0:
                # Log the effective hyperparameters once, not once per filter.
                print(f'weak learner hyp parameter: {weak_learner.get_params()}')
            weak_learner.fit(features, y_train)
            self.trained_weak_learner.append(weak_learner)
            predictions.append(weak_learner.predict(features))
        # Column j holds the predictions of the weak learner for filter j.
        stacked_predictions = np.stack(predictions, axis=1)
        self.stack_learner.fit(stacked_predictions, y_train)
        return self.stack_learner.predict(stacked_predictions)

    def predict(self, X):
        """Predict labels for ``X`` with the fitted weak and stack learners.

        ``fit`` must have been called first.
        """
        predictions = [
            self.trained_weak_learner[i].predict(features)
            for i, features in enumerate(self._iter_filter_features(X))
        ]
        stacked_predictions = np.stack(predictions, axis=1)
        return self.stack_learner.predict(stacked_predictions)

In [None]:
# Feature extractor handed to RK: one 3x3 convolution with 100 filters followed
# by 2x2 max-pooling on 32x32 single-channel inputs. The model is never
# compiled or fitted in this notebook and is marked non-trainable, so the
# convolution keeps its initial weights.
input_layer = tf.keras.Input(shape=(32, 32, 1))
conv_layer = layers.Conv2D(
    filters=100,
    kernel_size=(3, 3),
    activation='relu',
)(input_layer)
x = layers.MaxPooling2D(pool_size=(2, 2))(conv_layer)
model = Model(inputs=input_layer, outputs=x)
model.trainable = False

In [None]:
def cv(directory, weak_hyp, stack_hyp, weak_learner, stack_learner):
    """Run 5-fold stratified cross-validation of an RK ensemble on one dataset.

    Args:
        directory: Path of the ``.npz`` dataset, forwarded to ``get_dataset``.
        weak_hyp: Keyword arguments for the weak learners.
        stack_hyp: Keyword arguments for the stack learner.
        weak_learner: Estimator class used for the per-filter weak learners.
        stack_learner: Estimator class used for the stack learner.

    Returns:
        ``pandas.DataFrame`` with one row per fold and the columns
        ``model_name``, ``directory``, ``train_acc``, ``test_acc``,
        ``train_prec``, ``test_prec``, ``train_recall``, ``test_recall``,
        ``train_time`` and ``test_time`` (times in seconds).
    """
    X, y = get_dataset(directory)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    # Identical for every fold, so build it once.
    model_name = f'{weak_learner.__name__}_{stack_learner.__name__}'

    results = {
        'model_name': [],
        'directory': [],
        'train_acc': [],
        'test_acc': [],
        'train_prec': [],
        'test_prec': [],
        'train_recall': [],
        'test_recall': [],
        'train_time': [],
        'test_time': []
    }

    for train_indices, test_indices in kf.split(X, y):
        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]

        # Training time covers building the RK wrapper and fitting it.
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_train_time = time.perf_counter()
        clf = RK(model, weak_hyp, stack_hyp, weak_learner, stack_learner)
        pred_train = clf.fit(X_train, y_train)
        results['train_time'].append(time.perf_counter() - start_train_time)

        # Test time covers prediction on the held-out fold only.
        start_test_time = time.perf_counter()
        pred_test = clf.predict(X_test)
        results['test_time'].append(time.perf_counter() - start_test_time)

        results['train_acc'].append(accuracy_score(y_train, pred_train))
        results['test_acc'].append(accuracy_score(y_test, pred_test))

        # NOTE(review): precision/recall are called with scikit-learn's default
        # averaging, which presumably requires binary labels — confirm for
        # every dataset.
        results['train_prec'].append(precision_score(y_train, pred_train))
        results['test_prec'].append(precision_score(y_test, pred_test))

        results['train_recall'].append(recall_score(y_train, pred_train))
        results['test_recall'].append(recall_score(y_test, pred_test))

        # Tag each fold's row with its dataset and model variant.
        results['directory'].append(directory)
        results['model_name'].append(model_name)

    return pd.DataFrame(results)

In [None]:
def model_save(weak_hyp, stack_hyp, weak_learner, stack_learner):
    """Fit RK ensembles on two full datasets and pickle them.

    One ensemble is fitted on ``Dataset/covid.npz`` (file suffix ``_S``) and one
    on ``Dataset/car_bike_raw.npz`` (file suffix ``_L``). Both are written to
    the ``VariantsSingleThread`` directory.

    Args:
        weak_hyp: Keyword arguments for the weak learners.
        stack_hyp: Keyword arguments for the stack learner.
        weak_learner: Estimator class used for the per-filter weak learners.
        stack_learner: Estimator class used for the stack learner.
    """
    output_dir = 'VariantsSingleThread'
    # Create the target directory up front so a missing folder is not
    # discovered only after both models have been fitted.
    os.makedirs(output_dir, exist_ok=True)

    X1, y1 = get_dataset('Dataset/covid.npz')
    X2, y2 = get_dataset('Dataset/car_bike_raw.npz')
    fs1 = RK(model, weak_hyp, stack_hyp, weak_learner, stack_learner)
    fs1.fit(X1, y1)

    fs2 = RK(model, weak_hyp, stack_hyp, weak_learner, stack_learner)
    fs2.fit(X2, y2)

    # NOTE(review): str() of a class gives "<class '...'>", so the file names
    # contain those characters. Kept unchanged so that anything loading the
    # existing files still finds them; cv() uses __name__ for the same purpose.
    base_name = f'{output_dir}/filter_stack_{str(weak_learner)}_{str(stack_learner)}'
    for fitted_model, suffix in ((fs1, 'S'), (fs2, 'L')):
        # `with` closes the file handle, which a bare open() never did.
        with open(f'{base_name}_{suffix}', 'wb') as pickle_file:
            pickle.dump(fitted_model, pickle_file)

In [None]:
# Variant: RandomForest weak learners stacked with an SVC.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_rf_svm.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, rf_hyp, svm_hyp, RandomForestClassifier, SVC)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(rf_hyp, svm_hyp, RandomForestClassifier, SVC)
print('Models pickled')

In [None]:
# Variant: DecisionTree weak learners stacked with a RandomForest.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_dt_rf.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, dt_hyp, rf_stack_hyp, DecisionTreeClassifier, RandomForestClassifier)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(dt_hyp, rf_stack_hyp, DecisionTreeClassifier, RandomForestClassifier)
print('Models pickled')

In [None]:
# Variant: DecisionTree weak learners stacked with an SVC.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_dt_svm.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, dt_hyp, svm_hyp, DecisionTreeClassifier, SVC)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(dt_hyp, svm_hyp, DecisionTreeClassifier, SVC)
print('Models pickled')

In [None]:
# Variant: RandomForest weak learners stacked with a RandomForest.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_rf_rf.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, rf_hyp, rf_stack_hyp, RandomForestClassifier, RandomForestClassifier)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(rf_hyp, rf_stack_hyp, RandomForestClassifier, RandomForestClassifier)
print('Models pickled')

In [None]:
# Variant: RandomForest weak learners stacked with LogisticRegression.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_rf_lr.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, rf_hyp, lr_hyp, RandomForestClassifier, LogisticRegression)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(rf_hyp, lr_hyp, RandomForestClassifier, LogisticRegression)
print('Models pickled')

In [None]:
# Variant: DecisionTree weak learners stacked with LogisticRegression.
# Cross-validate on every dataset, save the per-fold metrics, then pickle the
# models fitted by model_save().
results_path = 'VariantsCVSingleThread/merged_results_dt_lr.csv'
# Make sure the output directory exists before the long CV run starts.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

all_dfs = []
for dataset, _ in datasets:
    print(dataset)
    df = cv(dataset, dt_hyp, lr_hyp, DecisionTreeClassifier, LogisticRegression)
    all_dfs.append(df)

merged_df = pd.concat(all_dfs, ignore_index=True)
merged_df.to_csv(results_path, index=False)
# Report the file that was actually written.
print(f"All results have been merged and saved to {results_path}")
model_save(dt_hyp, lr_hyp, DecisionTreeClassifier, LogisticRegression)
print('Models pickled')

In [None]:
import glob

In [None]:
# Combine the per-variant result CSVs into a single table.
path = "VariantsCVSingleThread/*.csv"
output_file = "VariantsCVSingleThread/merged_df.csv"

# The output lives in the same directory and matches the pattern, so a re-run
# of this cell would read its own previous output and duplicate every row.
# Exclude it, and sort so the row order does not depend on directory order.
all_files = sorted(
    csv_file
    for csv_file in glob.glob(path)
    if os.path.basename(csv_file) != os.path.basename(output_file)
)

df_list = []

# Read each per-variant file into its own DataFrame
for file in all_files:
    df = pd.read_csv(file)
    df_list.append(df)

# Concatenate all DataFrames into a single DataFrame
merged_df = pd.concat(df_list, ignore_index=True)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file, index=False)

print("Files merged successfully!")
