In [1]:
import numpy as np
import os
import json
import math

import helper
from feature_optimization import FeatureOptimizer
import feature_opt_functions as funcs
from indices import *

from scipy.stats import spearmanr, pearsonr

from sklearn.preprocessing import RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import jaccard_score, average_precision_score

from imblearn.over_sampling import RandomOverSampler

In [2]:
# Build lookup tables from the two spreadsheets: for every non-None row returned
# by the helper, the row's first element maps to its second element.
dictOfStressedImageNames = {
    row[0]: row[1]
    for row in helper.getStressedImagesNames('stress_date.xlsx')
    if row is not None
}

dictOfReferenceImageNames = {
    row[0]: row[1]
    for row in helper.getStressedImagesNames('reference_date.xlsx')
    if row is not None
}

# The JSON file holds the tile subdivision; entry 0 is used as the training
# group and entry 1 as the test group.
with open(os.path.join("subdivs", "tiles_2img_2.json"), 'r') as fin:
    tilesPerGroup = json.load(fin)

train_tiles = tilesPerGroup[0]
test_tiles = tilesPerGroup[1]

# Each result is indexed as [0] and [1] further down (named H and S there);
# presumably the two sample classes -- TODO confirm against helper.getH_and_S.
train_data = helper.getH_and_S(dictOfStressedImageNames, train_tiles)
train_data_ref = helper.getH_and_S(dictOfReferenceImageNames, train_tiles)
test_data = helper.getH_and_S(dictOfStressedImageNames, test_tiles)
test_data_ref = helper.getH_and_S(dictOfReferenceImageNames, test_tiles)

(92, 4)
(92, 4)


In [3]:
# Two-element list built from the stressed-date training data, with each part
# passed through helper.leaveFinite (presumably drops non-finite samples --
# TODO confirm). evaluateModel reads this module-level list when fitting.
data = [
    helper.leaveFinite(train_data[0]),
    helper.leaveFinite(train_data[1]),
]

In [4]:
def evaluateModel(name, config):
    """Select features for one configuration, then train and score MLP models.

    For every (informativeness, independency) threshold pair in
    ``config["thresholds"]`` a ``FeatureOptimizer`` is fitted on the
    module-level ``data`` list, the train/test series are transformed with the
    selected features, scaled, and ``config["models"]`` MLP classifiers are
    trained with freshly drawn random seeds.

    Relies on the module-level variables ``data``, ``train_data``,
    ``train_data_ref``, ``test_data`` and ``test_data_ref``.

    Args:
        name: Label used in the progress print-out. The special value
            ``"BANDS"`` skips feature selection and uses every encoder feature.
        config: Dict with the keys ``"classes"``, ``"insert"``,
            ``"max_feature_count"``, ``"thresholds"`` (a dict with
            ``"informativeness"`` and ``"independency"`` lists),
            ``"smoother"`` and ``"models"``.

    Returns:
        Dict mapping ``"eval_<n>"`` to a per-model result dict containing
        ``"seed"``, ``"fitness"``, ``"features"`` and the train/test values of
        the ``"iou"`` and ``"pr_auc"`` metrics.
    """
    # The second argument is the list 1..11; presumably the band numbers -- TODO confirm.
    encoder = IndicesClassEncoderEq(config["classes"], list(range(1, 12)))

    # One pair of cache dicts is handed to every optimizer created below, so
    # all threshold combinations share the same two cache objects.
    inform_cache = {}
    indep_cache = {}

    # Passed to FeatureOptimizer as optimizer_args. The key names look like
    # PyGAD GA parameters -- presumably forwarded to it; verify in FeatureOptimizer.
    args = {
        "num_generations": 150,
        "num_parents_mating": 3,
        "parent_selection_type": "sss",
        "keep_elitism": 1,
        "sol_per_pop": 150,
        "mutation_probability": 0.25,
        "parallel_processing": 8
    }

    # Metric tables are loop-invariant, so they are built once up front.
    predict_metrics = [("iou", jaccard_score)]
    predict_proba_metrics = [("pr_auc", average_precision_score)]

    eval_results = {}
    counter = 0
    for informativeness_threshold in config["thresholds"]["informativeness"]:
        for independency_threshold in config["thresholds"]["independency"]:
            opt = FeatureOptimizer(encoder, config["max_feature_count"],
                    funcs.bhattacharyya_distance,
                    funcs.spearman_independency,
                    optimization_method="genetic",
                    optimizer_args=args,
                    informativeness_threshold=informativeness_threshold,
                    independency_threshold=independency_threshold,
                    set_independency=config["smoother"])

            opt.informativeness_cache = inform_cache
            opt.independency_cache = indep_cache

            if name == "BANDS":
                # The extra positional False presumably disables the search
                # itself -- TODO confirm. The selection is then overwritten
                # with every feature index the encoder provides.
                opt.fit(data, data[1], False, False)
                opt.selected_features = list(range(encoder.total_length))
            else:
                opt.fit(data, data[1], False)

            # BEGIN SCALING & DATA PREPARATION BLOCK
            indices_train_H = helper.leaveFinite(opt.transform_series([train_data[0], train_data_ref[0]], config["insert"])).swapaxes(0, 1)
            indices_train_S = helper.leaveFinite(opt.transform_series([train_data[1], train_data_ref[1]], config["insert"])).swapaxes(0, 1)
            indices_test_H = helper.leaveFinite(opt.transform_series([test_data[0], test_data_ref[0]], config["insert"])).swapaxes(0, 1)
            indices_test_S = helper.leaveFinite(opt.transform_series([test_data[1], test_data_ref[1]], config["insert"])).swapaxes(0, 1)

            # The scaler is fitted on the training H set only and then applied
            # unchanged to the other three sets.
            # NOTE(review): fitting on H alone (not H+S) looks deliberate -- confirm.
            scaler = RobustScaler(unit_variance=True)

            indices_train_H = scaler.fit_transform(indices_train_H)
            indices_train_S = scaler.transform(indices_train_S)
            indices_test_H = scaler.transform(indices_test_H)
            indices_test_S = scaler.transform(indices_test_S)

            train_X, train_y = helper.joinData(indices_train_H, indices_train_S)
            test_X, test_y = helper.joinData(indices_test_H, indices_test_S)
            # END SCALING & DATA PREPARATION BLOCK

            for i in range(config["models"]):
                local_results = {}
                eval_results[f"eval_{counter}"] = local_results
                counter += 1

                # The seed is drawn from NumPy's global RNG and stored with the
                # results so an individual model can be re-created later.
                seed = int(np.random.randint(2**31))
                local_results["seed"] = seed
                local_results["fitness"] = opt.get_fitness_()
                local_results["features"] = [int(f) for f in opt.selected_features]

                # BEGIN MODEL TRAINING BLOCK
                # One hidden layer of 16 units, written as a proper 1-tuple.
                clf = MLPClassifier(hidden_layer_sizes=(16,), max_iter=60, early_stopping=True, random_state=np.random.RandomState(seed))
                # Alternative setup kept for reference:
                # clf = MLPClassifier(tol=1e-2,
                #                     alpha=1e-4,
                #                     learning_rate="adaptive",
                #                     activation='relu',
                #                     hidden_layer_sizes=(2 * (len(opt.selected_features) + (0 if not config["insert"] else 11)),),
                #                     shuffle=True,
                #                     early_stopping=False,
                #                     max_iter=60,
                #                     random_state=np.random.RandomState(seed))

                clf.fit(train_X, train_y)
                # END MODEL TRAINING BLOCK

                # BEGIN MODEL EVALUATION BLOCK
                pred_train = clf.predict(train_X)
                pred_proba_train = clf.predict_proba(train_X)[:, 1]

                pred_test = clf.predict(test_X)
                pred_proba_test = clf.predict_proba(test_X)[:, 1]

                # Label-based metrics use hard predictions ...
                for key, metric in predict_metrics:
                    local_results[key + " (train)"] = metric(train_y, pred_train)
                    local_results[key + " (test)"] = metric(test_y, pred_test)

                # ... score-based metrics use the probability of class index 1.
                for key, metric in predict_proba_metrics:
                    local_results[key + " (train)"] = metric(train_y, pred_proba_train)
                    local_results[key + " (test)"] = metric(test_y, pred_proba_test)
                # END MODEL EVALUATION BLOCK

                print(name, "IFT:", informativeness_threshold, "IDT:", independency_threshold, "M:", i)
                for metric_name, value in local_results.items():
                    print(f"    {metric_name}: {value}")

    return eval_results

In [5]:
# configurations = {}
# configurations["BANDS"] = { "classes": [B], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP"] = { "classes": [NORMP], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP4"] = { "classes": [NORMP4], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["DIST2"] = { "classes": [DIST2], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORPP4"] = { "classes": [NORPP4], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["CVIbased"] = { "classes": [CVIbased], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NMDIbased"] = { "classes": [NMDIbased], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["MCARIbased"] = { "classes": [MCARIbased], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUE"] = { "classes": [Hue], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUESIMP"] = { "classes": [HueSimp], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC3"] = { "classes": [FRAC3], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC4"] = { "classes": [FRAC4], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP3"] = { "classes": [NORMP3], "insert": False, "max_feature_count": 16, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }

# results = {}
# for key in configurations.keys():
#     results[key] = evaluateModel(key, configurations[key])

# with open(os.path.join("models_results_default", f"indices_16.json"), 'w') as fout:
#     json.dump(results, fout)

In [6]:
# configurations = {}
# configurations["NORMP"] = { "classes": [NORMP], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP4"] = { "classes": [NORMP4], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["DIST2"] = { "classes": [DIST2], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORPP4"] = { "classes": [NORPP4], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["CVIbased"] = { "classes": [CVIbased], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NMDIbased"] = { "classes": [NMDIbased], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["MCARIbased"] = { "classes": [MCARIbased], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUE"] = { "classes": [Hue], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUESIMP"] = { "classes": [HueSimp], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC3"] = { "classes": [FRAC3], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC4"] = { "classes": [FRAC4], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP3"] = { "classes": [NORMP3], "insert": True, "max_feature_count": 6, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }

# results = {}
# for key in configurations.keys():
#     results[key] = evaluateModel(key, configurations[key])

# with open(os.path.join("models_results_default", f"bands+indices_6_unlim.json"), 'w') as fout:
#     json.dump(results, fout)

In [7]:
# configurations = {}
# configurations["NORMP"] = { "classes": [NORMP], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP4"] = { "classes": [NORMP4], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["DIST2"] = { "classes": [DIST2], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORPP4"] = { "classes": [NORPP4], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["CVIbased"] = { "classes": [CVIbased], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NMDIbased"] = { "classes": [NMDIbased], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["MCARIbased"] = { "classes": [MCARIbased], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUE"] = { "classes": [Hue], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["HUESIMP"] = { "classes": [HueSimp], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC3"] = { "classes": [FRAC3], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["FRAC4"] = { "classes": [FRAC4], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }
# configurations["NORMP3"] = { "classes": [NORMP3], "insert": True, "max_feature_count": 11, "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]}, "smoother": "geometric_mean", "models": 2 }

# results = {}
# for key in configurations.keys():
#     results[key] = evaluateModel(key, configurations[key])
# #
# with open(os.path.join("models_results_default", f"bands+indices_11_unlim.json"), 'w') as fout:
#     json.dump(results, fout)

In [8]:
# The four FEST configurations are identical except for the "smoother" value
# (passed to FeatureOptimizer as set_independency inside evaluateModel).
# Each threshold list holds the same value twice, so evaluateModel's nested
# threshold loops run four optimizer fits per configuration.
configurations = {}
for suffix, smoother in (
    ("geom", "geometric_mean"),
    ("harm", "harmonic_mean"),
    ("min", "min"),
    ("mult", "default"),
):
    configurations[f"FEST_{suffix}"] = {
        "classes": [Hue, FRAC3, NORMP3],
        "insert": False,
        "max_feature_count": 16,
        "thresholds": {"informativeness": [0.05, 0.05], "independency": [0.05, 0.05]},
        "smoother": smoother,
        "models": 2,
    }

# Create the output directory before the long-running evaluation so that a
# missing folder cannot cause the finished results to be lost at write time.
results_dir = "models_results_default"
os.makedirs(results_dir, exist_ok=True)

# Filled incrementally so partial results stay inspectable if a run fails.
results = {}
for key, cfg in configurations.items():
    results[key] = evaluateModel(key, cfg)

with open(os.path.join(results_dir, "fest_16.json"), 'w') as fout:
    json.dump(results, fout)

Fitness (Gen 150): 2.9670808651604994
FEST_geom IFT: 0.05 IDT: 0.05 M: 0
    seed: 1716638751
    fitness: 2.9670808651604994
    features: [603, 3885, 3232, 1639, 2516, 241, 344, 300, 254, 3519, 1872, 2852, 3490, 3195, 131, 1757]
    iou (train): 0.609903695991671
    iou (test): 0.5410380544922315
    pr_auc (train): 0.8373527522092848
    pr_auc (test): 0.750130399941264
FEST_geom IFT: 0.05 IDT: 0.05 M: 1
    seed: 1207273822
    fitness: 2.9670808651604994
    features: [603, 3885, 3232, 1639, 2516, 241, 344, 300, 254, 3519, 1872, 2852, 3490, 3195, 131, 1757]
    iou (train): 0.6189537329127235
    iou (test): 0.544765725110034
    pr_auc (train): 0.8403438769628016
    pr_auc (test): 0.7489562224099897
Fitness (Gen 150): 3.0973925737553873
FEST_geom IFT: 0.05 IDT: 0.05 M: 0
    seed: 76991657
    fitness: 3.0973925737553873
    features: [587, 1662, 2470, 3011, 1639, 32, 949, 1170, 2489, 596, 69, 829, 2560, 3525, 2963, 2750]
    iou (train): 0.6239830676632052
    iou (test): 0.53