In [1]:
import numpy as np
import os
import json
import math

import helper
from feature_optimization import FeatureOptimizer
import feature_opt_functions as funcs
from indices import *

from scipy.stats import spearmanr, pearsonr

from sklearn.preprocessing import RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import jaccard_score, average_precision_score

from imblearn.over_sampling import RandomOverSampler

In [2]:
dictOfStressedImageNames = {}
for v in helper.getStressedImagesNames('stress_date.xlsx'):
    if v is not None:
        dictOfStressedImageNames[v[0]] = v[1]

dictOfReferenceImageNames = {}
for v in helper.getStressedImagesNames('reference_date.xlsx'):
    if v is not None:
        dictOfReferenceImageNames[v[0]] = v[1]

tilesPerGroup = None
with open(os.path.join("subdivs", "tiles_2img_2.json"), 'r') as fout:
    tilesPerGroup = json.load(fout)

train_tiles = tilesPerGroup[0]
test_tiles = tilesPerGroup[1]

train_data = helper.getH_and_S(dictOfStressedImageNames, train_tiles)
train_data_ref = helper.getH_and_S(dictOfReferenceImageNames, train_tiles)
test_data = helper.getH_and_S(dictOfStressedImageNames, test_tiles)
test_data_ref = helper.getH_and_S(dictOfReferenceImageNames, test_tiles)

(92, 4)
(92, 4)


In [3]:
inform_cache = {}
indep_cache = {}

print("Start baking features")
# bakedIndices = BakedIndiceClassEncoder([NORMP4], list(range(1, 12)))
# bakedIndices.bake(funcs.bhattacharyya_distance, helper.leaveFinite(train_data[0]), helper.leaveFinite(train_data[1]))

# inform_cache = bakedIndices.inform_dict

Start baking features


In [4]:
def evaluateModel(informativeness_treshold, independency_treshold):
    # BEGIN FEATURE OPTIMIZER BLOCK
    print("-"*33)
    print("START NEW MODEL")
    print("INFO:", informativeness_treshold, "INDEP:", independency_treshold)
    print("Feature Optimization")
    args = { 
        "num_generations":150, 
        "num_parents_mating":3,
        "parent_selection_type":"sss",
        "keep_elitism":1,
        "sol_per_pop":150,
		"mutation_probability":0.25,
        "parallel_processing":8
        }
    
    # bakedIndices.total_length = next(x for x, val in enumerate(bakedIndices.instances) if val[1] < informativeness_treshold)
    encoder = IndicesClassEncoder([FRAC3, NORMP3], list(range(1, 12)))
    print(f"Avaible features: {encoder.total_length}")

    opt = FeatureOptimizer(encoder, 20,
                           funcs.bhattacharyya_distance, 
                           funcs.spearman_independency, 
                           optimization_method="genetic",
                           optimizer_args=args,
                           informativeness_threshold=informativeness_treshold, 
                           independency_threshold=independency_treshold)
    
    opt.informativeness_cache = inform_cache
    opt.independency_cache = indep_cache

    data = [None]*2
    data[0] = helper.leaveFinite(train_data[0])
    data[1] = helper.leaveFinite(train_data[1])
    opt.fit(data, data[1], False)
    # END FEATURE OPTIMIZER BLOCK

    # BEGIN SCALING & DATA PREPARATION BLOCK
    print("Data Preparation")
    indices_train_H = helper.leaveFinite(opt.transform_series([train_data[0], train_data_ref[0]])).swapaxes(0, 1)
    indices_train_S = helper.leaveFinite(opt.transform_series([train_data[1], train_data_ref[1]])).swapaxes(0, 1)
    indices_test_H = helper.leaveFinite(opt.transform_series([test_data[0], test_data_ref[0]])).swapaxes(0, 1)
    indices_test_S = helper.leaveFinite(opt.transform_series([test_data[1], test_data_ref[1]])).swapaxes(0, 1)

    scaler = RobustScaler(unit_variance=True)

    indices_train_H = scaler.fit_transform(indices_train_H)
    indices_train_S = scaler.transform(indices_train_S)
    indices_test_H = scaler.transform(indices_test_H)
    indices_test_S = scaler.transform(indices_test_S)

    train_X, train_y = helper.joinData(indices_train_H, indices_train_S)
    test_X, test_y = helper.joinData(indices_test_H, indices_test_S)
    # END SCALING & DATA PREPARATION BLOCK

    # BEGIN MODEL TRAINING BLOCK
    features_count = len(opt.selected_features)
    print("Model Training")
    clf = MLPClassifier(tol=1e-4, 
                        alpha=1e-4, 
                        learning_rate="adaptive", 
                        activation='relu', 
                        hidden_layer_sizes=(5 * features_count, 2 * features_count), 
                        shuffle=True, 
                        early_stopping=True, 
                        max_iter=250)
    
    clf.fit(train_X, train_y)
    # END MODEL TRAINING BLOCK

    # BEGIN MODEL EVALUATION BLOCK
    print("Model Evaluation")
    predict_metrics = [("iou", jaccard_score)]
    predict_proba_metrics = [("pr_auc", average_precision_score)]

    pred = clf.predict(test_X)
    pred_proba = clf.predict_proba(test_X)[:, 1]

    metric_results = {}
    for key, metric in predict_metrics:
        metric_results[key] = metric(test_y, pred)

    for key, metric in predict_proba_metrics:
        metric_results[key] = metric(test_y, pred_proba)
    # END MODEL EVALUATION BLOCK

    # BEGIN PRINTING BLOCK
    metric_results["fitness"] = opt.get_fitness_()
    metric_results["feature_count"] = len(opt.selected_features)
    for metric in metric_results.keys():
        print(f"    {metric}: {metric_results[metric]}")

    print("-"*33)
    # END PRINTING BLOCK

    return metric_results

In [5]:
informativeness_values = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5]
independency_values = [0.5, 0.5, 0.6, 0.8, 0.9, 1, 1.1]#[0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

print("Num of evaluations: ", len(informativeness_values) * len(independency_values))

result_matrix = [[None]*len(independency_values) for i in range(len(informativeness_values))]
for i in range(len(informativeness_values)):
    for j in range(len(independency_values)):
        result_matrix[i][j] = evaluateModel(informativeness_values[i], independency_values[j])

with open(os.path.join("grid_search", f"normp_results_v2_mlp_ext.json"), 'w') as fout:
    json.dump(result_matrix, fout)

Num of evaluations:  42
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 0.5
Feature Optimization
Avaible features: 2662
Fitness (Gen 150): 1.3436684548752855
Data Preparation
Model Training
Model Evaluation
    iou: 0.49099292301093717
    pr_auc: 0.6831017798452421
    fitness: 1.3436684548752855
    feature_count: 13
---------------------------------
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 0.5
Feature Optimization
Avaible features: 2662
Fitness (Gen 150): 1.3393615207951484
Data Preparation
Model Training
Model Evaluation
    iou: 0.5006074663132317
    pr_auc: 0.7020265451160919
    fitness: 1.3393615207951484
    feature_count: 13
---------------------------------
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 0.6
Feature Optimization
Avaible features: 2662
Fitness (Gen 150): 1.2938669904342566
Data Preparation
Model Training
Model Evaluation
    iou: 0.5252785859362135
    pr_auc: 0.7299424487570376
    fitness: 



Fitness (Gen 150): 0.6342796495890185
Data Preparation
Model Training
Model Evaluation
    iou: 0.4979174765369023
    pr_auc: 0.6277751813632244
    fitness: 0.6342796495890185
    feature_count: 1
---------------------------------
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 1.1
Feature Optimization
Avaible features: 2662
Fitness (Gen 29): 0.6342796495890185

list index out of range
Traceback (most recent call last):
  File "c:\Users\yevhe\mambaforge\envs\swiftt\Lib\site-packages\pygad\pygad.py", line 1765, in cal_pop_fitness
    for index, fitness in zip(solutions_to_submit_indices, executor.map(self.fitness_func, [self]*len(solutions_to_submit_indices), solutions_to_submit, solutions_to_submit_indices)):
  File "c:\Users\yevhe\mambaforge\envs\swiftt\Lib\concurrent\futures\_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\yevhe\mambaforge\envs\swiftt\Lib\concurrent\futures\_base.py", line 317, in _result_or_cancel
    return fut.result(timeout)
           ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\yevhe\mambaforge\envs\swiftt\Lib\concurrent\futures\_base.py", line 449, in result
    return self.__get_result()
           ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\yevhe\mambaforge\envs\swiftt\Lib\concurrent\futures\_base.py", line 401, in __get_result
    raise self._exce

AttributeError: 'tuple' object has no attribute 'tb_frame'