In [6]:
import numpy as np
import os
import json
import math

import helper
from feature_optimization import FeatureOptimizer
import feature_opt_functions as funcs
from indices import *

from scipy.stats import spearmanr, pearsonr

from sklearn.preprocessing import RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import jaccard_score, average_precision_score

from imblearn.over_sampling import RandomOverSampler

In [7]:
# Build lookup tables from the two spreadsheets: the first element of every
# non-None entry becomes the key and the second element the value.
dictOfStressedImageNames = {
    entry[0]: entry[1]
    for entry in helper.getStressedImagesNames('stress_date.xlsx')
    if entry is not None
}

# The same helper is used for the reference spreadsheet.
dictOfReferenceImageNames = {
    entry[0]: entry[1]
    for entry in helper.getStressedImagesNames('reference_date.xlsx')
    if entry is not None
}

# Tile subdivision: element 0 is used for training, element 1 for testing.
tilesPerGroup = None
with open(os.path.join("subdivs", "tiles_2img_2.json"), 'r') as tiles_file:
    tilesPerGroup = json.load(tiles_file)

train_tiles, test_tiles = tilesPerGroup[0], tilesPerGroup[1]

# Each result is indexed later as [0] (H series) and [1] (S series).
# NOTE(review): presumably H = healthy and S = stressed samples - confirm in helper.
train_data = helper.getH_and_S(dictOfStressedImageNames, train_tiles)
train_data_ref = helper.getH_and_S(dictOfReferenceImageNames, train_tiles)
test_data = helper.getH_and_S(dictOfStressedImageNames, test_tiles)
test_data_ref = helper.getH_and_S(dictOfReferenceImageNames, test_tiles)

(92, 4)
(92, 4)


In [8]:
# Module-level caches that evaluateModel attaches to every FeatureOptimizer it
# creates (as informativeness_cache / independency_cache), so they are shared
# between successive evaluations.
inform_cache, indep_cache = {}, {}

# Two-element list passed to FeatureOptimizer.fit: elements 0 and 1 of
# train_data after helper.leaveFinite has been applied to each.
data = [helper.leaveFinite(train_data[k]) for k in (0, 1)]

In [9]:
def evaluateModel(informativeness_treshold, independency_treshold, random_state=None):
    """Select features, train an MLP on them and report train/test metrics.

    The function runs four stages: feature optimization, scaling and data
    preparation, model training, and evaluation. Besides its arguments it
    reads the notebook-level names ``data``, ``train_data``,
    ``train_data_ref``, ``test_data``, ``test_data_ref``, ``inform_cache``
    and ``indep_cache``; the two caches are attached to the optimizer and are
    therefore shared between calls.

    Parameters
    ----------
    informativeness_treshold
        Forwarded to ``FeatureOptimizer`` as ``informativeness_threshold``.
    independency_treshold
        Forwarded to ``FeatureOptimizer`` as ``independency_threshold``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``MLPClassifier``. The default ``None`` keeps the
        previous unseeded behaviour. It does NOT seed the feature optimizer,
        so the selected features can still differ between runs.

    Returns
    -------
    dict
        Keys ``"iou (train)"``, ``"iou (test)"``, ``"pr_auc (train)"``,
        ``"pr_auc (test)"``, ``"fitness"``, ``"feature_count"`` and
        ``"features"`` (selected features converted to ``int``).
    """
    # BEGIN FEATURE OPTIMIZER BLOCK
    print("-"*33)
    print("START NEW MODEL")
    print("INFO:", informativeness_treshold, "INDEP:", independency_treshold)
    print("Feature Optimization")
    # Options handed to the optimizer as optimizer_args.
    # NOTE(review): the keys look like PyGAD GA options - confirm in FeatureOptimizer.
    args = {
        "num_generations": 150,
        "num_parents_mating": 3,
        "parent_selection_type": "sss",
        "keep_elitism": 1,
        "sol_per_pop": 150,
        "mutation_probability": 0.25,
        "parallel_processing": 8,
    }

    encoder = IndicesClassEncoderEq([HueSimp, B, NORMP, DIST2], list(range(1, 12)))
    print(f"Avaible features: {encoder.total_length}")

    # NOTE(review): 48 is the second positional argument of FeatureOptimizer,
    # presumably an upper bound on the number of selected features - confirm.
    opt = FeatureOptimizer(encoder, 48,
                           funcs.bhattacharyya_distance,
                           funcs.spearman_independency,
                           optimization_method="genetic",
                           optimizer_args=args,
                           informativeness_threshold=informativeness_treshold,
                           independency_threshold=independency_treshold,
                           set_independency='weighted_harmonic_mean')

    # Reuse the notebook-level caches so repeated evaluations share them.
    opt.informativeness_cache = inform_cache
    opt.independency_cache = indep_cache

    opt.fit(data, data[1], False)
    # END FEATURE OPTIMIZER BLOCK

    # BEGIN SCALING & DATA PREPARATION BLOCK
    print("Data Preparation")
    # Each series pairs an element of the stressed-date data with the matching
    # element of the reference-date data; index 0 is H, index 1 is S.
    indices_train_H = helper.leaveFinite(opt.transform_series([train_data[0], train_data_ref[0]])).swapaxes(0, 1)
    indices_train_S = helper.leaveFinite(opt.transform_series([train_data[1], train_data_ref[1]])).swapaxes(0, 1)
    indices_test_H = helper.leaveFinite(opt.transform_series([test_data[0], test_data_ref[0]])).swapaxes(0, 1)
    indices_test_S = helper.leaveFinite(opt.transform_series([test_data[1], test_data_ref[1]])).swapaxes(0, 1)

    scaler = RobustScaler(unit_variance=True)

    # The scaler is fitted on the training H samples only; every other array
    # is transformed with those same statistics.
    indices_train_H = scaler.fit_transform(indices_train_H)
    indices_train_S = scaler.transform(indices_train_S)
    indices_test_H = scaler.transform(indices_test_H)
    indices_test_S = scaler.transform(indices_test_S)

    train_X, train_y = helper.joinData(indices_train_H, indices_train_S)
    test_X, test_y = helper.joinData(indices_test_H, indices_test_S)
    # END SCALING & DATA PREPARATION BLOCK

    # BEGIN MODEL TRAINING BLOCK
    features_count = len(opt.selected_features)
    print("Model Training")
    # One hidden layer, twice as wide as the number of selected features.
    # Written as an explicit 1-tuple: "(2 * n)" is just a parenthesised int.
    clf = MLPClassifier(tol=1e-2,
                        learning_rate="adaptive",
                        activation='relu',
                        hidden_layer_sizes=(2 * features_count,),
                        early_stopping=True,
                        max_iter=60,
                        random_state=random_state)

    clf.fit(train_X, train_y)
    # END MODEL TRAINING BLOCK

    # BEGIN MODEL EVALUATION BLOCK
    print("Model Evaluation")
    # Metrics that score hard labels vs. metrics that score probabilities.
    predict_metrics = [("iou", jaccard_score)]
    predict_proba_metrics = [("pr_auc", average_precision_score)]

    pred_train = clf.predict(train_X)
    # Column 1 holds the probability of the second class in clf.classes_.
    pred_proba_train = clf.predict_proba(train_X)[:, 1]

    pred_test = clf.predict(test_X)
    pred_proba_test = clf.predict_proba(test_X)[:, 1]

    metric_results = {}
    for key, metric in predict_metrics:
        metric_results[key + " (train)"] = metric(train_y, pred_train)
        metric_results[key + " (test)"] = metric(test_y, pred_test)

    for key, metric in predict_proba_metrics:
        metric_results[key + " (train)"] = metric(train_y, pred_proba_train)
        metric_results[key + " (test)"] = metric(test_y, pred_proba_test)
    # END MODEL EVALUATION BLOCK

    # BEGIN PRINTING BLOCK
    metric_results["fitness"] = opt.get_fitness_()
    metric_results["feature_count"] = features_count
    # Cast to plain int so the result dictionary can be serialised with json.
    metric_results["features"] = [int(f) for f in opt.selected_features]
    for metric, value in metric_results.items():
        print(f"    {metric}: {value}")

    print("-"*33)
    # END PRINTING BLOCK

    return metric_results

In [11]:
# Threshold grid: every (informativeness, independency) pair is evaluated once.
informativeness_values = [0.1]
independency_values = [0.1]

print("Num of evaluations: ", len(informativeness_values) * len(independency_values))

# Rows follow informativeness_values, columns follow independency_values.
# The matrix is allocated up front and filled cell by cell.
result_matrix = [[None] * len(independency_values) for _ in informativeness_values]
for i, informativeness in enumerate(informativeness_values):
    for j, independency in enumerate(independency_values):
        result_matrix[i][j] = evaluateModel(informativeness, independency)

# Saving the grid to disk is currently disabled.
# with open(os.path.join("grid_search", f"hue_simp_normp4_16_1_2_eq_weighted_harmonic_mean.json"), 'w') as fout:
#     json.dump(result_matrix, fout)

Num of evaluations:  1
---------------------------------
START NEW MODEL
INFO: 0.1 INDEP: 0.1
Feature Optimization
Avaible features: 5324
Fitness (Gen 150): 4.387208725734177
Data Preparation
Model Training
Model Evaluation
    iou (train): 0.6451073506788844
    iou (test): 0.5519338918856881
    pr_auc (train): 0.8686522227302897
    pr_auc (test): 0.7632305653850626
    fitness: 4.387208725734177
    feature_count: 31
    features: [3684, 603, 1142, 3236, 735, 3969, 3786, 3094, 725, 2729, 344, 2257, 3403, 2353, 1187, 846, 254, 3274, 2670, 763, 3369, 984, 653, 3419, 877, 4018, 2546, 1452, 2096, 3087, 5288]
---------------------------------
