In [1]:
import numpy as np
import os
import json

import helper
from feature_optimization import FeatureOptimizer
import feature_opt_functions as funcs
from indices import *

from scipy.stats import spearmanr, pearsonr

from sklearn.preprocessing import RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import jaccard_score, average_precision_score

from imblearn.over_sampling import RandomOverSampler

In [2]:
# Map each non-None entry yielded by the helper to {entry[0]: entry[1]},
# first for the stress-date sheet and then for the reference-date sheet.
dictOfStressedImageNames = {
    entry[0]: entry[1]
    for entry in helper.getStressedImagesNames('stress_date.xlsx')
    if entry is not None
}

dictOfReferenceImageNames = {
    entry[0]: entry[1]
    for entry in helper.getStressedImagesNames('reference_date.xlsx')
    if entry is not None
}

# The JSON file holds a sequence of tile groups: element 0 is used for
# training and element 1 for testing.
tiles_path = os.path.join("subdivs", "tiles_2img_2.json")
with open(tiles_path, 'r') as tiles_file:
    tilesPerGroup = json.load(tiles_file)

train_tiles, test_tiles = tilesPerGroup[0], tilesPerGroup[1]

# Load the data for every (image set, tile split) combination. Later cells
# index each result with [0] and [1] (the "H" and "S" parts -- presumably
# healthy / stressed; TODO confirm against helper.getH_and_S).
train_data = helper.getH_and_S(dictOfStressedImageNames, train_tiles)
train_data_ref = helper.getH_and_S(dictOfReferenceImageNames, train_tiles)
test_data = helper.getH_and_S(dictOfStressedImageNames, test_tiles)
test_data_ref = helper.getH_and_S(dictOfReferenceImageNames, test_tiles)

(92, 4)
(92, 4)


In [3]:
def evaluateModel(informativeness_treshold, independency_treshold, random_state=None):
    """Run one grid-search trial: select features, train an MLP and score it.

    The function reads the module-level ``train_data``, ``train_data_ref``,
    ``test_data`` and ``test_data_ref`` objects. Element ``[0]`` of each is
    treated as the "H" series and element ``[1]`` as the "S" series
    (presumably healthy / stressed samples -- TODO confirm against
    ``helper.getH_and_S``).

    Parameters
    ----------
    informativeness_treshold : float
        Forwarded to ``FeatureOptimizer`` as ``informativeness_threshold``.
        (The parameter name keeps its historical spelling so that existing
        keyword callers continue to work.)
    independency_treshold : float
        Forwarded to ``FeatureOptimizer`` as ``independency_threshold``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``MLPClassifier``. The default ``None`` is the
        classifier's own default, so existing calls behave as before. Pass an
        integer to make the classifier training repeatable. This only seeds
        the classifier; the genetic feature search is not seeded here.

    Returns
    -------
    dict
        Keys ``"iou"`` (``jaccard_score`` on hard predictions), ``"pr_auc"``
        (``average_precision_score`` on column 1 of ``predict_proba``),
        ``"fitness"`` (``opt.get_fitness_()``) and ``"feature_count"``
        (``len(opt.selected_features)``).
    """
    # BEGIN FEATURE OPTIMIZER BLOCK
    print("-"*33)
    print("START NEW MODEL")
    print("INFO:", informativeness_treshold, "INDEP:", independency_treshold)
    print("Feature Optimization")
    # Settings handed to the optimizer through ``optimizer_args``.
    args = {
        "num_generations": 150,
        "num_parents_mating": 2,
        "parent_selection_type": "sss",
        "crossover_type": "uniform",
        "keep_parents": 1,
        "keep_elitism": 1,
        "sol_per_pop": 150,
        "mutation_probability": 0.3,
        "parallel_processing": 8,
    }

    opt = FeatureOptimizer(IndicesClassEncoder([NORMP4], 12),
                           12,
                           funcs.bhattacharyya_distance,
                           funcs.spearman_independency,
                           optimization_method="genetic",
                           optimizer_args=args,
                           informativeness_threshold=informativeness_treshold,
                           independency_threshold=independency_treshold)

    # Only the stressed-date training data (no reference images) is used to
    # fit the optimizer.
    data = [helper.leaveFinite(train_data[0]), helper.leaveFinite(train_data[1])]
    # NOTE(review): the second argument repeats data[1]; confirm this matches
    # the signature FeatureOptimizer.fit expects.
    opt.fit(data, data[1])
    # END FEATURE OPTIMIZER BLOCK

    # BEGIN SCALING & DATA PREPARATION BLOCK
    print("Data Preparation")
    # Each series pairs the stressed-date data with its reference-date
    # counterpart; the first two axes of the result are swapped afterwards
    # (presumably to put samples first -- TODO confirm).
    indices_train_H = helper.leaveFinite(opt.transform_series([train_data[0], train_data_ref[0]])).swapaxes(0, 1)
    indices_train_S = helper.leaveFinite(opt.transform_series([train_data[1], train_data_ref[1]])).swapaxes(0, 1)
    indices_test_H = helper.leaveFinite(opt.transform_series([test_data[0], test_data_ref[0]])).swapaxes(0, 1)
    indices_test_S = helper.leaveFinite(opt.transform_series([test_data[1], test_data_ref[1]])).swapaxes(0, 1)

    scaler = RobustScaler(unit_variance=True)

    # The scaler statistics come from the training "H" set only; the three
    # remaining sets are transformed with those same statistics.
    # NOTE(review): confirm that fitting on "H" alone (rather than H and S
    # together) is intended.
    indices_train_H = scaler.fit_transform(indices_train_H)
    indices_train_S = scaler.transform(indices_train_S)
    indices_test_H = scaler.transform(indices_test_H)
    indices_test_S = scaler.transform(indices_test_S)

    # No oversampling is applied: the joined training set is used as-is
    # (a RandomOverSampler step used to be sketched here but was disabled).
    train_X, train_y = helper.joinData(indices_train_H, indices_train_S)
    test_X, test_y = helper.joinData(indices_test_H, indices_test_S)
    # END SCALING & DATA PREPARATION BLOCK

    # BEGIN MODEL TRAINING BLOCK
    print("Model Training")
    clf = MLPClassifier(tol=1e-4,
                        alpha=1e-4,
                        learning_rate="adaptive",
                        activation='relu',
                        hidden_layer_sizes=(12, 4),
                        shuffle=True,
                        early_stopping=True,
                        max_iter=80,
                        random_state=random_state)

    clf.fit(train_X, train_y)
    # END MODEL TRAINING BLOCK

    # BEGIN MODEL EVALUATION BLOCK
    print("Model Evaluation")
    # Metrics are grouped by the kind of prediction they consume.
    predict_metrics = [("iou", jaccard_score)]
    predict_proba_metrics = [("pr_auc", average_precision_score)]

    pred = clf.predict(test_X)
    # Column 1 of predict_proba is used as the score (presumably the
    # positive-class probability -- depends on the label order produced by
    # helper.joinData).
    pred_proba = clf.predict_proba(test_X)[:, 1]

    metric_results = {}
    for key, metric in predict_metrics:
        metric_results[key] = metric(test_y, pred)

    for key, metric in predict_proba_metrics:
        metric_results[key] = metric(test_y, pred_proba)
    # END MODEL EVALUATION BLOCK

    # BEGIN PRINTING BLOCK
    metric_results["fitness"] = opt.get_fitness_()
    metric_results["feature_count"] = len(opt.selected_features)
    for name, value in metric_results.items():
        print(f"    {name}: {value}")

    print("-"*33)
    # END PRINTING BLOCK

    return metric_results


In [4]:
# Threshold grids: every (informativeness, independency) pair is evaluated once.
informativeness_values = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5]
independency_values = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

print("Num of evaluations: ", len(informativeness_values) * len(independency_values))

# Create the output directory before the sweep starts, so a missing folder
# cannot cause the final write to fail after all evaluations have finished.
results_dir = "grid_search"
os.makedirs(results_dir, exist_ok=True)

# result_matrix[i][j] holds the metrics dict for informativeness_values[i]
# and independency_values[j]. The matrix is pre-allocated and filled in place
# so partial results stay inspectable if the sweep is interrupted.
result_matrix = [[None]*len(independency_values) for _ in range(len(informativeness_values))]
for i, informativeness in enumerate(informativeness_values):
    for j, independency in enumerate(independency_values):
        result_matrix[i][j] = evaluateModel(informativeness, independency)

with open(os.path.join(results_dir, "normp4_results.json"), 'w') as fout:
    json.dump(result_matrix, fout)

Num of evaluations:  54
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 0.01
Feature Optimization
Count: 20736 12 4
Fitness (Gen 150): 0.9569122435307272
Data Preparation
Model Training




Model Evaluation
    iou: 0.5329464181029429
    pr_auc: 0.6897380894207212
    fitness: 0.9569122435307272
    feature_count: 11
---------------------------------
---------------------------------
START NEW MODEL
INFO: 0.01 INDEP: 0.1
Feature Optimization
Count: 20736 12 4
