#### settings

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/test2

/content/drive/MyDrive/test2


In [None]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product

from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.feature_selection import mutual_info_regression

from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras import backend as K
from kerastuner import HyperModel, RandomSearch

  from kerastuner import HyperModel, RandomSearch


## env standardscaler / only one encoding dimension (method 2)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

#data split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Encoder
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Decoder
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer- with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Even numbers to ensure symmetry
neurons_options = [2, 3, 4, 5, 6, 7]
encoding_dims_options = [1]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations (top 2)
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 2, Encoding dimension: 1, Neurons: 6, Test loss: 0.8446730375289917


Hidden layers: 6, Encoding dimension: 1, Neurons: 6, Test loss: 0.8931149840354919


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Choice('encoding_dim', [1]) # 1D encoding dimension
    neurons = hp.Int('neurons', min_value=2, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=1, hidden_layers_after=1),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning_1__only1__ss__env', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=3, hidden_layers_after=3),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning_2__only1__ss__env', project_name='model_config_2'
)

In [None]:
#Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

#Best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 11s]
val_loss: 0.9547415971755981

Best val_loss So Far: 0.8084353804588318
Total elapsed time: 00h 08m 19s


In [None]:
#Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

#Best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 14s]
val_loss: 1.2414270639419556

Best val_loss So Far: 0.5241653323173523
Total elapsed time: 00h 11m 52s


#### check

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 2, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0076'}

Test Loss: 0.8084353804588318
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step

Latent Feature Correlations:
1.0


In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 5, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'relu', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0047'}

Test Loss: 0.5241653323173523
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step

Latent Feature Correlations:
1.0


In [None]:
# Display the structure of the best model for configuration 1
print("Best Model Structure for Configuration 1:")
best_model_config_1.summary()

# Display the structure of the best model for configuration 2
print("\nBest Model Structure for Configuration 2:")
best_model_config_2.summary()

Best Model Structure for Configuration 1:



Best Model Structure for Configuration 2:


#### past

In [None]:
# Print details of each layer for the best model in configuration 1
print("Best Model for Configuration 1:")
for layer in best_model_config_1.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

# Print details of each layer for the best model in configuration 2
print("\nBest Model for Configuration 2:")
for layer in best_model_config_2.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

Best Model for Configuration 1:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 20
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 3
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 4
Layer: output_layer, Type: Dense, Output Shape: N/A, Parameters: 27

Best Model for Configuration 2:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 50
Layer: dense_encoder_2, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_encoder_3, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 6
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 10
Layer: dense_decoder_2, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_decoder_3, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: output_layer, Type: D

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
bottleneck_output_1.shape

(262, 1)

In [None]:
bottleneck_output_2.shape

(262, 1)

In [None]:
from sklearn.feature_selection import mutual_info_regression
import numpy as np
import pandas as pd

In [None]:
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([0.25983286, 0.06517243, 0.13391081, 0.55594939, 0.07319035,
        1.1582904 , 0.7877846 , 1.07035996, 0.07189837])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Final_Index
0,0.329887,-0.170492,0.334777,-0.022128,0.285213,-0.067833,-0.214541,-0.026632,0.872723,-0.020432
1,0.329887,0.151550,0.345356,0.263774,0.285213,-0.612542,-0.462846,-0.555577,0.617044,-0.314881
2,0.329887,-0.244809,0.360230,-0.149195,0.285213,-0.194009,-0.183503,-0.082310,0.159962,-0.093370
3,0.329887,-0.211779,0.347917,0.922937,0.285213,0.073730,1.244246,-0.011447,0.950960,0.424817
4,0.329887,-0.104432,0.332566,-0.514515,0.285213,-0.867970,-0.835302,-0.846623,-0.360163,-0.655402
...,...,...,...,...,...,...,...,...,...,...
257,-0.292741,-0.269581,-2.328779,-0.927484,0.285213,-0.883358,-0.835302,-0.849154,0.717944,-0.823378
258,0.329887,-0.269581,0.299693,-1.022785,0.285213,-0.898745,-0.835302,-0.871931,-0.563241,-0.745210
259,0.329887,-0.269581,0.020028,-1.078377,0.285213,-0.898745,-0.835302,-0.871931,-0.667147,-0.763366
260,0.328732,-0.269581,-0.341715,-1.062494,0.285213,-0.898745,-0.835302,-0.871931,1.425087,-0.736903


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("environ_AUTOENCODER_best_auto_config1_ss__onedim_1212.xlsx", index=False)

#print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


In [None]:
##Bottleneckoutptu2
mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([0.28100194, 0.06508891, 0.10879726, 0.50515549, 0.18432626,
        0.95536776, 0.83170884, 0.81547498, 0.11390364])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Final_Index
0,0.329887,-0.170492,0.334777,-0.022128,0.285213,-0.067833,-0.214541,-0.026632,0.872723,-0.001589
1,0.329887,0.151550,0.345356,0.263774,0.285213,-0.612542,-0.462846,-0.555577,0.617044,-0.265999
2,0.329887,-0.244809,0.360230,-0.149195,0.285213,-0.194009,-0.183503,-0.082310,0.159962,-0.076075
3,0.329887,-0.211779,0.347917,0.922937,0.285213,0.073730,1.244246,-0.011447,0.950960,0.476540
4,0.329887,-0.104432,0.332566,-0.514515,0.285213,-0.867970,-0.835302,-0.846623,-0.360163,-0.606253
...,...,...,...,...,...,...,...,...,...,...
257,-0.292741,-0.269581,-2.328779,-0.927484,0.285213,-0.883358,-0.835302,-0.849154,0.717944,-0.755919
258,0.329887,-0.269581,0.299693,-1.022785,0.285213,-0.898745,-0.835302,-0.871931,-0.563241,-0.695418
259,0.329887,-0.269581,0.020028,-1.078377,0.285213,-0.898745,-0.835302,-0.871931,-0.667147,-0.713638
260,0.328732,-0.269581,-0.341715,-1.062494,0.285213,-0.898745,-0.835302,-0.871931,1.425087,-0.660112


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("environ_AUTOENCODER_best_auto_config2_ss_onedim_1212.xlsx", index=False)

print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


File 'socioecon_AUTOENCODER.xlsx' has been saved.


## env minmax / only one encoding dimension (method 2)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Input
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Encoder
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Decoder
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer- with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Only even (for symmetry)
neurons_options = [2, 3, 4, 5, 6, 7]
encoding_dims_options = [1]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 8, Encoding dimension: 1, Neurons: 6, Test loss: 0.017740055918693542


Hidden layers: 4, Encoding dimension: 1, Neurons: 7, Test loss: 0.018081046640872955


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Choice('encoding_dim', [1]) # 1D encoding dimension
    neurons = hp.Int('neurons', min_value=2, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=4, hidden_layers_after=4),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning_1_only1__mm_e_nv', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning_2_only1__mm_e_nv', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 14s]
val_loss: 0.02321362867951393

Best val_loss So Far: 0.010461946949362755
Total elapsed time: 00h 11m 35s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 15s]
val_loss: 0.293205201625824

Best val_loss So Far: 0.010827849619090557
Total elapsed time: 00h 12m 07s


#### check

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 5, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.010461946949362755
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step

Latent Feature Correlations:
1.0


In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.010827849619090557
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step

Latent Feature Correlations:
1.0


In [None]:
# Display the structure of the best model for configuration 1
print("Best Model Structure for Configuration 1:")
best_model_config_1.summary()

# Display the structure of the best model for configuration 2
print("\nBest Model Structure for Configuration 2:")
best_model_config_2.summary()

Best Model Structure for Configuration 1:



Best Model Structure for Configuration 2:


#### past

In [None]:
# Display the structure of the best model for configuration 1
print("Best Model Structure for Configuration 1:")
best_model_config_1.summary()

# Display the structure of the best model for configuration 2
print("\nBest Model Structure for Configuration 2:")
best_model_config_2.summary()

Best Model Structure for Configuration 1:



Best Model Structure for Configuration 2:


In [None]:
# Print details of each layer for the best model in configuration 1
print("Best Model for Configuration 1:")
for layer in best_model_config_1.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

# Print details of each layer for the best model in configuration 2
print("\nBest Model for Configuration 2:")
for layer in best_model_config_2.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

Best Model for Configuration 1:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 50
Layer: dense_encoder_2, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_encoder_3, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_encoder_4, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 6
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 10
Layer: dense_decoder_2, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_decoder_3, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: dense_decoder_4, Type: Dense, Output Shape: N/A, Parameters: 30
Layer: output_layer, Type: Dense, Output Shape: N/A, Parameters: 54

Best Model for Configuration 2:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 70
Layer: dense_encoder_

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
bottleneck_output_1.shape

(262, 1)

In [None]:
bottleneck_output_2.shape

(262, 1)

In [None]:
from sklearn.feature_selection import mutual_info_regression
import numpy as np
import pandas as pd

In [None]:
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([0.19907011, 0.05761238, 0.10463319, 0.76597546, 0.18883307,
        0.7502707 , 0.54858188, 0.68317104, 0.15722226])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Final_Index
0,1.000000,0.008158,0.997303,0.208464,1.0,0.118007,0.070423,0.107672,0.859221,0.285995
1,1.000000,0.034670,0.998424,0.264890,1.0,0.040647,0.042254,0.040297,0.819408,0.262578
2,1.000000,0.002039,1.000000,0.183386,1.0,0.100087,0.073944,0.100580,0.748232,0.270631
3,1.000000,0.004759,0.998695,0.394984,1.0,0.138112,0.235915,0.109607,0.871404,0.358904
4,1.000000,0.013596,0.997069,0.111285,1.0,0.004371,0.000000,0.003224,0.667240,0.199296
...,...,...,...,...,...,...,...,...,...,...
257,0.918714,0.000000,0.715068,0.029781,1.0,0.002185,0.000000,0.002901,0.835119,0.174880
258,1.000000,0.000000,0.993585,0.010972,1.0,0.000000,0.000000,0.000000,0.635617,0.173701
259,1.000000,0.000000,0.963952,0.000000,1.0,0.000000,0.000000,0.000000,0.619437,0.169636
260,0.999849,0.000000,0.925621,0.003135,1.0,0.000000,0.000000,0.000000,0.945234,0.183985


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("environ_AUTOENCODER_best_auto_config1_mm_onedim_1213.xlsx", index=False)

print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


File 'socioecon_AUTOENCODER.xlsx' has been saved.


In [None]:
# Assuming X and bottleneck_output are defined
mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([0.23292938, 0.11309411, 0.        , 0.85319751, 0.11018688,
        0.96853565, 0.66585714, 0.82567055, 0.04920631])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Final_Index
0,1.000000,0.008158,0.997303,0.208464,1.0,0.118007,0.070423,0.107672,0.859221,0.213233
1,1.000000,0.034670,0.998424,0.264890,1.0,0.040647,0.042254,0.040297,0.819408,0.187011
2,1.000000,0.002039,1.000000,0.183386,1.0,0.100087,0.073944,0.100580,0.748232,0.200553
3,1.000000,0.004759,0.998695,0.394984,1.0,0.138112,0.235915,0.109607,0.871404,0.289337
4,1.000000,0.013596,0.997069,0.111285,1.0,0.004371,0.000000,0.003224,0.667240,0.125522
...,...,...,...,...,...,...,...,...,...,...
257,0.918714,0.000000,0.715068,0.029781,1.0,0.002185,0.000000,0.002901,0.835119,0.103490
258,1.000000,0.000000,0.993585,0.010972,1.0,0.000000,0.000000,0.000000,0.635617,0.100494
259,1.000000,0.000000,0.963952,0.000000,1.0,0.000000,0.000000,0.000000,0.619437,0.097834
260,0.999849,0.000000,0.925621,0.003135,1.0,0.000000,0.000000,0.000000,0.945234,0.102723


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("environ_AUTOENCODER_best_auto_config2_mm_onedim_1213.xlsx", index=False)

#print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


## different numbers of nerons with regularization techiniques (alpha tuned) - minmax scaler w/ varaince (method 3)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
# Data
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split the data into training and test sets
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Input
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Encoder
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Decoder
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer- with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Even numbers to ensure symmetry
neurons_options = [5, 6, 7]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Best configurations
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 2, Encoding dimension: 4, Neurons: 6, Test loss: 0.01733570173382759


Hidden layers: 8, Encoding dimension: 3, Neurons: 5, Test loss: 0.017404643818736076


add orthogonal regularization (w/ alpha tuned)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Custom layer for orthogonal regularization
class OrthogonalRegularization(Layer):
    def __init__(self, alpha=1e-2, **kwargs):
        super(OrthogonalRegularization, self).__init__(**kwargs)
        self.alpha = alpha

    def call(self, bottleneck_output):
        # Get the batch size
        batch_size = tf.cast(tf.shape(bottleneck_output)[0], tf.float32)

        # Normalize the bottleneck outputs
        normalized_output = bottleneck_output / tf.sqrt(batch_size)

        # Compute correlation matrix
        correlation = tf.matmul(
            tf.transpose(normalized_output),
            normalized_output
        )

        # Create identity matrix of the correct shape
        shape = tf.shape(correlation)[0]
        identity = tf.eye(shape)

        # Calculate loss (excluding diagonal elements)
        mask = tf.ones_like(correlation) - tf.eye(shape)
        loss = tf.reduce_sum(tf.square(correlation * mask))

        # Add loss to the layer
        self.add_loss(self.alpha * loss)

        return bottleneck_output


In [None]:
def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # alpha tuned
    alpha = hp.Choice('alpha', [1e-3, 1e-2, 1e-1, 1.0])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer

    # Encoder layers
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer with orthogonal regularization
    bottleneck = Dense(
        encoding_dim,
        activation=activation,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        name='bottleneck'
    )(x)

    # Apply orthogonal regularization
    bottleneck = OrthogonalRegularization(alpha=alpha)(bottleneck) #modified alpha for tunin

    # Decoder layers
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)

    output_layer = Dense(input_dim, name='output_layer')(x)

    # Create and compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'  # Using standard MSE loss
    )

    return model

In [None]:
from kerastuner.tuners import Hyperband
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=1, hidden_layers_after=1),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_1___orth_o_mm_al',
    project_name='model_config_1',
    overwrite=True
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=4, hidden_layers_after=4),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_2___orth_o_mm_al',
    project_name='model_config_2',
    overwrite=True
)

In [None]:
# Define callbacks
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        min_delta=1e-4
    )
]


In [None]:
# Run the tuning
tuner_config_1.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 12s]
val_loss: 0.009775921702384949

Best val_loss So Far: 0.004274557810276747
Total elapsed time: 00h 08m 45s


In [None]:
# Run the tuning
tuner_config_2.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7341
Latent Dimension 2: 0.7320
Latent Dimension 3: 0.7196
Latent Dimension 4: 0.8144


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2447
Latent Dimension 2: 0.2440
Latent Dimension 3: 0.2399
Latent Dimension 4: 0.2715
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.24468834911628914,
 0.24398539469686967,
 0.2398661822377539,
 0.2714600739490873]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.24468835, 0.24398539, 0.23986618, 0.27146007])

In [None]:
normalized_mi_scores

array([[0.02165572, 0.01908296, 0.07145486, 0.03814173],
       [0.        , 0.        , 0.04007126, 0.        ],
       [0.0471649 , 0.03684772, 0.05666283, 0.04012288],
       [0.11195376, 0.50845727, 0.21681379, 0.04351235],
       [0.        , 0.02108574, 0.02291635, 0.06988601],
       [0.25401528, 0.04859069, 0.        , 0.26867108],
       [0.24015214, 0.05169776, 0.        , 0.29280207],
       [0.27020898, 0.06865596, 0.04440177, 0.24686388],
       [0.05484922, 0.24558191, 0.54767914, 0.        ]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config1_ortho_alpha_1215_mm_with_weights.xlsx", index=False)

config 2

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.6926
Latent Dimension 2: 0.7350
Latent Dimension 3: 0.5724


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.3463
Latent Dimension 2: 0.3675
Latent Dimension 3: 0.2862
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)  # Make sure this is normalized

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config2_ortho_alpha_1205_with_weights.xlsx", index=False)

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 1.0, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0047'}

Test Loss: 0.004190356470644474
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step

Latent Feature Correlations:
[[ 1.         -0.15442104  0.1122028  -0.27715589]
 [-0.15442104  1.         -0.21135118 -0.01751638]
 [ 0.1122028  -0.21135118  1.          0.17355153]
 [-0.27715589 -0.01751638  0.17355153  1.        ]]


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.001, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.006319573614746332
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 104ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step

Latent Feature Correlations:
[[ 1.          0.12469219  0.04857673]
 [ 0.12469219  1.         -0.00981067]
 [ 0.04857673 -0.00981067  1.        ]]


In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## different numbers of nerons with regularization techiniques (alpha tuned) - standard scaler- varaince (method 3)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Input
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Encoder
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Even numbers to ensure symmetry
neurons_options = [5, 6, 7]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 6, Encoding dimension: 4, Neurons: 7, Test loss: 0.6776700019836426


Hidden layers: 4, Encoding dimension: 2, Neurons: 7, Test loss: 0.6793853044509888


add orthogonal regularization (w/ alpha tuned)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Custom layer for orthogonal regularization
class OrthogonalRegularization(Layer):
    def __init__(self, alpha=1e-2, **kwargs):
        super(OrthogonalRegularization, self).__init__(**kwargs)
        self.alpha = alpha

    def call(self, bottleneck_output):
        # Get the batch size
        batch_size = tf.cast(tf.shape(bottleneck_output)[0], tf.float32)

        # Normalize the bottleneck outputs
        normalized_output = bottleneck_output / tf.sqrt(batch_size)

        # Compute correlation matrix
        correlation = tf.matmul(
            tf.transpose(normalized_output),
            normalized_output
        )

        # Create identity matrix of the correct shape
        shape = tf.shape(correlation)[0]
        identity = tf.eye(shape)

        # Calculate loss (excluding diagonal elements)
        mask = tf.ones_like(correlation) - tf.eye(shape)
        loss = tf.reduce_sum(tf.square(correlation * mask))

        # Add loss to the layer
        self.add_loss(self.alpha * loss)

        return bottleneck_output


In [None]:
def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # alpha tuned
    alpha = hp.Choice('alpha', [1e-3, 1e-2, 1e-1, 1.0])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer

    # Encoder layers
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer with orthogonal regularization
    bottleneck = Dense(
        encoding_dim,
        activation=activation,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        name='bottleneck'
    )(x)

    # Apply orthogonal regularization
    bottleneck = OrthogonalRegularization(alpha=alpha)(bottleneck) #modified alpha for tuning

    # Decoder layers
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)

    output_layer = Dense(input_dim, name='output_layer')(x)

    # Create and compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'  # Using standard MSE loss
    )

    return model

In [None]:
from kerastuner.tuners import Hyperband
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=3, hidden_layers_after=3),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_1____ortho_mm_al',
    project_name='model_config_1',
    overwrite=True
)



In [None]:
tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_2_____ortho_mm_al',
    project_name='model_config_2',
    overwrite=True
)

In [None]:
# Define callbacks
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        min_delta=1e-4
    )
]


In [None]:
# Run the tuning
tuner_config_1.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 19s]
val_loss: 1.2442675828933716

Best val_loss So Far: 0.32287564873695374
Total elapsed time: 00h 25m 05s


In [None]:
# Run the tuning
tuner_config_2.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 28s]
val_loss: 0.9296342134475708

Best val_loss So Far: 0.41621869802474976
Total elapsed time: 00h 30m 40s


In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7340
Latent Dimension 2: 0.6820
Latent Dimension 3: 0.9110
Latent Dimension 4: 0.6730


In [None]:
# Normalize the contributions so sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2447
Latent Dimension 2: 0.2273
Latent Dimension 3: 0.3037
Latent Dimension 4: 0.2243
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.24465722754506625,
 0.22733997216014626,
 0.3036687883514401,
 0.22433401194334743]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
# Assuming latent_contributions is the list of normalized contributions per bottleneck dimension
# (from your earlier bottleneck contribution analysis)
latent_contributions = np.array(normalized_contributions)  # Make sure this is normalized

# Calculate MI scores (as you have done)
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.24465723, 0.22733997, 0.30366879, 0.22433401])

In [None]:
normalized_mi_scores

array([[0.05491319, 0.05443956, 0.13674809, 0.06809482],
       [0.        , 0.07347814, 0.06999454, 0.05433254],
       [0.01000749, 0.09660643, 0.07870568, 0.05640735],
       [0.13687258, 0.09104379, 0.25941411, 0.36604399],
       [0.00055738, 0.05481464, 0.10044701, 0.02421275],
       [0.10943837, 0.16318211, 0.08451129, 0.12959135],
       [0.11126754, 0.17484696, 0.05265221, 0.11884859],
       [0.11346492, 0.0611857 , 0.11429133, 0.15568355],
       [0.46347852, 0.23040268, 0.10323575, 0.02678505]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config1_ortho_alpha_1205_standards_with_weights.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.6038
Latent Dimension 2: 0.8283
Latent Dimension 3: 0.5678


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify the sum of 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.3019
Latent Dimension 2: 0.4142
Latent Dimension 3: 0.2839
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("env_AUTOENCODER_best_auto_config2_ortho_alpha_1205_standards_with_weights.xlsx", index=False)

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.001, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.32287564873695374
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step

Latent Feature Correlations:
[[1.         0.25143366 0.18116786 0.43535495]
 [0.25143366 1.         0.08490143 0.13977177]
 [0.18116786 0.08490143 1.         0.3420611 ]
 [0.43535495 0.13977177 0.3420611  1.        ]]


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 5, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'tanh', 'alpha': 1.0, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.41621869802474976
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step

Latent Feature Correlations:
[[ 1.         -0.42658854 -0.08541906]
 [-0.42658854  1.          0.01507296]
 [-0.08541906  0.01507296  1.        ]]


In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## env standardscaler - original (method 1)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Even numbers to ensure symmetry
neurons_options = [5, 6, 7]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 6, Encoding dimension: 4, Neurons: 7, Test loss: 0.5657718777656555


Hidden layers: 8, Encoding dimension: 4, Neurons: 6, Test loss: 0.6356199979782104


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=3, hidden_layers_after=3),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1_env8', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=4, hidden_layers_after=4),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2_env8', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


Trial 90 Complete [00h 00m 20s]
val_loss: 1.2530938386917114

Best val_loss So Far: 0.4033838212490082
Total elapsed time: 00h 13m 12s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 21s]
val_loss: 0.526634693145752

Best val_loss So Far: 0.4050799012184143
Total elapsed time: 00h 19m 05s


#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0080'}

Test Loss: 0.4033838212490082
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step

Latent Feature Correlations:
[[ 1.         -0.31142513  0.12038532 -0.7827916 ]
 [-0.31142513  1.         -0.21834381 -0.21059738]
 [ 0.12038532 -0.21834381  1.         -0.09728847]
 [-0.7827916  -0.21059738 -0.09728847  1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.8021
Latent Dimension 2: 0.8625
Latent Dimension 3: 0.7053
Latent Dimension 4: 0.6301


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2674
Latent Dimension 2: 0.2875
Latent Dimension 3: 0.2351
Latent Dimension 4: 0.2100
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.2673561997680646, 0.2874910033571199, 0.2351118423322335, 0.210040954542582]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)
# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.2673562 , 0.287491  , 0.23511184, 0.21004095])

In [None]:
normalized_mi_scores

array([[1.82789358e-01, 0.00000000e+00, 2.90412525e-02, 5.20781002e-02],
       [1.40714583e-01, 1.63394763e-02, 0.00000000e+00, 3.29357181e-02],
       [1.23344218e-01, 4.46412873e-03, 1.92897602e-01, 1.83419311e-02],
       [3.64456543e-01, 1.62390538e-01, 1.53691623e-01, 1.37036389e-01],
       [9.59032049e-02, 3.54061624e-04, 3.54748268e-02, 3.45214852e-02],
       [8.33087666e-03, 3.17567113e-01, 8.56430463e-03, 2.42745157e-01],
       [3.12365266e-02, 2.58101508e-01, 4.90610718e-02, 2.05722665e-01],
       [1.69810907e-02, 2.33380011e-01, 6.81132412e-02, 2.71048045e-01],
       [3.62435994e-02, 7.40316411e-03, 4.63156078e-01, 5.57051053e-03]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config1_1206_with_weights.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.4050799012184143
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step

Latent Feature Correlations:
[[ 1.         -0.19770959 -0.0084177 ]
 [-0.19770959  1.          0.32982928]
 [-0.0084177   0.32982928  1.        ]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.5623
Latent Dimension 2: 0.5832
Latent Dimension 3: 0.8545


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2812
Latent Dimension 2: 0.2916
Latent Dimension 3: 0.4273
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config2_1206_with_weights.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## env MinMaxscaler -original (method 1)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:12].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Even numbers to ensure symmetry
neurons_options = [5, 6, 7]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 6, Encoding dimension: 3, Neurons: 7, Test loss: 0.0164178516715765


Hidden layers: 8, Encoding dimension: 2, Neurons: 6, Test loss: 0.01662752591073513


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=3, hidden_layers_after=3), #modify after getting the value from the summary
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1_env10', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=9, hidden_layers_before=4, hidden_layers_after=4),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2_env10', project_name='model_config_2'
)

### run

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


Trial 90 Complete [00h 00m 20s]
val_loss: 0.352390855550766

Best val_loss So Far: 0.007303156889975071
Total elapsed time: 00h 14m 51s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 88 Complete [00h 00m 24s]
val_loss: 0.019014596939086914

Best val_loss So Far: 0.006670149974524975
Total elapsed time: 00h 20m 04s

Search: Running Trial #89

Value             |Best Value So Far |Hyperparameter
2                 |3                 |encoding_dim
5                 |7                 |neurons
0.01              |0.01              |learning_rate
32                |32                |batch_size
tanh              |tanh              |activation
50                |50                |tuner/epochs
0                 |17                |tuner/initial_epoch
0                 |3                 |tuner/bracket
0                 |3                 |tuner/round

Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 56ms/step - loss: 0.3885 - val_loss: 0.1915
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1551 - val_loss: 0.0659
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - lo

#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0048'}

Test Loss: 0.007303156889975071
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step

Latent Feature Correlations:
[[ 1.         -0.45445476 -0.78403224 -0.12217755]
 [-0.45445476  1.         -0.16160395  0.30703635]
 [-0.78403224 -0.16160395  1.          0.10416305]
 [-0.12217755  0.30703635  0.10416305  1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")



[1m1/7[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 164ms/step



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7173
Latent Dimension 2: 0.6965
Latent Dimension 3: 0.8412
Latent Dimension 4: 0.7450


In [None]:
# Normalize the contributions so sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2391
Latent Dimension 2: 0.2322
Latent Dimension 3: 0.2804
Latent Dimension 4: 0.2483
Sum of Normalized Contributions: 0.9999999999999999


In [None]:
normalized_contributions

[0.23908445789531116, 0.2321787989994907, 0.28039741642983, 0.2483393266753681]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.23908446, 0.2321788 , 0.28039742, 0.24833933])

In [None]:
normalized_mi_scores

array([[0.04097708, 0.13628392, 0.0289026 , 0.        ],
       [0.0224451 , 0.04038303, 0.05741666, 0.0292082 ],
       [0.02090681, 0.02290268, 0.        , 0.0077531 ],
       [0.43017918, 0.05324949, 0.62802842, 0.15779324],
       [0.05153972, 0.08714428, 0.00214433, 0.04825796],
       [0.13108251, 0.02538489, 0.08977909, 0.25310702],
       [0.10845656, 0.0238027 , 0.06638744, 0.16105842],
       [0.14589141, 0.07061099, 0.08941914, 0.31709371],
       [0.04852163, 0.54023803, 0.03792232, 0.02572833]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("env_AUTOENCODER_best_auto_config1_1206_with_weights_mm.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0047'}

Test Loss: 0.006670149974524975
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step

Latent Feature Correlations:
[[ 1.         -0.21453063  0.40298506]
 [-0.21453063  1.         -0.40378934]
 [ 0.40298506 -0.40378934  1.        ]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7887
Latent Dimension 2: 0.5999
Latent Dimension 3: 0.6114


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.3944
Latent Dimension 2: 0.2999
Latent Dimension 3: 0.3057
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(9, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Excel
df.to_excel("env_AUTOENCODER_best_auto_config2_1206_with_weights_mm.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()