#### settings

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/test2

/content/drive/MyDrive/test2


In [None]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product

from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.feature_selection import mutual_info_regression

from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras import backend as K
from kerastuner import HyperModel, RandomSearch

  from kerastuner import HyperModel, RandomSearch


## phy standardscaler (method 1)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values
#X = data.iloc[:, 3:14].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # even numbers to ensure symmetry
neurons_options = [5, 6, 7, 8, 9, 10]
encoding_dims_options = [1, 2, 3, 4]
results = []


# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 2, Encoding dimension: 4, Neurons: 8, Test loss: 0.5514370799064636


Hidden layers: 4, Encoding dimension: 3, Neurons: 10, Test loss: 0.6580934524536133


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=10, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=1, hidden_layers_after=1), #modify after getting the value from the summary
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1__ss_phyy', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2__ss_phyy', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


Trial 90 Complete [00h 00m 13s]
val_loss: 0.7671194076538086

Best val_loss So Far: 0.4129227101802826
Total elapsed time: 00h 10m 50s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 18s]
val_loss: 0.30718353390693665

Best val_loss So Far: 0.3028760254383087
Total elapsed time: 00h 14m 13s


#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 8, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'relu', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0080'}

Test Loss: 0.4129227101802826
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step

Latent Feature Correlations:
[[1.         0.61635163 0.26750426]
 [0.61635163 1.         0.244703  ]
 [0.26750426 0.244703   1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")



[1m1/7[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 46ms/step



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.6342
Latent Dimension 2: 0.7636
Latent Dimension 3: 0.6022


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.3171
Latent Dimension 2: 0.3818
Latent Dimension 3: 0.3011
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.3170869086499392, 0.38179098914070914, 0.3011221022093517]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
latent_contributions = np.array(normalized_contributions)

# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.31708691, 0.38179099, 0.3011221 ])

In [None]:
normalized_mi_scores

array([[0.15011097, 0.10620553, 0.08672377],
       [0.18182473, 0.12467722, 0.11374404],
       [0.16756724, 0.11552894, 0.09927581],
       [0.14175388, 0.10779206, 0.09689962],
       [0.13270866, 0.0880148 , 0.04722598],
       [0.01480394, 0.03511203, 0.        ],
       [0.02852011, 0.03585211, 0.05467267],
       [0.07211318, 0.10658665, 0.24743823],
       [0.06023722, 0.15255766, 0.16701843],
       [0.02236246, 0.04132971, 0.05307585],
       [0.02799761, 0.08634328, 0.03392559]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config1_1207_with_weights_original_ss.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 10, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0067'}

Test Loss: 0.3028760254383087
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step

Latent Feature Correlations:
[[ 1.00000000e+00  6.93186445e-01  3.23213563e-01 -4.24927139e-01]
 [ 6.93186445e-01  1.00000000e+00 -3.69901700e-02 -5.60255071e-04]
 [ 3.23213563e-01 -3.69901700e-02  1.00000000e+00 -3.22042351e-01]
 [-4.24927139e-01 -5.60255071e-04 -3.22042351e-01  1.00000000e+00]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7465
Latent Dimension 2: 0.6657
Latent Dimension 3: 0.7904
Latent Dimension 4: 0.7974


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2488
Latent Dimension 2: 0.2219
Latent Dimension 3: 0.2635
Latent Dimension 4: 0.2658
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
latent_contributions = np.array(normalized_contributions)

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config2_1207_with_weights_original_ss.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## phy minmaxscaler - (method 1)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
#data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values
#X = data.iloc[:, 3:14].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # even numbers to ensure symmetry
neurons_options = [5, 6, 7, 8, 9, 10]
encoding_dims_options = [1, 2, 3, 4]
results = []


# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()



Top two configurations:
Hidden layers: 2, Encoding dimension: 3, Neurons: 10, Test loss: 0.02716675028204918


Hidden layers: 4, Encoding dimension: 2, Neurons: 10, Test loss: 0.027219893410801888


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=10, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=1, hidden_layers_after=1), #modify after getting the value from the summary
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1__ss_phyyy', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2__ss_phyyy', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 22s]
val_loss: 0.016422847285866737

Best val_loss So Far: 0.011913023889064789
Total elapsed time: 00h 23m 00s


#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0068'}

Test Loss: 0.008999297395348549
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step

Latent Feature Correlations:
[[ 1.         -0.14579583 -0.61019016  0.3006241 ]
 [-0.14579583  1.          0.19292236 -0.12439314]
 [-0.61019016  0.19292236  1.         -0.57178489]
 [ 0.3006241  -0.12439314 -0.57178489  1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7220
Latent Dimension 2: 0.7774
Latent Dimension 3: 0.7903
Latent Dimension 4: 0.7103


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2407
Latent Dimension 2: 0.2591
Latent Dimension 3: 0.2634
Latent Dimension 4: 0.2368
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.24066080516128188,
 0.2591359219330363,
 0.2634226021785207,
 0.23678067072716114]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
latent_contributions = np.array(normalized_contributions)  # Make sure this is normalized

# Calculate MI scores (as you have done)
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.24066081, 0.25913592, 0.2634226 , 0.23678067])

In [None]:
normalized_mi_scores

array([[0.18709086, 0.07893099, 0.16996237, 0.16918856],
       [0.19971488, 0.09700679, 0.18052688, 0.15294079],
       [0.22112277, 0.08273629, 0.17458342, 0.1519596 ],
       [0.16840351, 0.12572497, 0.1456821 , 0.14345639],
       [0.1187352 , 0.09617221, 0.17313068, 0.23203735],
       [0.02784157, 0.03872178, 0.07508269, 0.01464302],
       [0.01541664, 0.03815085, 0.        , 0.01386905],
       [0.02127288, 0.12097229, 0.03524271, 0.04303438],
       [0.01545681, 0.25092877, 0.02915499, 0.04361451],
       [0.00310137, 0.03332321, 0.00759129, 0.02013456],
       [0.02184351, 0.03733186, 0.00904288, 0.01512179]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config1_1207_with_weights_original_mm.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 9, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0080'}

Test Loss: 0.011913023889064789
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step

Latent Feature Correlations:
[[ 1.          0.05646785  0.23932737 -0.60939367]
 [ 0.05646785  1.         -0.71114414  0.49033197]
 [ 0.23932737 -0.71114414  1.         -0.32713269]
 [-0.60939367  0.49033197 -0.32713269  1.        ]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7104
Latent Dimension 2: 0.6850
Latent Dimension 3: 0.8217
Latent Dimension 4: 0.7828


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2368
Latent Dimension 2: 0.2283
Latent Dimension 3: 0.2739
Latent Dimension 4: 0.2609
Sum of Normalized Contributions: 0.9999999999999999


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
latent_contributions = np.array(normalized_contributions)  # Make sure this is normalized

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config2_1207_with_weights_original_mm.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## phy standardscaler - one dimension (method2)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
#data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values
#X = data.iloc[:, 3:14].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # even numbers to ensure symmetry
neurons_options = [2, 3, 4, 5, 6, 7, 8, 9, 10]
encoding_dims_options = [1]
results = []


# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 8, Encoding dimension: 1, Neurons: 7, Test loss: 0.8182411193847656


Hidden layers: 6, Encoding dimension: 1, Neurons: 9, Test loss: 0.8768022656440735


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Choice('encoding_dim', [1]) # Only one encoding dimension
    neurons = hp.Int('neurons', min_value=2, max_value=10, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])


    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=4, hidden_layers_after=4), #modify after getting the value from the summary
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1___ss_onedim__phy', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=3, hidden_layers_after=3),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2___ss_onedim__phy', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


Trial 90 Complete [00h 00m 16s]
val_loss: 0.7532385587692261

Best val_loss So Far: 0.7532385587692261
Total elapsed time: 00h 13m 47s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 18s]
val_loss: 1.3939491510391235

Best val_loss So Far: 0.7208415865898132
Total elapsed time: 00h 16m 14s


In [None]:
# Display the structure of the best model for configuration 1
print("Best Model Structure for Configuration 1:")
best_model_config_1.summary()

# Display the structure of the best model for configuration 2
print("\nBest Model Structure for Configuration 2:")
best_model_config_2.summary()

Best Model Structure for Configuration 1:



Best Model Structure for Configuration 2:


In [None]:
# Print details of each layer for the best model in configuration 1
print("Best Model for Configuration 1:")
for layer in best_model_config_1.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

# Print details of each layer for the best model in configuration 2
print("\nBest Model for Configuration 2:")
for layer in best_model_config_2.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

Best Model for Configuration 1:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 108
Layer: dense_encoder_2, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: dense_encoder_3, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: dense_encoder_4, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 10
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 18
Layer: dense_decoder_2, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: dense_decoder_3, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: dense_decoder_4, Type: Dense, Output Shape: N/A, Parameters: 90
Layer: output_layer, Type: Dense, Output Shape: N/A, Parameters: 110

Best Model for Configuration 2:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 60
Layer: dense_encod

#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 9, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'relu', 'tuner/epochs': 50, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}

Test Loss: 0.7532385587692261
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step

Latent Feature Correlations:
1.0


In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 5, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'relu', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0048'}

Test Loss: 0.7208415865898132
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step

Latent Feature Correlations:
1.0


In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

### past

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
bottleneck_output_1.shape

(262, 1)

In [None]:
bottleneck_output_2.shape

(262, 1)

In [None]:
from sklearn.feature_selection import mutual_info_regression
import numpy as np
import pandas as pd

In [None]:
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([1.15895093, 1.36564518, 1.53357726, 1.22633425, 0.67421123,
        0.16912867, 0.03180238, 0.09740421, 0.05325968, 0.09536   ,
        0.14736025])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Final_Index
0,1.422145,1.238987,1.045707,0.866859,0.838260,0.527166,-0.051061,0.269188,-0.468352,-0.245020,0.382816,1.021506
1,0.315920,0.237005,0.005098,-0.072202,0.838260,0.527166,-0.051061,-2.113301,-1.998620,0.501505,0.468065,0.162716
2,0.610645,0.358437,0.087198,-0.364051,0.429276,0.527166,-0.973680,0.115928,-0.085786,0.612690,0.151746,0.201373
3,0.298276,0.059938,0.011846,-0.188818,0.838260,0.527166,-2.357609,-0.692167,-0.946561,-0.014709,-0.243092,0.097427
4,-0.277470,-0.152986,-0.169371,-0.066970,0.838260,0.527166,0.410249,-0.023398,0.679348,0.501505,0.474795,-0.008135
...,...,...,...,...,...,...,...,...,...,...,...,...
257,-0.015433,0.105083,0.168623,0.272173,0.838260,0.527166,0.410249,0.561774,1.061915,-0.427680,0.447874,0.232236
258,-0.324691,-0.201620,-0.172660,-0.073533,0.838260,0.527166,0.410249,0.784697,1.061915,-0.427680,0.465821,-0.027222
259,-0.792791,-0.707826,-0.710287,-0.635037,0.838260,0.527166,0.410249,0.784697,1.061915,-0.427680,0.474795,-0.446198
260,2.301794,2.559217,2.569099,2.455888,0.838260,0.527166,0.410249,0.074130,1.061915,-0.427680,0.465821,2.117082


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("phy_AUTOENCODER_best_auto_config1_ss_onedim_1212.xlsx", index=False)


In [None]:
mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([1.13909552, 1.37114031, 1.46665191, 1.19225679, 0.55994148,
        0.25554028, 0.06712827, 0.03922432, 0.18321614, 0.10683522,
        0.18610143])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Final_Index
0,1.422145,1.238987,1.045707,0.866859,0.838260,0.527166,-0.051061,0.269188,-0.468352,-0.245020,0.382816,0.983149
1,0.315920,0.237005,0.005098,-0.072202,0.838260,0.527166,-0.051061,-2.113301,-1.998620,0.501505,0.468065,0.136818
2,0.610645,0.358437,0.087198,-0.364051,0.429276,0.527166,-0.973680,0.115928,-0.085786,0.612690,0.151746,0.193866
3,0.298276,0.059938,0.011846,-0.188818,0.838260,0.527166,-2.357609,-0.692167,-0.946561,-0.014709,-0.243092,0.062834
4,-0.277470,-0.152986,-0.169371,-0.066970,0.838260,0.527166,0.410249,-0.023398,0.679348,0.501505,0.474795,0.006553
...,...,...,...,...,...,...,...,...,...,...,...,...
257,-0.015433,0.105083,0.168623,0.272173,0.838260,0.527166,0.410249,0.561774,1.061915,-0.427680,0.447874,0.241231
258,-0.324691,-0.201620,-0.172660,-0.073533,0.838260,0.527166,0.410249,0.784697,1.061915,-0.427680,0.465821,-0.013589
259,-0.792791,-0.707826,-0.710287,-0.635037,0.838260,0.527166,0.410249,0.784697,1.061915,-0.427680,0.474795,-0.422228
260,2.301794,2.559217,2.569099,2.455888,0.838260,0.527166,0.410249,0.074130,1.061915,-0.427680,0.465821,2.085709


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("phy_AUTOENCODER_best_auto_config2_ss_onedim_1212.xlsx", index=False)

#print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


## phy minmaxscaler - one dimension (method 2)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
#data = pd.read_excel('ENV_Original_(NOT SHARED) FOR USE_2020.xlsx')
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values
#X = data.iloc[:, 3:14].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split the data into training and test sets
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # Only even numbers to ensure symmetry
neurons_options = [2, 3, 4, 5, 6, 7, 8, 9, 10]
encoding_dims_options = [1]
results = []


# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 2, Encoding dimension: 1, Neurons: 4, Test loss: 0.0537392683327198


Hidden layers: 4, Encoding dimension: 1, Neurons: 10, Test loss: 0.054001584649086


In [None]:
from kerastuner import Hyperband
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Choice('encoding_dim', [1]) # Only one encoding dimension
    neurons = hp.Int('neurons', min_value=2, max_value=10, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])


    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)
    bottleneck = Dense(encoding_dim, activation=activation, name='bottleneck')(x)
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model

In [None]:
from kerastuner.tuners import Hyperband

# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=1, hidden_layers_after=1), #modify after getting the value from the summary
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning1___mm_onedim___phy', project_name='model_config_1'
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss', max_epochs=50, factor=3, directory='hyperparam_tuning2___mm_onedim___phy', project_name='model_config_2'
)

In [None]:
# Run the tuning for configuration 1
tuner_config_1.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 1
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]


Trial 90 Complete [00h 00m 19s]
val_loss: 0.05940530449151993

Best val_loss So Far: 0.027111899107694626
Total elapsed time: 00h 20m 40s


In [None]:
# Run the tuning for configuration 2
tuner_config_2.search(X_train, X_train, epochs=50, validation_data=(X_test, X_test))

# Get the best model for configuration 2
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 22s]
val_loss: 0.07412304729223251

Best val_loss So Far: 0.021431969478726387
Total elapsed time: 00h 24m 16s


In [None]:
# Display the structure of the best model for configuration 1
print("Best Model Structure for Configuration 1:")
best_model_config_1.summary()

# Display the structure of the best model for configuration 2
print("\nBest Model Structure for Configuration 2:")
best_model_config_2.summary()

Best Model Structure for Configuration 1:



Best Model Structure for Configuration 2:


In [None]:
# Print details of each layer for the best model in configuration 1
print("Best Model for Configuration 1:")
for layer in best_model_config_1.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

# Print details of each layer for the best model in configuration 2
print("\nBest Model for Configuration 2:")
for layer in best_model_config_2.layers:
    if hasattr(layer, 'output_shape'):
        output_shape = layer.output_shape
    else:
        output_shape = layer.get_output_shape_at(0) if hasattr(layer, 'get_output_shape_at') else 'N/A'
    print(f"Layer: {layer.name}, Type: {layer.__class__.__name__}, Output Shape: {output_shape}, Parameters: {layer.count_params()}")

Best Model for Configuration 1:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 84
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 8
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 14
Layer: output_layer, Type: Dense, Output Shape: N/A, Parameters: 88

Best Model for Configuration 2:
Layer: input_layer, Type: InputLayer, Output Shape: N/A, Parameters: 0
Layer: dense_encoder_1, Type: Dense, Output Shape: N/A, Parameters: 72
Layer: dense_encoder_2, Type: Dense, Output Shape: N/A, Parameters: 42
Layer: bottleneck, Type: Dense, Output Shape: N/A, Parameters: 7
Layer: dense_decoder_1, Type: Dense, Output Shape: N/A, Parameters: 12
Layer: dense_decoder_2, Type: Dense, Output Shape: N/A, Parameters: 42
Layer: output_layer, Type: Dense, Output Shape: N/A, Parameters: 77


#### check

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 16, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.027111899107694626
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step 

Latent Feature Correlations:
1.0


In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 1, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 64, 'activation': 'tanh', 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}





Test Loss: 0.021431969478726387
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step

Latent Feature Correlations:
1.0


#### past

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
bottleneck_output_1.shape

(262, 1)

In [None]:
bottleneck_output_2.shape

(262, 1)

In [None]:
from sklearn.feature_selection import mutual_info_regression
import numpy as np
import pandas as pd

In [None]:
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([1.39135916, 1.70033225, 1.8827586 , 1.40031301, 1.20366501,
        0.17064396, 0.        , 0.1067879 , 0.13453293, 0.05305557,
        0.06381418])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Final_Index
0,0.704410,0.551026,0.450268,0.353767,1.000000,1.0,0.933333,0.947814,0.764706,0.023174,0.987710,0.604742
1,0.352600,0.267425,0.183437,0.132574,1.000000,1.0,0.933333,0.706629,0.529412,0.117884,0.999101,0.378342
2,0.446331,0.301795,0.204489,0.063830,0.833333,1.0,0.800000,0.932299,0.823529,0.131990,0.956835,0.377520
3,0.346989,0.217308,0.185168,0.105105,1.000000,1.0,0.600000,0.850494,0.691176,0.052393,0.904077,0.365928
4,0.163886,0.157042,0.138701,0.133806,1.000000,1.0,1.000000,0.918195,0.941176,0.117884,1.000000,0.322255
...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.247221,0.230086,0.225368,0.213690,1.000000,1.0,1.000000,0.977433,1.000000,0.000000,0.996403,0.386758
258,0.148869,0.143276,0.137857,0.132260,1.000000,1.0,1.000000,1.000000,1.000000,0.000000,0.998801,0.317601
259,0.000000,0.000000,0.000000,0.000000,1.000000,1.0,1.000000,1.000000,1.000000,0.000000,1.000000,0.207153
260,0.984162,0.924704,0.840892,0.728057,1.000000,1.0,1.000000,0.928068,1.000000,0.000000,0.998801,0.890068


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("phy_AUTOENCODER_best_auto_config1_mm_onedim_1212.xlsx", index=False)

print("File 'env_AUTOENCODER.xlsx' has been saved.")


File 'env_AUTOENCODER.xlsx' has been saved.


In [None]:
mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)


In [None]:
mi_scores

[array([1.47631129, 1.70368848, 1.7877181 , 1.35360755, 1.31744797,
        0.16748042, 0.0208115 , 0.06270383, 0.11764839, 0.08529115,
        0.06974548])]

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 1)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Final_Index
0,0.704410,0.551026,0.450268,0.353767,1.000000,1.0,0.933333,0.947814,0.764706,0.023174,0.987710,0.610984
1,0.352600,0.267425,0.183437,0.132574,1.000000,1.0,0.933333,0.706629,0.529412,0.117884,0.999101,0.388881
2,0.446331,0.301795,0.204489,0.063830,0.833333,1.0,0.800000,0.932299,0.823529,0.131990,0.956835,0.384737
3,0.346989,0.217308,0.185168,0.105105,1.000000,1.0,0.600000,0.850494,0.691176,0.052393,0.904077,0.374320
4,0.163886,0.157042,0.138701,0.133806,1.000000,1.0,1.000000,0.918195,0.941176,0.117884,1.000000,0.329854
...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.247221,0.230086,0.225368,0.213690,1.000000,1.0,1.000000,0.977433,1.000000,0.000000,0.996403,0.392442
258,0.148869,0.143276,0.137857,0.132260,1.000000,1.0,1.000000,1.000000,1.000000,0.000000,0.998801,0.324058
259,0.000000,0.000000,0.000000,0.000000,1.000000,1.0,1.000000,1.000000,1.000000,0.000000,1.000000,0.215111
260,0.984162,0.924704,0.840892,0.728057,1.000000,1.0,1.000000,0.928068,1.000000,0.000000,0.998801,0.890463


In [None]:
# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)
df.to_excel("phy_AUTOENCODER_best_auto_config2_mm_onedim_1212.xlsx", index=False)

print("File 'socioecon_AUTOENCODER.xlsx' has been saved.")


File 'socioecon_AUTOENCODER.xlsx' has been saved.


## different numbers of nerons with regularization techiniques (alpha tuned) - minmax scaler (Dec. 5) varaince (method 3)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # even numbers to ensure symmetry
neurons_options = [5, 6, 7, 8, 9, 10]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 2, Encoding dimension: 3, Neurons: 10, Test loss: 0.027862876653671265


Hidden layers: 4, Encoding dimension: 3, Neurons: 8, Test loss: 0.030561473220586777


add orthogonal regularization (w/ alpha tuned)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Custom layer for orthogonal regularization
class OrthogonalRegularization(Layer):
    def __init__(self, alpha=1e-2, **kwargs):
        super(OrthogonalRegularization, self).__init__(**kwargs)
        self.alpha = alpha

    def call(self, bottleneck_output):
        # Get the batch size
        batch_size = tf.cast(tf.shape(bottleneck_output)[0], tf.float32)

        # Normalize the bottleneck outputs
        normalized_output = bottleneck_output / tf.sqrt(batch_size)

        # Compute correlation matrix
        correlation = tf.matmul(
            tf.transpose(normalized_output),
            normalized_output
        )

        # Create identity matrix of the correct shape
        shape = tf.shape(correlation)[0]
        identity = tf.eye(shape)

        # Calculate loss (excluding diagonal elements)
        mask = tf.ones_like(correlation) - tf.eye(shape)
        loss = tf.reduce_sum(tf.square(correlation * mask))

        # Add loss to the layer
        self.add_loss(self.alpha * loss)

        return bottleneck_output


In [None]:
def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # alpha tuned
    alpha = hp.Choice('alpha', [1e-3, 1e-2, 1e-1, 1.0])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer

    # Encoder layers
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer with orthogonal regularization
    bottleneck = Dense(
        encoding_dim,
        activation=activation,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        name='bottleneck'
    )(x)

    # Apply orthogonal regularization
    bottleneck = OrthogonalRegularization(alpha=alpha)(bottleneck) #modified alpha for tunin

    # Decoder layers
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)

    output_layer = Dense(input_dim, name='output_layer')(x)

    # Create and compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'  # Using standard MSE loss now
    )

    return model

In [None]:
from kerastuner.tuners import Hyperband
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=1, hidden_layers_after=1),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_1_ortho_mm_al__c',
    project_name='model_config_1',
    overwrite=True
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_2_ortho_mm_al__c',
    project_name='model_config_2',
    overwrite=True
)

In [None]:
# Define callbacks
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        min_delta=1e-4
    )
]


In [None]:
# Run the tuning
tuner_config_1.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 14s]
val_loss: 0.12543334066867828

Best val_loss So Far: 0.009500496089458466
Total elapsed time: 00h 12m 16s


In [None]:
# Run the tuning
tuner_config_2.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 17s]
val_loss: 0.9736601710319519

Best val_loss So Far: 0.010663064196705818
Total elapsed time: 00h 15m 12s


#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.1, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0046'}

Test Loss: 0.009500496089458466
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step

Latent Feature Correlations:
[[ 1.         -0.19774729 -0.22529448 -0.22059147]
 [-0.19774729  1.         -0.22702935 -0.25515636]
 [-0.22529448 -0.22702935  1.         -0.39376357]
 [-0.22059147 -0.25515636 -0.39376357  1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.6968
Latent Dimension 2: 0.7021
Latent Dimension 3: 0.8087
Latent Dimension 4: 0.7924


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2323
Latent Dimension 2: 0.2340
Latent Dimension 3: 0.2696
Latent Dimension 4: 0.2641
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.23226851590249065,
 0.23404557968159465,
 0.26956299283523394,
 0.2641229115806808]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)

# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.23226852, 0.23404558, 0.26956299, 0.26412291])

In [None]:
normalized_mi_scores

array([[0.1596233 , 0.13161635, 0.15535866, 0.16200158],
       [0.1503593 , 0.12102914, 0.17175481, 0.19828505],
       [0.15780151, 0.12425749, 0.17365915, 0.22259092],
       [0.13646731, 0.09453498, 0.13533439, 0.17933515],
       [0.19836023, 0.0913986 , 0.11597946, 0.1386123 ],
       [0.01436097, 0.20855986, 0.08388647, 0.02101867],
       [0.01133257, 0.02610377, 0.02004228, 0.00128214],
       [0.03617198, 0.09376921, 0.04179575, 0.01155492],
       [0.09822314, 0.06815291, 0.06178498, 0.0185723 ],
       [0.03729968, 0.02066363, 0.03073145, 0.02861931],
       [0.        , 0.01991406, 0.00967259, 0.01812766]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config1_1207_with_weights_mm.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 6, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.01, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0048'}

Test Loss: 0.010663064196705818
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step

Latent Feature Correlations:
[[ 1.         -0.00318444 -0.49109656 -0.40318678]
 [-0.00318444  1.          0.34796365 -0.32661871]
 [-0.49109656  0.34796365  1.         -0.37609145]
 [-0.40318678 -0.32661871 -0.37609145  1.        ]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.8789
Latent Dimension 2: 0.7035
Latent Dimension 3: 0.7183
Latent Dimension 4: 0.6993


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2930
Latent Dimension 2: 0.2345
Latent Dimension 3: 0.2394
Latent Dimension 4: 0.2331
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)
mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config2_1207_with_weights_mm.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()

## different numbers of nerons with regularization techiniques (alpha tuned) - standard scaler - varaince - (method 3)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mutual_info_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from itertools import product
from sklearn.feature_selection import mutual_info_regression
from kerastuner import HyperModel, RandomSearch

In [None]:
data = pd.read_excel('PHY_Original_(NOT SHARED) FOR USE_2020.xlsx')
X_ori = data.iloc[:, 3:14].values

scaler = StandardScaler()
X = scaler.fit_transform(X_ori)

# Split
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

def create_autoencoder(input_dim, encoding_dim, hidden_layers_before, hidden_layers_after, neurons_before, neurons_after):
    # Define the input layer
    input_layer = Input(shape=(input_dim,), name='input_layer')

    # Build the encoder part
    x = input_layer
    for i in range(hidden_layers_before):
        x = Dense(neurons_before, activation='relu', name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer (encoding layer)
    bottleneck = Dense(encoding_dim, activation='relu', name='bottleneck')(x)

    # Build the decoder part
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons_after, activation='relu', name=f'dense_decoder_{i+1}')(x)

    # Output layer, with the same dimension as the input
    output_layer = Dense(input_dim, name='output_layer')(x)

    # Define the complete autoencoder model
    autoencoder = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')

    return autoencoder

In [None]:
# Initial tuning
hidden_layers_options = [2, 4, 6, 8]  # even numbers to ensure symmetry
neurons_options = [5, 6, 7, 8, 9, 10]
encoding_dims_options = [1, 2, 3, 4]
results = []

# Iterate through each combination of hyperparameters
for hidden_layers in hidden_layers_options:
    hidden_layers_before = hidden_layers // 2
    hidden_layers_after = hidden_layers // 2

    for neurons in neurons_options:
        for encoding_dim in encoding_dims_options:
            autoencoder = create_autoencoder(
                input_dim=X_train.shape[1],
                encoding_dim=encoding_dim,
                hidden_layers_before=hidden_layers_before,
                hidden_layers_after=hidden_layers_after,
                neurons_before=neurons,
                neurons_after=neurons
            )
            autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')
            history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, X_test))

            # Calculate average loss on the test set
            test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)
            results.append((hidden_layers, encoding_dim, neurons, test_loss))

# Sort results by the lowest test loss and select the best two configurations
sorted_results = sorted(results, key=lambda x: x[3])
best_configs = sorted_results[:2]

# Print best configurations for reference and show model summary
print("Top two configurations:")
for config in best_configs:
    hidden_layers, encoding_dim, neurons, test_loss = config
    print(f"Hidden layers: {hidden_layers}, Encoding dimension: {encoding_dim}, Neurons: {neurons}, Test loss: {test_loss}")

    # Recreate and display the model summary for the best configurations
    autoencoder = create_autoencoder(
        input_dim=X_train.shape[1],
        encoding_dim=encoding_dim,
        hidden_layers_before=hidden_layers // 2,
        hidden_layers_after=hidden_layers // 2,
        neurons_before=neurons,
        neurons_after=neurons
    )
    autoencoder.summary()

Top two configurations:
Hidden layers: 4, Encoding dimension: 4, Neurons: 9, Test loss: 0.5923401117324829


Hidden layers: 8, Encoding dimension: 2, Neurons: 10, Test loss: 0.6137497425079346


add orthogonal regularization (w/ alpha tuned)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

# Custom layer for orthogonal regularization
class OrthogonalRegularization(Layer):
    def __init__(self, alpha=1e-2, **kwargs):
        super(OrthogonalRegularization, self).__init__(**kwargs)
        self.alpha = alpha

    def call(self, bottleneck_output):
        # Get the batch size
        batch_size = tf.cast(tf.shape(bottleneck_output)[0], tf.float32)

        # Normalize the bottleneck outputs
        normalized_output = bottleneck_output / tf.sqrt(batch_size)

        # Compute correlation matrix
        correlation = tf.matmul(
            tf.transpose(normalized_output),
            normalized_output
        )

        # Create identity matrix of the correct shape
        shape = tf.shape(correlation)[0]
        identity = tf.eye(shape)

        # Calculate loss (excluding diagonal elements)
        mask = tf.ones_like(correlation) - tf.eye(shape)
        loss = tf.reduce_sum(tf.square(correlation * mask))

        # Add loss to the layer
        self.add_loss(self.alpha * loss)

        return bottleneck_output


In [None]:
def build_model(hp, input_dim, hidden_layers_before, hidden_layers_after):
    # Hyperparameters to tune
    encoding_dim = hp.Int('encoding_dim', min_value=1, max_value=4, step=1)
    neurons = hp.Int('neurons', min_value=5, max_value=7, step=1)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    batch_size = hp.Choice('batch_size', [16, 32, 64])
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # alpha tuned
    alpha = hp.Choice('alpha', [1e-3, 1e-2, 1e-1, 1.0])

    # Build the model structure
    input_layer = Input(shape=(input_dim,), name='input_layer')
    x = input_layer

    # Encoder layers
    for i in range(hidden_layers_before):
        x = Dense(neurons, activation=activation, name=f'dense_encoder_{i+1}')(x)

    # Bottleneck layer with orthogonal regularization
    bottleneck = Dense(
        encoding_dim,
        activation=activation,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        name='bottleneck'
    )(x)

    # Apply orthogonal regularization
    bottleneck = OrthogonalRegularization(alpha=alpha)(bottleneck) #modified alpha for tunin

    # Decoder layers
    x = bottleneck
    for i in range(hidden_layers_after):
        x = Dense(neurons, activation=activation, name=f'dense_decoder_{i+1}')(x)

    output_layer = Dense(input_dim, name='output_layer')(x)

    # Create and compile model
    model = Model(inputs=input_layer, outputs=output_layer, name='autoencoder_model')
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'  # Using standard MSE loss
    )

    return model

In [None]:
from kerastuner.tuners import Hyperband
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Define search space for each configuration
tuner_config_1 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=2, hidden_layers_after=2),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_1_ortho_mm_al__e',
    project_name='model_config_1',
    overwrite=True
)

tuner_config_2 = Hyperband(
    lambda hp: build_model(hp, input_dim=11, hidden_layers_before=4, hidden_layers_after=4),
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='hyperparam_tuning_2_ortho_mm_al__e',
    project_name='model_config_2',
    overwrite=True
)

In [None]:
# Define callbacks
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        min_delta=1e-4
    )
]


In [None]:
# Run the tuning
tuner_config_1.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 88 Complete [00h 00m 28s]
val_loss: 0.9597678780555725

Best val_loss So Far: 0.41087406873703003
Total elapsed time: 00h 28m 59s


In [None]:
# Run the tuning
tuner_config_2.search(
    X_train,
    X_train,
    epochs=50,
    validation_data=(X_test, X_test),
    callbacks=callbacks,
    batch_size=32,
    verbose=1
)

Trial 90 Complete [00h 00m 24s]
val_loss: 1.3942735195159912

Best val_loss So Far: 0.4613499641418457
Total elapsed time: 00h 34m 52s


#### regularization

In [None]:
# Get the best model
best_model_config_1 = tuner_config_1.get_best_models(num_models=1)[0]
best_hp_config_1 = tuner_config_1.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_1.values)

# Evaluate final model
evaluation = best_model_config_1.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_1.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 4, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.001, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0068'}

Test Loss: 0.41087406873703003
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step

Latent Feature Correlations:
[[ 1.          0.61159197  0.18047643  0.31910098]
 [ 0.61159197  1.         -0.08225657  0.10477074]
 [ 0.18047643 -0.08225657  1.          0.31985434]
 [ 0.31910098  0.10477074  0.31985434  1.        ]]


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_1.input, outputs=best_model_config_1.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_1.layers if 'dense_decoder' in l.name]:
    x = best_model_config_1.get_layer(layer_name)(x)
decoder_output = best_model_config_1.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)


In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
#calculate bottleneck contributions
def calculate_bottleneck_contributions(autoencoder, encoder, decoder, X):
    bottleneck_output = encoder.predict(X)
    contributions = []

    for dim in range(bottleneck_output.shape[1]):
        # Isolate one dimension at a time
        isolated_latent = np.zeros_like(bottleneck_output)
        isolated_latent[:, dim] = bottleneck_output[:, dim]

        # Reconstruct input using only the isolated latent dimension
        reconstructed = decoder.predict(isolated_latent)

        # Compute reconstruction loss (e.g., MSE)
        loss = mean_squared_error(X, reconstructed)
        contributions.append(loss)

    # Normalize contributions
    contributions = np.array(contributions)
    normalized_contributions = 1 - (contributions / np.sum(contributions))

    return normalized_contributions

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.6553
Latent Dimension 2: 0.7902
Latent Dimension 3: 0.7955
Latent Dimension 4: 0.7589


In [None]:
# Normalize the contributions so they sum to 1
def normalize_contributions(contributions):
    total = sum(contributions)
    return [c / total for c in contributions]

# Calculate contributions
#contributions = calculate_bottleneck_contributions(best_model_config_1, encoder, decoder, X_train)

# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.2184
Latent Dimension 2: 0.2634
Latent Dimension 3: 0.2652
Latent Dimension 4: 0.2530
Sum of Normalized Contributions: 1.0


In [None]:
normalized_contributions

[0.2184325019225112,
 0.26341458567645337,
 0.265182970730931,
 0.2529699416701044]

In [None]:
bottleneck_layer_config_1 = best_model_config_1.get_layer('bottleneck').output
encoder_model_config_1 = Model(inputs=best_model_config_1.input, outputs=bottleneck_layer_config_1)
bottleneck_output_1 = encoder_model_config_1.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


In [None]:
latent_contributions = np.array(normalized_contributions)

# Calculate MI scores
mi_scores = []
for i in range(bottleneck_output_1.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_1[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 4)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_1.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_1.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
latent_contributions

array([0.2184325 , 0.26341459, 0.26518297, 0.25296994])

In [None]:
normalized_mi_scores

array([[0.10606157, 0.09634898, 0.17945723, 0.13567724],
       [0.12942467, 0.10384413, 0.20002442, 0.1417229 ],
       [0.1291228 , 0.09025443, 0.19463227, 0.15097852],
       [0.13412935, 0.09413204, 0.16416404, 0.13318749],
       [0.04649303, 0.05339513, 0.12106318, 0.03450572],
       [0.19189326, 0.10638984, 0.01039195, 0.05152554],
       [0.01365389, 0.04501863, 0.00886178, 0.02335213],
       [0.08929449, 0.15823965, 0.01466093, 0.08602156],
       [0.07489767, 0.14917043, 0.00864741, 0.07757109],
       [0.03141728, 0.0545653 , 0.03274692, 0.12306305],
       [0.053612  , 0.04864145, 0.06534987, 0.04239477]])

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config1_1207_with_weights_ss.xlsx", index=False)

config 2

In [None]:
# Get the best model
best_model_config_2 = tuner_config_2.get_best_models(num_models=1)[0]
best_hp_config_2 = tuner_config_2.get_best_hyperparameters(num_trials=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Print best hyperparameters
print("\nBest Hyperparameters:")
print(best_hp_config_2.values)

# Evaluate final model
evaluation = best_model_config_2.evaluate(X_test, X_test, verbose=0)
print(f"\nTest Loss: {evaluation}")

# Extract and analyze bottleneck features
bottleneck_layer = best_model_config_2.get_layer('bottleneck').output
encoder = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer)
bottleneck_features = encoder.predict(X_test)

# Check correlation between latent features
correlation_matrix = np.corrcoef(bottleneck_features.T)
print("\nLatent Feature Correlations:")
print(correlation_matrix)


Best Hyperparameters:
{'encoding_dim': 3, 'neurons': 7, 'learning_rate': 0.01, 'batch_size': 32, 'activation': 'tanh', 'alpha': 0.1, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0047'}

Test Loss: 0.4613499641418457
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 251ms/step

Latent Feature Correlations:
[[ 1.          0.1164756   0.41462969]
 [ 0.1164756   1.         -0.35532778]
 [ 0.41462969 -0.35532778  1.        ]]


In [None]:
# Extract encoder and decoder from the trained autoencoder
encoder = Model(inputs=best_model_config_2.input, outputs=best_model_config_2.get_layer('bottleneck').output)

# Create a decoder model (assuming symmetric architecture)
bottleneck_input = tf.keras.Input(shape=(encoder.output.shape[1],))
x = bottleneck_input
for layer_name in [l.name for l in best_model_config_2.layers if 'dense_decoder' in l.name]:
    x = best_model_config_2.get_layer(layer_name)(x)
decoder_output = best_model_config_2.get_layer('output_layer')(x)
decoder = Model(inputs=bottleneck_input, outputs=decoder_output)

In [None]:
# Calculate contributions
contributions = calculate_bottleneck_contributions(best_model_config_2, encoder, decoder, X_train)

# Print contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.7410
Latent Dimension 2: 0.5734
Latent Dimension 3: 0.6856


In [None]:
# Normalize contributions
normalized_contributions = normalize_contributions(contributions)

# Print normalized contributions
print("Normalized Contributions of Each Bottleneck Dimension:")
for i, c in enumerate(normalized_contributions):
    print(f"Latent Dimension {i+1}: {c:.4f}")

# Verify they sum to 1
print("Sum of Normalized Contributions:", sum(normalized_contributions))

Normalized Contributions of Each Bottleneck Dimension:
Latent Dimension 1: 0.3705
Latent Dimension 2: 0.2867
Latent Dimension 3: 0.3428
Sum of Normalized Contributions: 1.0


In [None]:
bottleneck_layer_config_2 = best_model_config_2.get_layer('bottleneck').output
encoder_model_config_2 = Model(inputs=best_model_config_2.input, outputs=bottleneck_layer_config_2)
bottleneck_output_2 = encoder_model_config_2.predict(X)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [None]:
latent_contributions = np.array(normalized_contributions)  # Make sure this is normalized

mi_scores = []
for i in range(bottleneck_output_2.shape[1]):
    mi = mutual_info_regression(X, bottleneck_output_2[:, i], random_state=42)
    mi_scores.append(mi)

In [None]:
mi_scores = np.array(mi_scores).T  # Transpose to (features, bottleneck_dim)
mi_scores.shape

(11, 3)

In [None]:
# Normalize MI scores per bottleneck dimension
normalized_mi_scores = mi_scores / np.sum(mi_scores, axis=0)

# Initialize an array to store weighted values
weighted_values = np.zeros((X.shape[0], X.shape[1], bottleneck_output_2.shape[1]))

# Multiply MI scores by latent space contributions for each bottleneck dimension
for dim in range(bottleneck_output_2.shape[1]):
    weighted_values[:, :, dim] = X * normalized_mi_scores[:, dim] * latent_contributions[dim]

In [None]:
# Sum across bottleneck dimensions for a single weighted value per feature
summed_features = np.sum(weighted_values, axis=2)

# Sum across features to get the final index
final_index = np.sum(summed_features, axis=1)

# Reshape and append final index as a new column in X
final_index_column = final_index.reshape(-1, 1)
X_with_index = np.hstack((X, final_index_column))

# Create DataFrame and export to Excel
column_names = [f"Feature_{i+1}" for i in range(X.shape[1])] + ["Final_Index"]
df = pd.DataFrame(X_with_index, columns=column_names)

In [None]:
# Export to Excel
df.to_excel("phy_AUTOENCODER_best_auto_config2_1207_with_weights_ss.xlsx", index=False)

In [None]:
best_model_config_1.summary()

In [None]:
best_model_config_2.summary()