## Inverse Design usign Gradient Opt
---

We loaded test-set (2500 sample), the same one as we load to evaluate the forward model. In which, we define with variable name `df_test`. 

However, in this tutorial we store the first 100 samples in variable name `df_test_100`, and run the optimization only on these samples. Therefore, in order to produce results likely similar as in our paper. We recommed to use `df_test` (2500 samples) in loop process. 

`for idx, row in df_test.iterrows():`

## Import modules

In [2]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import time
import pymiecs
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from wgangp_model import (
    WGAN_GP,
    load_generator,
    generate_synthetic_data,
    inverse_transform_synthetic_data,
)

## Set dynamic GPU memory growth

In [3]:
# Check if GPU available
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is available and set to memory growth mode.")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected by TensorFlow.")

GPU is available and set to memory growth mode.


### Define the own resblock class

keras requires custom classes to be defined for being able to reload

In [4]:
# decorator to register the custom resblock to allow serialziation and re-loading
@keras.utils.register_keras_serializable()  # for keras3
class ResBlock1D(keras.Model):
    def __init__(self, filters, kernel_size=3, convblock=False, **kwargs):
        super(ResBlock1D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size

        # setup all necessary layers
        self.conv1 = keras.layers.Conv1D(filters, kernel_size, padding="same")
        self.bn1 = keras.layers.BatchNormalization()

        self.conv2 = keras.layers.Conv1D(filters, kernel_size, padding="same")
        self.bn2 = keras.layers.BatchNormalization()

        # self.relu = keras.layers.LeakyReLU()
        self.relu = keras.layers.LeakyReLU(negative_slope=0.01)

        self.convblock = convblock
        if self.convblock:
            self.conv_shortcut = keras.layers.Conv1D(filters, 1)

    def call(self, input_tensor, training=False):
        x = self.conv1(input_tensor)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # add shortcut. optionally pass it through a Conv
        if self.convblock:
            x_sc = self.conv_shortcut(input_tensor)
        else:
            x_sc = input_tensor
        x += x_sc
        return self.relu(x)

    def get_config(self):
        base_config = super().get_config()
        return {
            "convblock": self.convblock,
            "filters": self.filters,
            "kernel_size": self.kernel_size,
            **base_config,
        }

## Reload the forward and wgangp model

In [5]:
forward_path = "models/resnet_Mie_predictor.keras"
wgangp_path = "models/wgangp_generator.h5"

forward_model = keras.models.load_model(forward_path)
generator = load_generator(wgangp_path)

I0000 00:00:1742371994.403003 3022383 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 19690 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:21:00.0, compute capability: 8.9


## Relaod the data scaler

In [6]:
# Load the preprocessors and scalers
preprocessor_path = "datasets/scaler_particle_geometries.pkl"
scaler_Qfwd_path = "datasets/scaler_Qfwd.pkl"
scaler_Qback_path = "datasets/scaler_Qback.pkl"

# Load the preprocessors and scalers
with open(preprocessor_path, "rb") as f:
    preprocessor = pickle.load(f)
with open(scaler_Qfwd_path, "rb") as f:
    scaler_Qfwd = pickle.load(f)
with open(scaler_Qback_path, "rb") as f:
    scaler_Qback = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## Reload Test data 

In [7]:
hdf5_df_file = "datasets/core_shell_particles_raw_122500_test_with_pred.h5"
df_test = pd.read_hdf(hdf5_df_file)
df_test.head()  # 2500 samples

Unnamed: 0,mat_core,mat_shell,r_core,r_shell,wavelength,Q_sca,Q_back,Q_fwd,Q_fwd_pred,Q_back_pred
0,ZrO2,Si,39,132,"[400.0, 406.3492063492063, 412.6984126984127, ...","[2.038010369711505, 1.809512402100606, 1.48074...","[0.6439440129016083, 0.4467094204783908, 0.555...","[8.631606079996017, 7.795760005483272, 6.22831...","[8.745616, 7.7675796, 6.3042693, 4.196197, 3.2...","[0.6267073, 0.44084805, 0.55183196, 0.5212697,..."
1,Au,Si3N4,65,92,"[400.0, 406.3492063492063, 412.6984126984127, ...","[0.7478421540587286, 0.7347653970480106, 0.723...","[0.25254436594760143, 0.2641361829295818, 0.27...","[3.0595320722390036, 2.910348641419738, 2.7718...","[3.063077, 2.9394855, 2.7923834, 2.6702833, 2....","[0.25272343, 0.2650609, 0.28329864, 0.29937956..."
2,Si,Si,31,32,"[400.0, 406.3492063492063, 412.6984126984127, ...","[0.42059673950184884, 0.28402220125650635, 0.2...","[0.12245274773436954, 0.03833342713719669, 0.0...","[1.1619876883360172, 0.8312774845838197, 0.618...","[1.1966718, 0.8318415, 0.6197878, 0.48798603, ...","[0.16796815, 0.028995086, 0.03501954, 0.048387..."
3,ZrO2,ZrO2,81,114,"[400.0, 406.3492063492063, 412.6984126984127, ...","[4.515870402326752, 4.215809011163139, 4.05102...","[2.008253901402069, 1.6064395105460934, 1.4057...","[19.18042348996285, 16.158873919994733, 14.280...","[19.073994, 16.155634, 14.423496, 13.176519, 1...","[1.9703677, 1.5975323, 1.4016687, 1.3178806, 1..."
4,Au,TiO2,49,76,"[400.0, 406.3492063492063, 412.6984126984127, ...","[2.5633444541178934, 2.589277681420846, 2.5882...","[1.6053755830270218, 1.4718419539005771, 1.314...","[6.575478822430851, 6.83471245910305, 7.041375...","[6.792607, 6.961959, 7.1334324, 7.189859, 7.28...","[1.5677507, 1.4955047, 1.3478626, 1.1710209, 0..."


In [8]:
df_test["log_Qfwd"] = df_test["Q_fwd"].apply(lambda x: np.log1p(np.array(x)))
df_test["log_Qback"] = df_test["Q_back"].apply(lambda x: np.log1p(np.array(x)))

In [9]:
df_test_100 = df_test.head(100)

## Mie Utils
---

In [10]:
# %% --- Mie
def get_Mie_spec(wavelengths, r_core, r_shell, mat_core, mat_shell, n_env):

    k0 = 2 * np.pi / wavelengths
    n_core = mat_core.get_refindex(wavelengths)
    n_shell = mat_shell.get_refindex(wavelengths)

    res = pymiecs.Q(
        k0,
        r_core=r_core,
        n_core=n_core,
        r_shell=r_shell,
        n_shell=n_shell,
        n_env=n_env.real**0.5,  # host medium must be lossless
    )
    return (
        res["qsca"],
        res["qback"],
        res["qfwd"],
    )

In [11]:
Si = pymiecs.materials.MaterialDatabase("Si")
SiO2 = pymiecs.materials.MaterialDatabase("SiO2")
Si3N4 = pymiecs.materials.MaterialDatabase("Si3N4")
Au = pymiecs.materials.MaterialDatabase("Au")
Ag = pymiecs.materials.MaterialDatabase("Ag")
ZrO2 = pymiecs.materials.MaterialDatabase("ZrO2")
TiO2 = pymiecs.materials.MaterialDatabase("TiO2")


# Define a function to map material names to material objects
def get_material(material_name):
    if material_name == "Si":
        return Si
    elif material_name == "SiO2":
        return SiO2
    elif material_name == "Au":
        return Au
    elif material_name == "Ag":
        return Ag
    elif material_name == "Si3N4":
        return Si3N4
    elif material_name == "ZrO2":
        return ZrO2
    elif material_name == "TiO2":
        return TiO2
    else:
        raise ValueError(f"Unknown material: {material_name}")

## Adam with Batch Processing

In [19]:
batch_size = 200  # Batch size for parallel optimization
latent_dim = 128  # Dimension of the latent space
optimized_geometries_list = []

In [20]:
def objective_function_batch(
    z_batch, generator, forward_model, target_Qfwd_transformed, target_Qback_transformed
):
    synthetic_geometries = generator(z_batch)
    synthetic_geometries_concat = keras.ops.concatenate(synthetic_geometries, axis=1)

    predicted_batch = forward_model(
        synthetic_geometries_concat
    )  # Forward pass through the forward model

    # Calculate MSE loss for Qfwd and Qback across all vectors in the batch
    loss_Qfwd_batch = keras.ops.mean(
        keras.ops.square(predicted_batch[..., 0] - target_Qfwd_transformed), axis=1
    )
    loss_Qback_batch = keras.ops.mean(
        keras.ops.square(predicted_batch[..., 1] - target_Qback_transformed), axis=1
    )

    # Total loss for the batch (sum of both losses)
    total_loss_batch = loss_Qfwd_batch + loss_Qback_batch
    return total_loss_batch

In [21]:
def learning_rate_schedule(iteration, initial_lr=0.1):
    if iteration < 50:
        return initial_lr
    else:
        return initial_lr * 0.1

In [22]:
def optimize_latent_vector_parallel(
    z_batch,
    generator,
    forward_model,
    target_Qfwd_transformed,
    target_Qback_transformed,
    initial_lr=0.01,
    iterations=250,
    use_lr_schedule=False,
):
    # Initialize the optimizer with the initial learning rate
    if not isinstance(z_batch, tf.Variable):
        raise ValueError("z_batch must be a tf.Variable.")

    optimizer = keras.optimizers.Adam(learning_rate=initial_lr)
    loss_history = []

    for i in range(iterations):
        # Update the learning rate if using learning rate schedule
        if use_lr_schedule:
            new_learning_rate = learning_rate_schedule(i, initial_lr)
            optimizer.learning_rate.assign(new_learning_rate)

        with tf.GradientTape() as tape:
            tape.watch(z_batch)
            # Calculate the loss for the batch of latent vectors
            total_loss_batch = objective_function_batch(
                z_batch,
                generator,
                forward_model,
                target_Qfwd_transformed,
                target_Qback_transformed,
            )

        # Get gradients and update latent vectors in the batch
        gradients_batch = tape.gradient(total_loss_batch, [z_batch])
        optimizer.apply_gradients(zip(gradients_batch, [z_batch]))
        loss_history.append(total_loss_batch.numpy())

    return z_batch, loss_history, total_loss_batch

In [23]:
def process_and_optimize_sample(
    sample_row,
    generator,
    forward_model,
    scaler_Qfwd,
    scaler_Qback,
    batch_size,
    initial_lr=0.01,
    iterations=250,
    use_lr_schedule=False,
):
    # Extract and transform Qfwd and Qback for the sample
    target_Qfwd = np.array(sample_row["log_Qfwd"]).reshape(1, -1)
    target_Qback = np.array(sample_row["log_Qback"]).reshape(1, -1)

    # Apply MinMax scaling using the preloaded scalers
    target_Qfwd_transformed = scaler_Qfwd.transform(target_Qfwd)
    target_Qback_transformed = scaler_Qback.transform(target_Qback)

    # Initialize batch of latent vectors
    initial_z_batch = np.random.normal(size=(batch_size, latent_dim))
    z_batch_tf = tf.Variable(initial_z_batch, dtype=tf.float32)

    # Adam-based batch optimization function
    optimized_z_batch, loss_history, total_loss_batch = optimize_latent_vector_parallel(
        z_batch_tf,
        generator,
        forward_model,
        target_Qfwd_transformed,
        target_Qback_transformed,
        initial_lr=initial_lr,
        iterations=iterations,
        use_lr_schedule=use_lr_schedule,
    )

    # Return optimized latent vectors and corresponding losses
    return (
        optimized_z_batch,
        loss_history,
        total_loss_batch.numpy(),
    )

## Process inverse design samples

In [26]:
# Initialize a list to store the runtime for each sample
num_samples = len(df_test_100)
sample_runtimes = []
optimized_geometries_list = []

for i, sample_row in df_test_100.iterrows():
    print(f"Processing sample {i + 1}/{num_samples}")
    start_time = time.time()  # Start the timer

    # Optimize the latent vectors for the current sample
    optimized_latent_vectors, loss_history, total_loss_batch = (
        process_and_optimize_sample(
            sample_row,
            generator,
            forward_model,
            scaler_Qfwd,
            scaler_Qback,
            batch_size=batch_size,
            initial_lr=0.01,
            iterations=100,
            use_lr_schedule=False,  # set to false because we don't use leanring rate schedule
        )
    )
    # Record end time and calculate sample runtime
    end_time = time.time()
    sample_runtime = end_time - start_time

    # Append runtime to the list
    sample_runtimes.append({"Sample": i + 1, "Runtime (seconds)": sample_runtime})
    print(f"Sample {i + 1} runtime: {sample_runtime:.2f} seconds")
    print("---------------------------------------------")
    optimized_geometries_list.append((optimized_latent_vectors, total_loss_batch))
"""
##########################################################################################
####################### Save the runtimes into a DataFrame ###############################
##########################################################################################
"""
runtime_df = pd.DataFrame(sample_runtimes)
sample_runtime_path = "runtime/gradient_inverse_runtime.pkl"

# Saving DataFrames
with open(sample_runtime_path, "wb") as f:
    pickle.dump(runtime_df, f)

Processing sample 1/100
Sample 1 runtime: 15.09 seconds
---------------------------------------------
Processing sample 2/100
Sample 2 runtime: 14.96 seconds
---------------------------------------------
Processing sample 3/100
Sample 3 runtime: 15.15 seconds
---------------------------------------------
Processing sample 4/100
Sample 4 runtime: 15.09 seconds
---------------------------------------------
Processing sample 5/100
Sample 5 runtime: 14.94 seconds
---------------------------------------------
Processing sample 6/100
Sample 6 runtime: 14.90 seconds
---------------------------------------------
Processing sample 7/100
Sample 7 runtime: 15.31 seconds
---------------------------------------------
Processing sample 8/100
Sample 8 runtime: 15.08 seconds
---------------------------------------------
Processing sample 9/100
Sample 9 runtime: 15.18 seconds
---------------------------------------------
Processing sample 10/100
Sample 10 runtime: 15.14 seconds
------------------------

## Evaluate the Optimization

In [27]:
# Initialize a list to hold the DataFrames for each sample
optimized_samples_list = []
best_geometries_list = []

for i, (optimized_latent_vectors, total_loss_batch) in enumerate(
    optimized_geometries_list
):
    print(f"Generating optimized geometries for sample {i + 1}")

    # Generate the synthetic geometries using the WGAN-GP generator
    synthetic_geometries = generator(optimized_latent_vectors)
    synthetic_geometries_concat = tf.concat(synthetic_geometries, axis=1)

    # Perform the inverse transformation to convert back to the original scale
    inverse_synthetic_data = inverse_transform_synthetic_data(
        preprocessor, synthetic_geometries
    )

    # Create the DataFrame for the optimized geometries
    desired_columns = ["mat_core", "mat_shell", "r_core", "r_shell"]
    generated_columns = ["r_core", "r_shell", "mat_core", "mat_shell"]

    optimized_geometries_df = pd.DataFrame(
        inverse_synthetic_data, columns=generated_columns
    )
    optimized_geometries_df = optimized_geometries_df[desired_columns]

    optimized_geometries_df["Total Loss"] = total_loss_batch
    optimized_geometries_df["Sample"] = i + 1

    # Sort the DataFrame by the 'Total Loss' column (ascending order: best to worst)
    optimized_geometries_df_sorted = optimized_geometries_df.sort_values(
        by="Total Loss", ascending=True
    )
    best_geometries = pd.DataFrame(optimized_geometries_df_sorted.iloc[[0]])

    # Append the DataFrame to the list
    optimized_samples_list.append(optimized_geometries_df)
    best_geometries_list.append(best_geometries)

# Combine all the DataFrames into one large DataFrame:
final_optimized_df = pd.concat(optimized_samples_list, ignore_index=True)
final_best_geometries_df = pd.concat(best_geometries_list, ignore_index=True)

final_best_geometries_df

Generating optimized geometries for sample 1
Generating optimized geometries for sample 2
Generating optimized geometries for sample 3
Generating optimized geometries for sample 4
Generating optimized geometries for sample 5
Generating optimized geometries for sample 6
Generating optimized geometries for sample 7
Generating optimized geometries for sample 8
Generating optimized geometries for sample 9
Generating optimized geometries for sample 10
Generating optimized geometries for sample 11
Generating optimized geometries for sample 12
Generating optimized geometries for sample 13
Generating optimized geometries for sample 14
Generating optimized geometries for sample 15
Generating optimized geometries for sample 16
Generating optimized geometries for sample 17
Generating optimized geometries for sample 18
Generating optimized geometries for sample 19
Generating optimized geometries for sample 20
Generating optimized geometries for sample 21
Generating optimized geometries for sample 

Unnamed: 0,mat_core,mat_shell,r_core,r_shell,Total Loss,Sample
0,Si3N4,Si,38.608547,132.146255,0.000085,1
1,Au,Si3N4,64.528732,91.107384,0.000024,2
2,SiO2,SiO2,8.197314,63.836575,0.001219,3
3,ZrO2,ZrO2,8.27376,114.098946,0.000036,4
4,Au,TiO2,49.631756,79.075127,0.000250,5
...,...,...,...,...,...,...
95,ZrO2,SiO2,27.764015,109.388,0.000002,96
96,SiO2,SiO2,58.008728,138.531311,0.000006,97
97,Si,Ag,67.702515,97.790672,0.001091,98
98,Ag,Au,58.239803,146.192474,0.000005,99


### Forward Model predict on optimized geometries

In [28]:
categorical_features = ["mat_core", "mat_shell"]
numerical_features = ["r_core", "r_shell"]

# Use the preprocessor to transform the data
X_optimized_preprocessed = preprocessor.transform(
    final_best_geometries_df[categorical_features + numerical_features]
)
# Predict using the forward model
y_pred = forward_model.predict(X_optimized_preprocessed)

# Separate predictions for Qfwd and Qback
y_pred_Qfwd = y_pred[..., 0]
y_pred_Qback = y_pred[..., 1]

# Inverse transform the predictions to their original scale
y_pred_Qfwd_inverse = scaler_Qfwd.inverse_transform(y_pred_Qfwd)
y_pred_Qback_inverse = scaler_Qback.inverse_transform(y_pred_Qback)

# Apply expm1 to revert the log1p transformation
y_pred_Qfwd_orig_scale = np.expm1(y_pred_Qfwd_inverse)
y_pred_Qback_orig_scale = np.expm1(y_pred_Qback_inverse)

# Add the predicted values (in original scale) to the DataFrame
final_best_geometries_df["predicted_Qfwd"] = y_pred_Qfwd_orig_scale.tolist()
final_best_geometries_df["predicted_Qback"] = y_pred_Qback_orig_scale.tolist()

# Display the updated DataFrame with predictions
print("Predicted Qfwd (Original Scale):", y_pred_Qfwd_orig_scale)
print("Predicted Qback (Original Scale):", y_pred_Qback_orig_scale)
final_best_geometries_df

I0000 00:00:1742374205.560089 3022729 service.cc:148] XLA service 0x727194006e90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742374205.560134 3022729 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-03-19 09:50:05.632483: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m4s[0m 2s/step

I0000 00:00:1742374206.407625 3022729 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 538ms/step
Predicted Qfwd (Original Scale): [[ 8.743149    7.7987037   6.3712196  ...  6.102538    5.8223343
   5.516574  ]
 [ 2.9729717   2.8541548   2.7125287  ...  2.3996994   2.2657375
   2.127914  ]
 [ 0.43795985  0.40468156  0.3834572  ...  0.02188535  0.02020239
   0.02015775]
 ...
 [14.441613   14.306793   12.932632   ...  1.5846144   1.4626712
   1.3330357 ]
 [13.335457   12.939321   12.587738   ...  4.8530035   4.81108
   4.7853193 ]
 [ 8.061348    8.25566     7.8688545  ...  2.8937688   3.020079
   3.239564  ]]
Predicted Qback (Original Scale): [[ 6.5316105e-01  4.4260105e-01  5.5786014e-01 ...  7.6483736e+00
   7.6022477e+00  7.5651875e+00]
 [ 2.5865072e-01  2.7230498e-01  2.9234096e-01 ...  2.2607622e+00
   2.1165471e+00  1.9905717e+00]
 [ 1.6609423e-01  1.6057090e-01  1.5445229e-01 ...  1.7007980e-02
   1.6551463e-02  1.5603284e-02]
 ...
 [-2.5905990e-03  6.4542703e-02  2.0803747e-01 ...  2.2356057e+00
   2.101

Unnamed: 0,mat_core,mat_shell,r_core,r_shell,Total Loss,Sample,predicted_Qfwd,predicted_Qback
0,Si3N4,Si,38.608547,132.146255,0.000085,1,"[8.743148803710938, 7.798703670501709, 6.37121...","[0.6531610488891602, 0.4426010549068451, 0.557..."
1,Au,Si3N4,64.528732,91.107384,0.000024,2,"[2.9729716777801514, 2.8541548252105713, 2.712...","[0.2586507201194763, 0.2723049819469452, 0.292..."
2,SiO2,SiO2,8.197314,63.836575,0.001219,3,"[0.43795984983444214, 0.40468156337738037, 0.3...","[0.16609422862529755, 0.1605709046125412, 0.15..."
3,ZrO2,ZrO2,8.27376,114.098946,0.000036,4,"[19.226835250854492, 16.326990127563477, 14.47...","[2.042360782623291, 1.6524603366851807, 1.4473..."
4,Au,TiO2,49.631756,79.075127,0.000250,5,"[5.816237926483154, 6.035409450531006, 6.35834...","[2.065091371536255, 2.100466728210449, 1.98399..."
...,...,...,...,...,...,...,...,...
95,ZrO2,SiO2,27.764015,109.388,0.000002,96,"[4.784523010253906, 4.474790573120117, 4.14359...","[0.05459509417414665, 0.0638289675116539, 0.07..."
96,SiO2,SiO2,58.008728,138.531311,0.000006,97,"[10.751150131225586, 10.29588508605957, 9.7830...","[0.406226247549057, 0.38081347942352295, 0.345..."
97,Si,Ag,67.702515,97.790672,0.001091,98,"[14.44161319732666, 14.306793212890625, 12.932...","[-0.0025905990041792393, 0.06454270333051682, ..."
98,Ag,Au,58.239803,146.192474,0.000005,99,"[13.335456848144531, 12.93932056427002, 12.587...","[0.8511751294136047, 0.9003582000732422, 0.933..."


### Use Mie to calculate Mie Qfwd and Qback on optimized geometries

In [29]:
wavelengths = np.linspace(400, 800, 64)  # From 400 nm to 800 nm
n_env = 1.0
mie_Qfwd_list, mie_Qback_list = [], []

for idx, row in final_best_geometries_df.iterrows():
    mat_core = get_material(row["mat_core"])
    mat_shell = get_material(row["mat_shell"])
    r_core = row["r_core"]
    r_shell = row["r_shell"]

    _, Qback, Qfwd = get_Mie_spec(
        wavelengths, r_core, r_shell, mat_core, mat_shell, n_env
    )

    mie_Qfwd_list.append(Qfwd)
    mie_Qback_list.append(Qback)

# Convert the Mie Qfwd and Qback to DataFrame or arrays if needed
mie_Qfwd_array = np.array(mie_Qfwd_list)
mie_Qback_array = np.array(mie_Qback_list)

# Validation: Check dimensions
assert len(mie_Qfwd_array) == len(
    final_best_geometries_df
), "Mismatch in dimensions for Qfwd!"
assert len(mie_Qback_array) == len(
    final_best_geometries_df
), "Mismatch in dimensions for Qback!"

# Add the Mie results to the DataFrame
final_best_geometries_df["mie_Qfwd"] = mie_Qfwd_array.tolist()
final_best_geometries_df["mie_Qback"] = mie_Qback_array.tolist()

final_best_geometries_df

Unnamed: 0,mat_core,mat_shell,r_core,r_shell,Total Loss,Sample,predicted_Qfwd,predicted_Qback,mie_Qfwd,mie_Qback
0,Si3N4,Si,38.608547,132.146255,0.000085,1,"[8.743148803710938, 7.798703670501709, 6.37121...","[0.6531610488891602, 0.4426010549068451, 0.557...","[8.640238488200614, 7.846710352582983, 6.27778...","[0.6671531046393501, 0.44448338671821863, 0.55..."
1,Au,Si3N4,64.528732,91.107384,0.000024,2,"[2.9729716777801514, 2.8541548252105713, 2.712...","[0.2586507201194763, 0.2723049819469452, 0.292...","[2.968624455031121, 2.8256808535952618, 2.6933...","[0.2570695685589054, 0.27063707700378375, 0.28..."
2,SiO2,SiO2,8.197314,63.836575,0.001219,3,"[0.43795984983444214, 0.40468156337738037, 0.3...","[0.16609422862529755, 0.1605709046125412, 0.15...","[0.438275306737997, 0.40813270264231033, 0.380...","[0.16753281367386985, 0.16136237988757696, 0.1..."
3,ZrO2,ZrO2,8.27376,114.098946,0.000036,4,"[19.226835250854492, 16.326990127563477, 14.47...","[2.042360782623291, 1.6524603366851807, 1.4473...","[19.374567234626795, 16.28290895864681, 14.360...","[2.034854279291565, 1.6203112118805536, 1.4122..."
4,Au,TiO2,49.631756,79.075127,0.000250,5,"[5.816237926483154, 6.035409450531006, 6.35834...","[2.065091371536255, 2.100466728210449, 1.98399...","[5.712992555399698, 6.0070799348340005, 6.3316...","[2.12613556125396, 2.070087323727204, 1.997170..."
...,...,...,...,...,...,...,...,...,...,...
95,ZrO2,SiO2,27.764015,109.388,0.000002,96,"[4.784523010253906, 4.474790573120117, 4.14359...","[0.05459509417414665, 0.0638289675116539, 0.07...","[4.812068198356359, 4.467392009552152, 4.15140...","[0.057050696276052026, 0.06412063632690321, 0...."
96,SiO2,SiO2,58.008728,138.531311,0.000006,97,"[10.751150131225586, 10.29588508605957, 9.7830...","[0.406226247549057, 0.38081347942352295, 0.345...","[10.78927981242225, 10.276628812195597, 9.7958...","[0.4193436862504509, 0.3883020173005686, 0.354..."
97,Si,Ag,67.702515,97.790672,0.001091,98,"[14.44161319732666, 14.306793212890625, 12.932...","[-0.0025905990041792393, 0.06454270333051682, ...","[13.476763962163984, 14.415812087652041, 12.69...","[0.004223051313744561, 0.03439426048977227, 0...."
98,Ag,Au,58.239803,146.192474,0.000005,99,"[13.335456848144531, 12.93932056427002, 12.587...","[0.8511751294136047, 0.9003582000732422, 0.933...","[13.212818197672487, 12.852309342567487, 12.51...","[0.858572409124735, 0.9046687442272353, 0.9365..."


In [30]:
final_best_geometries_df["log_Qfwd"] = final_best_geometries_df["mie_Qfwd"].apply(
    lambda x: np.log1p(np.array(x))
)
final_best_geometries_df["log_Qback"] = final_best_geometries_df["mie_Qback"].apply(
    lambda x: np.log1p(np.array(x))
)
final_best_geometries_df.head()

Unnamed: 0,mat_core,mat_shell,r_core,r_shell,Total Loss,Sample,predicted_Qfwd,predicted_Qback,mie_Qfwd,mie_Qback,log_Qfwd,log_Qback
0,Si3N4,Si,38.608547,132.146255,8.5e-05,1,"[8.743148803710938, 7.798703670501709, 6.37121...","[0.6531610488891602, 0.4426010549068451, 0.557...","[8.640238488200614, 7.846710352582983, 6.27778...","[0.6671531046393501, 0.44448338671821863, 0.55...","[2.265945847756337, 2.1800456783454227, 1.9848...","[0.511117443965944, 0.36775173979759196, 0.444..."
1,Au,Si3N4,64.528732,91.107384,2.4e-05,2,"[2.9729716777801514, 2.8541548252105713, 2.712...","[0.2586507201194763, 0.2723049819469452, 0.292...","[2.968624455031121, 2.8256808535952618, 2.6933...","[0.2570695685589054, 0.27063707700378375, 0.28...","[1.3784195497805336, 1.3417364523781492, 1.306...","[0.228783272992234, 0.2395184101295558, 0.2516..."
2,SiO2,SiO2,8.197314,63.836575,0.001219,3,"[0.43795984983444214, 0.40468156337738037, 0.3...","[0.16609422862529755, 0.1605709046125412, 0.15...","[0.438275306737997, 0.40813270264231033, 0.380...","[0.16753281367386985, 0.16136237988757696, 0.1...","[0.3634446921135044, 0.3422645023304211, 0.322...","[0.15489281609646058, 0.1495937813936183, 0.14..."
3,ZrO2,ZrO2,8.27376,114.098946,3.6e-05,4,"[19.226835250854492, 16.326990127563477, 14.47...","[2.042360782623291, 1.6524603366851807, 1.4473...","[19.374567234626795, 16.28290895864681, 14.360...","[2.034854279291565, 1.6203112118805536, 1.4122...","[3.014287418883037, 2.8497180917221816, 2.7317...","[1.1101634099457875, 0.9632930938793611, 0.880..."
4,Au,TiO2,49.631756,79.075127,0.00025,5,"[5.816237926483154, 6.035409450531006, 6.35834...","[2.065091371536255, 2.100466728210449, 1.98399...","[5.712992555399698, 6.0070799348340005, 6.3316...","[2.12613556125396, 2.070087323727204, 1.997170...","[1.9040448359733833, 1.9469210571774738, 1.992...","[1.1397975967832545, 1.1217060054053927, 1.097..."


## Save best geometries

In [31]:
best_geometries_path = "best_geometries/gradient_inverse_test_data.pkl"

# Saving DataFrames
with open(best_geometries_path, "wb") as f:
    pickle.dump(final_best_geometries_df, f)