In [None]:
import random
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset,  random_split
from torch.optim import Adam
from parameter import *
from VAE_Model import *
import joblib

# Device configuration: run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Clear CUDA cache
torch.cuda.empty_cache()

# Set seeds for reproducibility.
# Every random source used by this notebook is seeded explicitly:
#   - Python's `random` (used further down to pick a random test index),
#   - torch (dataset split, loader shuffling, latent sampling),
#   - NumPy's legacy global state and the `rng` Generator.
# Previously `random` was never seeded and `rng` was created without a seed,
# so a Restart & Run All did not reproduce the same numbers.
random.seed(0)
torch.manual_seed(0)
rng = np.random.default_rng(13)
np.random.seed(13)

In [None]:
# Read the two input tables (property values and compositions) from disk.
# Unpacking the map forces both reads immediately, in the listed order.
Properties, Compositions = map(
    pd.read_csv,
    ('files/Data/Properties.csv', 'files/Data/Compositions.csv'),
)

'''
Properties indices:
 0: 'Density'
 1: 'Young's modulus'
 2: 'Flexural modulus'
 3: 'Shear modulus'
 4: 'Bulk modulus'
 5: 'Poisson's ratio'
 6: 'Melting point'
 7: 'Thermal conductivity'
 8: 'Specific heat capacity'
 9: 'Thermal expansion coefficient'
10: 'Latent heat of fusion'
11: 'Electrical conductivity'
12: 'Acoustic velocity'
13: 'Average Atomic Weight'
'''

'''
Compositions indices:
  0: 'Ag (silver)'
  1: 'Al (aluminum)'
  2: 'As (arsenic)'
  3: 'Au (gold)'
  4: 'B (boron)'
  5: 'Be (beryllium)'
  6: 'BeO (beryllia)'
  7: 'Bi (bismuth)'
  8: 'C (carbon)'
  9: 'Ca (calcium)'
 10: 'Cd (cadmium)'
 11: 'Ce (cerium)'
 12: 'Co (cobalt)'
 13: 'Cr (chromium)'
 14: 'Cu (copper)'
 15: 'Dy (dysprosium)'
 16: 'Er (erbium)'
 17: 'Eu (europium)'
 18: 'Fe (iron)'
 19: 'Ga (gallium)'
 20: 'Gd (gadolinium)'
 21: 'Ge (germanium)'
 22: 'H (hydrogen)'
 23: 'Hf (hafnium)'
 24: 'Ho (holmium)'
 25: 'In (indium)'
 26: 'Ir (iridium)'
 27: 'La (lanthanum)'
 28: 'Li (lithium)'
 29: 'Lu (lutetium)'
 30: 'Mg (magnesium)'
 31: 'Mn (manganese)'
 32: 'Mo (molybdenum)'
 33: 'N (nitrogen)'
 34: 'Nb (niobium)'
 35: 'Nd (neodymium)'
 36: 'Ni (nickel)'
 37: 'O (oxygen)'
 38: 'O2 (oxygen gas)'
 39: 'Os (osmium)'
 40: 'P (phosphorus)'
 41: 'Pb (lead)'
 42: 'Pd (palladium)'
 43: 'Pr (praseodymium)'
 44: 'Pt (platinum)'
 45: 'Re (rhenium)'
 46: 'Rh (rhodium)'
 47: 'Ru (ruthenium)'
 48: 'S (sulfur)'
 49: 'Sb (antimony)'
 50: 'Sc (scandium)'
 51: 'Se (selenium)'
 52: 'Si (silicon)'
 53: 'Sm (samarium)'
 54: 'Sn (tin)'
 55: 'Sr (strontium)'
 56: 'Ta (tantalum)'
 57: 'Tb (terbium)'
 58: 'Te (tellurium)'
 59: 'ThO2 (thoria)'
 60: 'Ti (titanium)'
 61: 'Tl (thallium)'
 62: 'Tm (thulium)'
 63: 'U (uranium)'
 64: 'V (vanadium)'
 65: 'Y (Yttrium)'
 66: 'Yb (Ytterbium)'
 67: 'W (Tungsten)'
 68: 'Zn (Zinc)'
 69: 'Zr (Zirconium)'
'''

# Load the pre-fitted scaler for the property table.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load scaler files that come from a trusted source.
scaler_y = joblib.load('files/Scalers/scaler_Properties.pkl')
X = Compositions
Y = Properties

# Compositions are used as stored in the CSV; properties are transformed with
# the loaded scaler.
XX = X.values
YY = scaler_y.transform(Y)

XX = torch.tensor(XX, dtype=torch.float32)
YY = torch.tensor(YY, dtype=torch.float32)

# Pair every composition row with its (scaled) property row.
dataset = TensorDataset(XX, YY)

# Hold out a fraction `valid_size` of the rows as the test split.
# random_split draws from torch's global RNG, so the split follows
# torch.manual_seed.
num_train = len(dataset)
split = int(np.floor(valid_size * num_train))
train_dataset, test_dataset = random_split(dataset=dataset, lengths=[num_train - split, split])
train_loader = DataLoaderX(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoaderX(test_dataset, batch_size=test_batch_size, shuffle=True, pin_memory=True)

# Rebuild the networks and restore their trained weights.
# map_location=device remaps the stored tensors onto the device selected for
# this session; without it, a checkpoint saved on a GPU cannot be loaded on a
# CPU-only machine even though `device` already falls back to 'cpu'.
vae = vaeModel().to(device)
p_model = pModel().to(device)
vae.load_state_dict(torch.load(savedModelFolder + '/model.pt', map_location=device))
p_model.load_state_dict(torch.load(savedModelFolder + '/p_model.pt', map_location=device))

# Inference mode for both networks (they are only evaluated in this notebook's
# visible cells, never trained).
vae.eval()
p_model.eval()

In [None]:
# ---------------------------------------------------------------
# Initialization of the latent-space optimization
# ---------------------------------------------------------------

# Pick one random entry of the test split. A length-1 slice is used instead of
# a plain index, so `x` and `c` are whatever the dataset returns for a slice.
held_out = test_loader.dataset
random_index = random.randint(0, len(held_out) - 1)
x, c = held_out[slice(random_index, random_index + 1)]
opt_value = c.to(device)

# The complete data set again, served as one unshuffled batch.
dataset = TensorDataset(XX, YY)
num_train = len(dataset)
data_loader = DataLoaderX(dataset, batch_size=num_train, shuffle=False, pin_memory=True)

# Optimization settings.
num_sample = 50                    # number of random latent starting points
opt_epoch = 100                    # optimizer steps per starting point
Adam_lr = 1e-4                     # learning rate configured for Adam
trace_back = True                  # NOTE(review): not referenced in the visible cells
optimization_method = ['Adam']
optimization = optimization_method[0]
recon_criterion = nn.MSELoss(reduction='sum')

# One random latent vector per starting point.
initial_z = torch.randn(num_sample, latent_dim, device=device)

print("Number of initial guesses = ", initial_z.shape[0])

# Zero-filled result buffers with one row per starting point.
updated_z = np.zeros(tuple(initial_z.shape))
predicted_c = np.zeros((initial_z.shape[0], param_Dim))


# Inverse design over the test loader: for the first row of every batch, run
# gradient descent on `num_sample` latent starting points so that the
# properties predicted for the decoded composition match the target on the
# selected features, then keep the best of the starting points.
#
# NOTE(review): the next cell repeats this loop with batch_size = 1; consider
# extracting the shared logic into a function.

# Initialize storage for predictions, ground truths, and compositions
Properties_test_all = []                  # True properties of each optimized target
Properties_pred_best = []                 # Best predicted properties per target
composition_best = []                     # Best decoded composition per target

best_sample_pred = None

# Property columns the loss is computed on:
# 0 = Density, 1 = Young's modulus, 5 = Poisson's ratio.
target_feature_idx = [0, 1, 5]

# Early-stopping patience. With the current opt_epoch = 100 the counter can
# never exceed it, so early stopping is effectively disabled.
patience = 100

for composition, properties in test_loader:
    composition = composition.to(device)
    properties = properties.to(device)

    # `properties` is already a tensor: detach it once per batch instead of
    # re-wrapping it with torch.tensor(...) on every optimization step.
    target_values = properties.detach()
    # Only the first row of the batch is used as the optimization target.
    target = target_values[0, target_feature_idx]

    # Per-starting-point result buffers.
    updated_z_np = np.zeros(initial_z.shape)
    predicted_properties_np = np.zeros([len(initial_z), param_Dim])
    updated_composition = np.zeros((num_sample, composition.shape[1]))
    best_accuracy_np = np.zeros(num_sample)

    for idx in range(num_sample):
        es = 0
        best_acc = 1e10

        # Fresh, trainable copy of the idx-th starting point.
        initial_guess_z = initial_z[idx, :].clone().reshape(1, initial_z.shape[-1]).requires_grad_()

        # Adam over the latent vector only, using the configured learning rate
        # (Adam_lr was previously defined but never passed to the optimizer).
        opt = torch.optim.Adam([initial_guess_z], lr=Adam_lr)

        for e in range(opt_epoch):
            # Gradients accumulate by default; clear them so each step uses
            # only the gradient of the current loss.
            opt.zero_grad()

            predicted_composition = vae.decoder(initial_guess_z)  # Decode latent vector
            predicted_composition = refine_composition(predicted_composition)

            inv_z_recon, inv_mu_recon, inv_std_recon = vae.encoder(predicted_composition)

            # Predict properties using mean from VAE encoder
            predicted_properties = p_model(inv_mu_recon)

            # Compute loss using only selected conditional features
            loss = recon_criterion(predicted_properties[0, target_feature_idx], target)

            # Backprop and optimization step. The graph is rebuilt on every
            # iteration, so it does not need to be retained.
            loss.backward()
            opt.step()
            accuracy = loss.item()

            # Early stopping and best tracking: keep the outputs of the step
            # with the lowest loss, not simply those of the last step.
            if e == 0 or accuracy < best_acc:
                best_acc = accuracy
                # .cpu() is required before .numpy() when running on CUDA.
                stored_z = inv_z_recon.detach().cpu().numpy()
                stored_properties = predicted_properties.detach().cpu().numpy()
                stored_composition = predicted_composition.detach().cpu().numpy()
                es = 0
            else:
                es += 1
                if es > patience:
                    break

        # Store best results for this starting point
        updated_z_np[idx, :] = stored_z
        predicted_properties_np[idx, :] = stored_properties
        updated_composition[idx, :] = stored_composition
        best_accuracy_np[idx] = best_acc

    # Sort all outputs by accuracy (ascending loss)
    sorted_indices = np.argsort(best_accuracy_np)
    updated_z_np_sorted = updated_z_np[sorted_indices]
    predicted_properties_np_sorted = predicted_properties_np[sorted_indices]
    updated_composition_sorted = updated_composition[sorted_indices]
    best_accuracy_sorted = best_accuracy_np[sorted_indices]

    # Store predictions and compositions.
    # Only the targeted row is recorded as ground truth so that the truth and
    # prediction arrays stay aligned row by row even when the loader's batch
    # size is larger than one.
    Properties_pred_best.append(predicted_properties_np_sorted[0, :].reshape(1, -1))  # Best predicted properties
    Properties_test_all.append(target_values[:1].cpu().numpy())                       # True properties (targeted row)
    composition_best.append(updated_composition_sorted[0, :].reshape(1, -1))          # Best composition (raw)
print("Optimization completed for all samples.")

# Concatenate all results

Properties_test_all = np.concatenate(Properties_test_all, axis=0)
Properties_pred_best = np.concatenate(Properties_pred_best, axis=0)
composition_best = np.concatenate(composition_best, axis=0)

# Inverse scale property predictions
Properties_test_all = scaler_y.inverse_transform(Properties_test_all)
Properties_pred_best = scaler_y.inverse_transform(Properties_pred_best)


In [None]:
# Re-create the test loader with one sample per batch so that every test
# sample becomes an optimization target.
test_loader = DataLoaderX(test_dataset, batch_size = 1, shuffle = True, pin_memory = True)
# Initialize storage for predictions, ground truths, and compositions
Properties_test_all = []                  # Store ground-truth material properties
Properties_pred_best = []                 # Store best predicted properties from optimization
composition_best = []                     # Store best refined compositions (decoded from latent)

best_sample_pred = None  # Placeholder for best latent vector if needed later

# Property columns the loss is computed on:
# 0 = Density, 1 = Young's modulus, 5 = Poisson's ratio.
target_feature_idx = [0, 1, 5]

# Early stopping patience. With the current opt_epoch = 100 the counter can
# never exceed it, so early stopping is effectively disabled.
patience = 100

# Inference and optimization loop over test dataset
for composition, properties in test_loader:
    composition = composition.to(device)     # Input composition (only its width is used below)
    properties = properties.to(device)       # Ground-truth material properties (target)

    # `properties` is already a tensor: detach it once per sample instead of
    # re-wrapping it with torch.tensor(...) on every optimization step.
    target_values = properties.detach()
    target = target_values[0, target_feature_idx]

    # Per-starting-point result buffers
    updated_z_np = np.zeros(initial_z.shape)                          # Re-encoded latent vectors
    predicted_properties_np = np.zeros([len(initial_z), param_Dim])   # Predicted properties
    updated_composition = np.zeros((num_sample, composition.shape[1]))# Refined compositions

    best_accuracy_np = np.zeros(num_sample)   # Store best loss value per latent sample

    # Loop through each latent sample
    for idx in range(num_sample):
        es = 0              # Early stopping counter
        best_acc = 1e10     # Best (lowest) loss seen so far for the current latent sample

        # Initialize latent vector for optimization (fresh, trainable copy)
        initial_guess_z = initial_z[idx, :].clone().reshape(1, initial_z.shape[-1]).requires_grad_()

        # Adam over the latent vector only, using the configured learning rate
        # (Adam_lr was previously defined but never passed to the optimizer).
        opt = torch.optim.Adam([initial_guess_z], lr=Adam_lr)

        # Run optimization loop
        for e in range(opt_epoch):
            # Gradients accumulate by default; clear them so each step uses
            # only the gradient of the current loss.
            opt.zero_grad()

            predicted_composition = vae.decoder(initial_guess_z)               # Decode latent to composition
            predicted_composition = refine_composition(predicted_composition)  # Post-process the decoded composition

            inv_z_recon, inv_mu_recon, inv_std_recon = vae.encoder(predicted_composition)  # Re-encode

            predicted_properties = p_model(inv_mu_recon)  # Predict properties from the encoder mean

            # Compute loss on selected conditional features [0, 1, 5]
            loss = recon_criterion(predicted_properties[0, target_feature_idx], target)

            # Backpropagation and step. The graph is rebuilt on every
            # iteration, so it does not need to be retained.
            loss.backward()
            opt.step()

            accuracy = loss.item()

            # Early stopping logic and best result tracking: keep the outputs
            # of the step with the lowest loss, not simply those of the last step.
            if e == 0 or accuracy < best_acc:
                best_acc = accuracy
                # .cpu() is required before .numpy() when running on CUDA.
                stored_z = inv_z_recon.detach().cpu().numpy()
                stored_properties = predicted_properties.detach().cpu().numpy()
                stored_composition = predicted_composition.detach().cpu().numpy()
                es = 0  # Reset counter if improvement
            else:
                es += 1
                if es > patience:
                    break

        # Store best results for current latent sample
        updated_z_np[idx, :] = stored_z
        predicted_properties_np[idx, :] = stored_properties
        updated_composition[idx, :] = stored_composition
        best_accuracy_np[idx] = best_acc

    # Sort all results by best loss (ascending)
    sorted_indices = np.argsort(best_accuracy_np)
    updated_z_np_sorted = updated_z_np[sorted_indices]
    predicted_properties_np_sorted = predicted_properties_np[sorted_indices]
    updated_composition_sorted = updated_composition[sorted_indices]
    best_accuracy_sorted = best_accuracy_np[sorted_indices]

    # Save best predictions and compositions
    Properties_pred_best.append(predicted_properties_np_sorted[0, :].reshape(1, -1))   # Best prediction
    Properties_test_all.append(target_values.cpu().numpy())                            # Ground truth
    composition_best.append(updated_composition_sorted[0, :].reshape(1, -1))           # Best composition

print("Optimization completed for all samples.")

# Concatenate all results from batches
Properties_test_all = np.concatenate(Properties_test_all, axis=0)
Properties_pred_best = np.concatenate(Properties_pred_best, axis=0)
composition_best = np.concatenate(composition_best, axis=0)

# Inverse scale the predicted and true properties for final comparison
Properties_test_all = scaler_y.inverse_transform(Properties_test_all)
Properties_pred_best = scaler_y.inverse_transform(Properties_pred_best)
