In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import random
from sklearn.metrics import r2_score
import random

The code below was used to train on the binaries dataset only.

In [None]:
def generate_train_test_indices(total_binary_count, total_data_count, train_ratio, seed=42): ## different seed tested such as 42 , 123 , 72  #27, 45, 99
    """Draw a random fraction of the binary samples for training; the rest is test.

    Parameters
    ----------
    total_binary_count : int
        Number of binary samples. Training indices are sampled from
        ``range(total_binary_count)``.
    total_data_count : int
        Size of the whole dataset. Test indices are taken from
        ``range(total_data_count)``.
    train_ratio : float
        Fraction of ``total_binary_count`` to draw for training; the product
        is truncated to an integer.
    seed : int or None
        When not None, the global ``random`` module is re-seeded with this
        value before sampling, which makes the split reproducible.

    Returns
    -------
    tuple[list[int], list[int]]
        ``(train_indices, test_indices)``. ``train_indices`` is in sampling
        order; ``test_indices`` is ascending and contains every index of
        ``range(total_data_count)`` that was not drawn for training.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when the requested training count is
        negative or larger than ``total_binary_count``.
    """
    if seed is not None:
        random.seed(seed)

    train_count = int(total_binary_count * train_ratio)
    train_indices = random.sample(range(total_binary_count), train_count)

    # Set lookup keeps the complement linear in the dataset size; testing
    # membership against the list would rescan it for every index.
    train_lookup = set(train_indices)
    test_indices = [i for i in range(total_data_count) if i not in train_lookup]
    return train_indices, test_indices

def custom_loss(y_true, y_pred):
    """Mean squared error divided by the summed squared targets.

    The numerator is whatever ``tf.keras.losses.mean_squared_error`` returns
    for ``(y_true, y_pred)``. The denominator is the sum of ``y_true ** 2``
    over every element of ``y_true`` (no axis is given to ``reduce_sum``),
    plus a small constant so that all-zero targets do not divide by zero.
    """
    eps = 1e-7
    target_energy = tf.reduce_sum(tf.square(y_true)) + eps
    return tf.keras.losses.mean_squared_error(y_true, y_pred) / target_energy

def build_model(kernel_size, dropout, input_shape=(56, 40, 40, 1), num_outputs=8):
    """Build the (uncompiled) 3D-CNN regression model.

    Architecture: three ``Conv3D`` (16, 32, 64 filters, ReLU, "same" padding,
    channels-last) each followed by ``MaxPooling3D`` with a 2x2x2 window, then
    ``Flatten`` -> ``Dense(2048, relu)`` -> ``Dropout`` -> ``Dense(1024, relu)``
    -> ``Dropout`` -> a linear ``Dense`` output layer named ``'parameters'``.

    Parameters
    ----------
    kernel_size : int or tuple of 3 ints
        Kernel size shared by the three convolution layers.
    dropout : float
        Rate used by both dropout layers.
    input_shape : tuple of ints, optional
        Per-sample input shape, channels last. Defaults to ``(56, 40, 40, 1)``.
    num_outputs : int, optional
        Width of the output layer, i.e. how many target properties are
        predicted. Defaults to 8; lower it when training on a subset of the
        property columns.

    Returns
    -------
    keras.Model
        The functional model; the caller is responsible for compiling it.
    """
    input_layer = keras.Input(shape=input_shape)

    # Convolutional feature extractor: filter count doubles at each stage.
    x = input_layer
    for filters in (16, 32, 64):
        x = layers.Conv3D(filters, kernel_size, activation='relu', data_format="channels_last", padding="same")(x)
        x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)

    # Fully connected regression head; dropout follows each hidden dense layer.
    x = layers.Flatten()(x)
    x = layers.Dense(2048, activation='relu')(x)
    x = layers.Dropout(rate=dropout)(x)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dropout(rate=dropout)(x)
    outputs = layers.Dense(num_outputs, name='parameters')(x)

    return keras.Model(inputs=input_layer, outputs=outputs)

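The code below was used to train on the entire binaries dataset plus the ternaries. Note that this cell defines the same function names as the binaries-only cell above, so whichever of the two cells is executed last is the one in effect.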

In [None]:
# Seeds tried so far include 1234, 42 and 32.
def generate_train_test_indices(total_binary_count, total_data_count, train_ratio, seed=42,
                                binary_train_cap=1110, ternary_pool=(1110, 1473), reserved_count=78):
    """Train on the leading binaries plus a random share of a later index pool.

    The training set is the first ``binary_train_cap`` indices of
    ``range(total_binary_count)`` followed by indices sampled without
    replacement from ``range(*ternary_pool)``. Every other index of
    ``range(total_data_count)`` becomes test data.

    Parameters
    ----------
    total_binary_count : int
        Number of binary samples (dataset indices ``0 .. total_binary_count - 1``).
    total_data_count : int
        Size of the whole dataset.
    train_ratio : float
        Fraction applied to
        ``total_data_count - <binaries used> - reserved_count`` to decide how
        many extra indices are sampled; the product is truncated to an integer.
    seed : int or None
        When not None, the global ``random`` module is re-seeded with it.
    binary_train_cap : int, optional
        Upper bound on how many leading binaries go into training
        (default 1110, the value previously hard-coded here).
    ternary_pool : tuple[int, int], optional
        ``(start, stop)`` of the half-open index range the extra training
        samples are drawn from (default ``(1110, 1473)``, previously hard-coded).
    reserved_count : int, optional
        Number of samples subtracted before applying ``train_ratio``
        (default 78, previously hard-coded). Presumably a block of samples
        that must never be used for training -- TODO confirm.

    Returns
    -------
    tuple[list[int], list[int]]
        ``(train_indices, test_indices)``; ``test_indices`` is ascending.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when the computed sample count is
        negative or exceeds the pool size.
    """
    if seed is not None:
        random.seed(seed)

    train_indices = list(range(total_binary_count))[:binary_train_cap]

    remaining_indices_count = total_data_count - len(train_indices)
    additional_train_count = int((remaining_indices_count - reserved_count) * train_ratio)

    # NOTE(review): with the notebook's call (1110 binaries, 1550 samples) the
    # count above is based on 1550 - 1110 - 78 = 362 samples, yet the default
    # pool range(1110, 1473) holds 363 indices, so index 1472 can be drawn.
    # Confirm whether the pool should stop at 1472 instead.
    pool_start, pool_stop = ternary_pool
    train_indices.extend(random.sample(range(pool_start, pool_stop), additional_train_count))

    # Set lookup keeps the complement linear in the dataset size.
    train_lookup = set(train_indices)
    test_indices = [i for i in range(total_data_count) if i not in train_lookup]
    return train_indices, test_indices

def custom_loss(y_true, y_pred):
    """Return the Keras mean squared error scaled by 1 / (sum(y_true^2) + 1e-7).

    ``sum(y_true^2)`` runs over all elements of ``y_true``; the added constant
    guards against division by zero.
    """
    squared_error = tf.keras.losses.mean_squared_error(y_true, y_pred)
    normaliser = tf.reduce_sum(tf.square(y_true))
    normaliser = normaliser + 1e-7
    return squared_error / normaliser

def build_model(kernel_size, dropout, input_shape=(56, 40, 40, 1), num_outputs=8):
    """Assemble the 3D convolutional regression network (not compiled).

    Layers, in order: Conv3D(16) / MaxPooling3D, Conv3D(32) / MaxPooling3D,
    Conv3D(64) / MaxPooling3D (all convolutions ReLU, "same" padding,
    channels-last; all pools 2x2x2), Flatten, Dense(2048, relu), Dropout,
    Dense(1024, relu), Dropout, and a linear Dense output named 'parameters'.

    Parameters
    ----------
    kernel_size : int or tuple of 3 ints
        Kernel size used by every convolution layer.
    dropout : float
        Dropout rate applied after each hidden dense layer.
    input_shape : tuple of ints, optional
        Shape of one input sample, channels last (default ``(56, 40, 40, 1)``).
    num_outputs : int, optional
        Number of regression targets produced by the output layer
        (default 8). Change it when only some property columns are trained.

    Returns
    -------
    keras.Model
        Functional model mapping the input volume to ``num_outputs`` values.
    """
    conv_options = dict(activation='relu', data_format="channels_last", padding="same")
    pool_window = (2, 2, 2)

    input_layer = keras.Input(shape=input_shape)

    # Stage 1-3: convolution followed by spatial down-sampling.
    x = layers.Conv3D(16, kernel_size, **conv_options)(input_layer)
    x = layers.MaxPooling3D(pool_size=pool_window)(x)
    x = layers.Conv3D(32, kernel_size, **conv_options)(x)
    x = layers.MaxPooling3D(pool_size=pool_window)(x)
    x = layers.Conv3D(64, kernel_size, **conv_options)(x)
    x = layers.MaxPooling3D(pool_size=pool_window)(x)

    # Dense head with dropout after each hidden layer.
    x = layers.Flatten()(x)
    for units in (2048, 1024):
        x = layers.Dense(units, activation='relu')(x)
        x = layers.Dropout(rate=dropout)(x)

    outputs = layers.Dense(num_outputs, name='parameters')(x)
    model = keras.Model(inputs=input_layer, outputs=outputs)
    return model

In [None]:
# Input data: one row of properties per sample (CSV) and the matching image
# array (NPY). Rows of `properties` and entries of `images` are indexed with
# the same positions further down.
properties = pd.read_csv("merged_csv.csv")
images = np.load("merged_array.npy")

In [None]:
# Split the dataset. train_ratio may change depending on how many images are
# used for training.
train_indices, test_indices = generate_train_test_indices(total_binary_count=1110, total_data_count=1550, train_ratio=0.25)
X_train = images[train_indices]
X_test = images[test_indices]

# Target columns. All 8 properties are used here; edit this list to train on a
# subset of them.
columns_to_normalize = ['c11', 'c12', 'c44', 'G', 'B', 'E_VRH', 'nu', 'Cohesive_energy']

# Fit the scaler on the TRAINING targets only and reuse that fit for the test
# targets. Re-fitting on the test rows would leak test statistics and would
# leave `scaler` holding the test min/max, so later inverse_transform calls
# on model predictions would be un-scaled with the wrong range.
# Building fresh DataFrames (instead of assigning into an .iloc slice) avoids
# chained-assignment warnings and yields a clean 0..n-1 index.
scaler = MinMaxScaler()
y_train = pd.DataFrame(
    scaler.fit_transform(properties.iloc[train_indices][columns_to_normalize]),
    columns=columns_to_normalize,
)
y_test = pd.DataFrame(
    scaler.transform(properties.iloc[test_indices][columns_to_normalize]),
    columns=columns_to_normalize,
)

# Hold out 10% of the training samples for validation. val_indices are
# POSITIONS within X_train / y_train, not dataset-level indices.
np.random.seed(42)
val_split = 0.1
val_size = int(X_train.shape[0] * val_split)
val_indices = np.random.choice(X_train.shape[0], val_size, replace=False)
val_data = (X_train[val_indices], y_train.iloc[val_indices])
train_data = (np.delete(X_train, val_indices, axis=0), y_train.drop(index=val_indices))

In [None]:
# Record which split every dataset index belongs to.
#
# val_indices was drawn from range(X_train.shape[0]), so it holds positions
# inside train_indices rather than dataset-level indices. Translate it through
# train_indices before comparing against dataset indices; using the raw
# positions would label the wrong samples as validation.
val_dataset_indices = [train_indices[pos] for pos in val_indices]
val_index_set = set(val_dataset_indices)

# train_indices itself is left untouched so this cell gives the same result
# when it is run again; the validation-free list gets its own name.
train_only_indices = [idx for idx in train_indices if idx not in val_index_set]
train_index_set = set(train_only_indices)
test_index_set = set(test_indices)

all_indices = np.arange(0, 1550)
data = []
for idx in all_indices:
    if idx in val_index_set:
        set_type = 'validation'
    elif idx in train_index_set:
        set_type = 'training'
    elif idx in test_index_set:
        set_type = 'test'
    else:
        set_type = 'none'
    data.append((idx, set_type))
df_25B = pd.DataFrame(data, columns=['index', 'set_type'])

In [None]:
# Network with a (3, 3, 11) convolution kernel and a dropout rate of 0.3.
model = build_model((3, 3, 11), 0.3)

# All three callbacks watch the validation loss.
# Checkpoint: keep only the weights of the best epoch seen so far.
# NOTE(review): some Keras versions require weight-only checkpoints to end in
# ".weights.h5" -- confirm against the installed version.
checkpoint_callback = ModelCheckpoint(
    "May14_allproperties_25B.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
# Stop after 100 epochs without improvement and roll back to the best weights.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.3 after 30 stagnant epochs, floor at 1e-7.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=30,
    min_lr=1e-7,
)

model.compile(optimizer='adam', loss=custom_loss)

In [None]:
# Train for at most 5000 epochs; the early-stopping callback normally ends the
# run sooner. train_data / val_data are (inputs, targets) pairs.
history = model.fit(
    x=train_data[0],
    y=train_data[1],
    validation_data=val_data,
    epochs=5000,
    batch_size=128,
    callbacks=[checkpoint_callback, early_stopping_callback, reduce_lr_callback],
)

In [None]:
# Persist the per-epoch metrics recorded by model.fit (one column per metric).
history_df = pd.DataFrame(data=history.history)
history_df.to_csv('May14_allproperties_25B.csv', index=False)

In [None]:
# Predict every sample with the checkpointed weights and map the outputs back
# to the original property scale via the fitted scaler.
columns_to_normalize = ['c11', 'c12', 'c44', 'G', 'B', 'E_VRH', 'nu', 'Cohesive_energy']
model.load_weights("May14_allproperties_25B.h5")
predictions = model.predict(images)
denormalized_predictions = scaler.inverse_transform(predictions)
denormalized_df = pd.DataFrame(data=denormalized_predictions, columns=columns_to_normalize)

In [None]:
# Same prediction / de-normalisation as for the 25B run, but with the
# "100B_25T" weights, then written to Excel next to the split labels.
# NOTE(review): neither df_100B_25T nor May14_allproperties_100B_25T.h5 is
# created by the cells visible in this notebook -- presumably they come from a
# separate run of the binaries-plus-ternaries configuration; confirm before
# running this cell on a fresh kernel.
columns_to_normalize = ['c11', 'c12', 'c44', 'G', 'B', 'E_VRH', 'nu', 'Cohesive_energy']
model.load_weights("May14_allproperties_100B_25T.h5")
predictions = model.predict(images)
denormalized_predictions = scaler.inverse_transform(predictions)
denormalized_df = pd.DataFrame(data=denormalized_predictions, columns=columns_to_normalize)

# The side-by-side concat below only makes sense for equally long frames.
assert denormalized_df.shape[0] == df_100B_25T.shape[0], "The number of rows in both DataFrames must be the same."
merged_df = pd.concat([df_100B_25T, denormalized_df], axis=1)
merged_df.to_excel('results_100B_25T.xlsx', index=False)

In [None]:
# R2 on the test split, using the checkpointed weights.
# Scores are computed in the normalised target space (y_test is scaled).
model.load_weights("May14_allproperties_25B.h5")
predictions = model.predict(X_test)
r2_scores = [
    r2_score(y_test.iloc[:, col], predictions[:, col])
    for col in range(predictions.shape[1])
]
print("R2-scores for each target parameter:", r2_scores)

In [None]:
# R2 on the training split (validation rows already removed from train_data).
train_inputs, train_targets = train_data
predictions = model.predict(train_inputs)
r2_scores = [
    r2_score(train_targets.iloc[:, col], predictions[:, col])
    for col in range(predictions.shape[1])
]
print("R2-scores for each target parameter:", r2_scores)

In [None]:
# R2 on the validation split, one score per output column.
val_inputs, val_targets = val_data
predictions = model.predict(val_inputs)
r2_scores = [
    r2_score(val_targets.iloc[:, col], predictions[:, col])
    for col in range(predictions.shape[1])
]
print("R2-scores for each target parameter:", r2_scores)