In [None]:
# Import libraries
import numpy as np
import math as math
import pandas as pd
import matplotlib.pyplot as plt
from numpy import loadtxt
from numpy.random import seed
import random
import os

from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression, make_blobs
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn import preprocessing, metrics, model_selection

from matplotlib.legend_handler import HandlerLine2D

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
import tensorflow.keras.backend as K
from tensorflow.keras.losses import Huber
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Dense

import optuna




In [None]:
# Load data
# Read every descriptor sheet from the workbook and join them column-wise.
# The workbook is opened through a context manager so the file handle is
# released once all sheets have been read.
with pd.ExcelFile('polymer properties') as xls1:
    # Collect the sheets first and concatenate once; growing a DataFrame with
    # pd.concat inside a loop re-copies all previous columns on every pass.
    sheet_frames = [
        pd.read_excel(xls1, sheet_name='descriptors.xls_' + str(num))
        for num in range(109)
    ]

file_part1 = pd.concat(sheet_frames, axis=1)

dataset = file_part1.values

print(dataset.shape)


# Column 1 holds the name, column 2 the target value, columns 3+ the descriptors.
A = dataset[:, 1]
# X is the value of descriptors
X = dataset[:, 3:]
Y = dataset[:, 2]
print(X.shape)

# NOTE(review): the scaler and the PCA below are fitted on every row, i.e.
# before any train/test split is made downstream, so test rows influence the
# transform. Confirm this is acceptable for the reported metrics.
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)


# Instantiate a PCA object with 13 components
pca = PCA(n_components=13)

# Fit the PCA model to the scaled dataset
X_pca = pca.fit_transform(X_scale)


# Per-property subsets are taken by hard-coded row ranges of the combined table.
X_cp = X_pca[:123]
Y_cp = Y[:123]

X_cv = X_pca[123:133]
Y_cv = Y[123:133]

X_flexural = X_pca[133:146]
Y_flexural = Y[133:146]

X_shear = X_pca[146:164]
Y_shear = Y[146:164]

X_dynamic = X_pca[164:]
Y_dynamic = Y[164:]


In [None]:
# Predict flexural stress
# Load the saved model
# NOTE(review): this cell loads 'Model.h5' while the shear and dynamic-viscosity
# cells load 'Base model.h5' -- confirm which checkpoint is intended.
base_model = load_model('Model.h5')
# NOTE(review): in tf.keras `Model.layers` returns a new list on each access, so
# this pop() most likely leaves the model unchanged -- confirm.
base_model.layers.pop()
base_model.trainable = True

# The train/test arrays used below were never created in this notebook, so the
# cell failed with a NameError on a fresh kernel. The split mirrors the other
# property cells (30 % held out, cast to float).
# TODO(review): the original random_state for this split is unknown; 77 simply
# reuses this cell's seed -- replace with the value used for published results.
X_flexural_train, X_flexural_test, Y_flexural_train, Y_flexural_test = train_test_split(
    X_flexural, Y_flexural, test_size=0.3, random_state=77)

X_flexural_test = np.array(X_flexural_test).astype("float")
Y_flexural_test = np.array(Y_flexural_test).astype("float")
X_flexural_train = np.array(X_flexural_train).astype("float")
Y_flexural_train = np.array(Y_flexural_train).astype("float")

# Set random seed for reproducibility
tf.keras.utils.set_random_seed(77)

# Define the model creation function with Optuna
def create_model(trial):
    """Build and compile a fine-tuning model for one Optuna trial.

    The model consists of every layer of the module-level ``base_model`` except
    its last one, followed by ``n_layers`` new Dense layers and a single linear
    output unit.

    Args:
        trial: Object exposing Optuna's ``suggest_*`` API. This notebook passes
            both live trials and ``study.best_trial``.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, MSE loss).
    """
    # Work on a private copy of the pretrained network. Adding base_model's own
    # layer objects would let every trial keep training the same weights, so
    # later trials (and the final retrain) would start from earlier trials' state.
    pretrained = keras.models.clone_model(base_model)
    pretrained.set_weights(base_model.get_weights())

    model = keras.Sequential()

    # Reuse all pretrained layers except the last one, matching the other
    # property cells. Iterating over every layer kept the old output head
    # whenever `base_model.layers.pop()` had no effect.
    for layer in pretrained.layers[:-1]:
        model.add(layer)

    # Define the number of additional layers to add
    n_layers = trial.suggest_int('n_layers', 1, 5)  # Choose between 1 and 5 additional layers

    # Add additional layers as specified by Optuna
    for i in range(n_layers):
        n_units = trial.suggest_int(f'units_{i}', 10, 128)  # Choose between 10 and 128 units
        activation = trial.suggest_categorical(f'activation_{i}', ['relu', 'sigmoid', 'tanh', 'linear'])
        model.add(keras.layers.Dense(n_units, activation=activation))

    # Output layer
    model.add(keras.layers.Dense(1, activation='linear'))

    # Compile the model; suggest_float(log=True) replaces the deprecated
    # suggest_loguniform and samples the same log-scaled range.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    # 'accuracy' is not meaningful for a regression target; track MAE instead.
    # The loss stays first in the list returned by model.evaluate().
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Optuna objective for the flexural-stress model
def objective(trial):
    """Train one candidate model and return its loss on the flexural test split.

    NOTE(review): the value being minimised is computed on the test arrays, so
    the hyperparameter search sees the test data.
    """
    candidate = create_model(trial)
    candidate.fit(
        X_flexural_train,
        Y_flexural_train,
        epochs=380,
        batch_size=2,
        verbose=0,
    )
    # evaluate() yields [loss, metric, ...]; only the loss is reported to Optuna.
    return candidate.evaluate(X_flexural_test, Y_flexural_test, verbose=0)[0]

# Create a study object and optimize
# NOTE(review): `objective` scores each trial on the test split, so the test
# metrics reported below are optimistically biased.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

# Print the best hyperparameters found
print("Best hyperparameters: ", study.best_params)

# Retrain the model with the best hyperparameters
best_model = create_model(study.best_trial)
best_model.fit(X_flexural_train, Y_flexural_train, epochs=380, batch_size=2, verbose=1)

# Evaluate the best model on the test set
test_loss = best_model.evaluate(X_flexural_test, Y_flexural_test)
print("Test loss of the best model: ", test_loss)


train_results = []
test_results = []

# Predict with the model retrained above. The name used here before
# (NEW_MODEL) is not defined anywhere in this notebook and raised a NameError.
Y_flexural_pred_train = best_model.predict(X_flexural_train)
Y_flexural_pred_test = best_model.predict(X_flexural_test)
R2train = r2_score(Y_flexural_train, Y_flexural_pred_train)
train_results.append(R2train)
R2test = r2_score(Y_flexural_test, Y_flexural_pred_test)
test_results.append(R2test)
# `metrics` is already imported from sklearn in the imports cell.
print('Mean Absolute Error of train:', metrics.mean_absolute_error(Y_flexural_train, Y_flexural_pred_train))
print('Mean Absolute Error of test:', metrics.mean_absolute_error(Y_flexural_test, Y_flexural_pred_test))
# NOTE(review): the two values printed below are R^2 scores (each list holds a
# single entry); the "Max random state" label is misleading.
print('Max random state for train set:', max((train_results)))
print('Max random state for test set:', max((test_results)))

mse_train = mean_squared_error(Y_flexural_train, Y_flexural_pred_train)
mse_test = mean_squared_error(Y_flexural_test, Y_flexural_pred_test)

# NOTE(review): mean_squared_error already averages over samples, so these
# values are MSE / n rather than a per-sample error -- confirm this is intended.
mse_per_sample_train = mse_train / len(Y_flexural_train)
mse_per_sample_test = mse_test / len(Y_flexural_test)

print('MSE per sample on train set:', mse_per_sample_train)
print('MSE per sample on test set:', mse_per_sample_test)



# Predicted-vs-expected scatter for the flexural-stress model.
fig, ax = plt.subplots(dpi=200)

# Dashed y = x reference line across the plotted range.
x, y = [0, 0.25], [0, 0.25]
ax.plot(x, y, '--', color='k')
ax.set_xlim(0, 0.25)
ax.set_ylim(0, 0.25)

ax.plot(Y_flexural_train, Y_flexural_pred_train, 'o', color='#FFFF00', label="Training set")
ax.plot(Y_flexural_test, Y_flexural_pred_test, 'o', color='k', label="Test set")

# Both axis labels share the same text styling.
label_style = dict(labelpad=12, fontsize=12, fontname='Times New Roman')
ax.set_xlabel('Expected Flexural stress (GPa)', **label_style)
ax.set_ylabel('Predicted Flexural stress (GPa)', **label_style)

# Thicken the frame on all four sides.
for axis in ('top', 'bottom', 'left', 'right'):
    ax.spines[axis].set_linewidth(1.5)

# Inward-pointing ticks on both axes.
ax.tick_params(axis='both', direction='in')

ax.legend(loc='best', fontsize=10)
plt.show()

In [None]:
# Predict Cv
# Load the saved model that the Cv model is built on.
base_model = load_model('Model.h5')
# NOTE(review): in tf.keras `Model.layers` returns a new list on each access, so
# this pop() most likely leaves the model unchanged -- confirm.
base_model.layers.pop()
base_model.trainable = True

# Hold out 30 % of the Cv rows and convert all four pieces to float arrays.
# train_test_split returns them in the order X_train, X_test, Y_train, Y_test.
X_cv_train, X_cv_test, Y_cv_train, Y_cv_test = (
    np.array(part).astype("float")
    for part in train_test_split(X_cv, Y_cv, test_size=0.3, random_state=64)
)

# Set random seed for reproducibility
tf.keras.utils.set_random_seed(36)

# Define the model creation function with Optuna
def create_model(trial):
    """Build and compile a fine-tuning model for one Optuna trial.

    The model consists of every layer of the module-level ``base_model`` except
    its last one, followed by ``n_layers`` new Dense layers and a single linear
    output unit.

    Args:
        trial: Object exposing Optuna's ``suggest_*`` API. This notebook passes
            both live trials and ``study.best_trial``.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, MSE loss).
    """
    # Work on a private copy of the pretrained network. Adding base_model's own
    # layer objects would let every trial keep training the same weights, so
    # later trials (and the final retrain) would start from earlier trials' state.
    pretrained = keras.models.clone_model(base_model)
    pretrained.set_weights(base_model.get_weights())

    model = keras.Sequential()

    # Add existing layers from the pretrained copy
    for layer in pretrained.layers[:-1]:  # Exclude the last layer
        model.add(layer)

    # Define the number of additional layers to add
    n_layers = trial.suggest_int('n_layers', 1, 5)  # Choose between 1 and 5 additional layers

    # Add additional layers as specified by Optuna
    for i in range(n_layers):
        n_units = trial.suggest_int(f'units_{i}', 10, 128)  # Choose between 10 and 128 units
        activation = trial.suggest_categorical(f'activation_{i}', ['relu', 'sigmoid', 'tanh', 'linear'])
        model.add(keras.layers.Dense(n_units, activation=activation))

    # Output layer
    model.add(keras.layers.Dense(1, activation='linear'))

    # Compile the model with a learning rate suggested by Optuna;
    # suggest_float(log=True) replaces the deprecated suggest_loguniform and
    # samples the same log-scaled range.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    # 'accuracy' is not meaningful for a regression target; track MAE instead.
    # The loss stays first in the list returned by model.evaluate().
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Optuna objective for the Cv model
def objective(trial):
    """Train one candidate model and return its loss on the Cv test split.

    NOTE(review): the value being minimised is computed on the test arrays, so
    the hyperparameter search sees the test data.
    """
    candidate = create_model(trial)
    candidate.fit(
        X_cv_train,
        Y_cv_train,
        epochs=500,
        batch_size=3,
        verbose=0,
    )
    # evaluate() yields [loss, metric, ...]; only the loss is reported to Optuna.
    return candidate.evaluate(X_cv_test, Y_cv_test, verbose=0)[0]

# Create a study object and optimize
# NOTE(review): `objective` scores each trial on the test split, so the test
# metrics reported below are optimistically biased.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

# Print the best hyperparameters found
print("Best hyperparameters: ", study.best_params)

# Retrain the model with the best hyperparameters
best_model = create_model(study.best_trial)
best_model.fit(X_cv_train, Y_cv_train, epochs=500, batch_size=3, verbose=1)

# Evaluate the best model on the test set
test_loss = best_model.evaluate(X_cv_test, Y_cv_test)
print("Test loss of the best model: ", test_loss)


train_results = []
test_results = []

# Predict with the model retrained above. The name used here before
# (NEW_MODEL) is not defined anywhere in this notebook and raised a NameError.
Y_cv_pred_train = best_model.predict(X_cv_train)
Y_cv_pred_test = best_model.predict(X_cv_test)
R2train = r2_score(Y_cv_train, Y_cv_pred_train)
train_results.append(R2train)
R2test = r2_score(Y_cv_test, Y_cv_pred_test)
test_results.append(R2test)
# `metrics` is already imported from sklearn in the imports cell.
print('Mean Absolute Error of train:', metrics.mean_absolute_error(Y_cv_train, Y_cv_pred_train))
print('Mean Absolute Error of test:', metrics.mean_absolute_error(Y_cv_test, Y_cv_pred_test))
# NOTE(review): the two values printed below are R^2 scores (each list holds a
# single entry); the "Max random state" label is misleading.
print('Max random state for train set:', max((train_results)))
print('Max random state for test set:', max((test_results)))

mse_train = mean_squared_error(Y_cv_train, Y_cv_pred_train)
mse_test = mean_squared_error(Y_cv_test, Y_cv_pred_test)

# NOTE(review): mean_squared_error already averages over samples, so these
# values are MSE / n rather than a per-sample error -- confirm this is intended.
mse_per_sample_train = mse_train / len(Y_cv_train)
mse_per_sample_test = mse_test / len(Y_cv_test)

print('MSE per sample on train set:', mse_per_sample_train)
print('MSE per sample on test set:', mse_per_sample_test)



# Predicted-vs-expected scatter for the Cv model.
fig, ax = plt.subplots(dpi=200)

# Dashed y = x reference line across the plotted range.
x, y = [0.2, 0.5], [0.2, 0.5]
ax.plot(x, y, '--', color='k')
ax.set_xlim(0.2, 0.5)
ax.set_ylim(0.2, 0.5)

ax.plot(Y_cv_train, Y_cv_pred_train, 'o', color='#FF69B4', label="Training set")
ax.plot(Y_cv_test, Y_cv_pred_test, 'o', color='k', label="Test set")

# Both axis labels share the same text styling.
label_style = dict(labelpad=12, fontsize=12, fontname='Times New Roman')
ax.set_xlabel('Expected $C_{v}$ (cal/g.C)', **label_style)
ax.set_ylabel('Predicted $C_{v}$ (cal/g.C)', **label_style)

# Thicken the frame on all four sides.
for axis in ('top', 'bottom', 'left', 'right'):
    ax.spines[axis].set_linewidth(1.5)

# Inward-pointing ticks on both axes.
ax.tick_params(axis='both', direction='in')

ax.legend(loc='best', fontsize=10)
plt.show()

In [None]:
# Predict shear strength
# Load the saved model that the shear model is built on.
base_model = load_model('Base model.h5')
# NOTE(review): in tf.keras `Model.layers` returns a new list on each access, so
# this pop() most likely leaves the model unchanged -- confirm.
base_model.layers.pop()
base_model.trainable = True

# Hold out 30 % of the shear rows and convert all four pieces to float arrays.
# train_test_split returns them in the order X_train, X_test, Y_train, Y_test.
X_shear_train, X_shear_test, Y_shear_train, Y_shear_test = (
    np.array(part).astype("float")
    for part in train_test_split(X_shear, Y_shear, test_size=0.3, random_state=24)
)

# Set random seed for reproducibility
tf.keras.utils.set_random_seed(39)

# Define the model creation function with Optuna
def create_model(trial):
    """Build and compile a fine-tuning model for one Optuna trial.

    The model consists of every layer of the module-level ``base_model`` except
    its last one, followed by ``n_layers`` new Dense layers and a single linear
    output unit.

    Args:
        trial: Object exposing Optuna's ``suggest_*`` API. This notebook passes
            both live trials and ``study.best_trial``.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, MSE loss).
    """
    # Work on a private copy of the pretrained network. Adding base_model's own
    # layer objects would let every trial keep training the same weights, so
    # later trials (and the final retrain) would start from earlier trials' state.
    pretrained = keras.models.clone_model(base_model)
    pretrained.set_weights(base_model.get_weights())

    model = keras.Sequential()

    # Add existing layers from the pretrained copy
    for layer in pretrained.layers[:-1]:  # Exclude the last layer
        model.add(layer)

    # Define the number of additional layers to add
    n_layers = trial.suggest_int('n_layers', 1, 5)  # Choose between 1 and 5 additional layers

    # Add additional layers as specified by Optuna
    for i in range(n_layers):
        n_units = trial.suggest_int(f'units_{i}', 10, 128)  # Choose between 10 and 128 units
        activation = trial.suggest_categorical(f'activation_{i}', ['relu', 'sigmoid', 'tanh', 'linear'])
        model.add(keras.layers.Dense(n_units, activation=activation))

    # Output layer
    model.add(keras.layers.Dense(1, activation='linear'))

    # Compile the model with a learning rate suggested by Optuna;
    # suggest_float(log=True) replaces the deprecated suggest_loguniform and
    # samples the same log-scaled range.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    # 'accuracy' is not meaningful for a regression target; track MAE instead.
    # The loss stays first in the list returned by model.evaluate().
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Optuna objective for the shear model
def objective(trial):
    """Train one candidate model and return its loss on the shear test split.

    NOTE(review): the value being minimised is computed on the test arrays, so
    the hyperparameter search sees the test data.
    """
    candidate = create_model(trial)
    candidate.fit(
        X_shear_train,
        Y_shear_train,
        epochs=275,
        batch_size=3,
        verbose=0,
    )
    # evaluate() yields [loss, metric, ...]; only the loss is reported to Optuna.
    return candidate.evaluate(X_shear_test, Y_shear_test, verbose=0)[0]

# Create a study object and optimize
# NOTE(review): `objective` scores each trial on the test split, so the test
# metrics reported below are optimistically biased.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

# Print the best hyperparameters found
print("Best hyperparameters: ", study.best_params)

# Retrain the model with the best hyperparameters
best_model = create_model(study.best_trial)
best_model.fit(X_shear_train, Y_shear_train, epochs=275, batch_size=3, verbose=1)

# Evaluate the best model on the test set
test_loss = best_model.evaluate(X_shear_test, Y_shear_test)
print("Test loss of the best model: ", test_loss)

train_results = []
test_results = []

# Predict with the model retrained above. The name used here before
# (NEW_MODEL) is not defined anywhere in this notebook and raised a NameError.
Y_shear_pred_train = best_model.predict(X_shear_train)
Y_shear_pred_test = best_model.predict(X_shear_test)
R2train = r2_score(Y_shear_train, Y_shear_pred_train)
train_results.append(R2train)
R2test = r2_score(Y_shear_test, Y_shear_pred_test)
test_results.append(R2test)
# `metrics` is already imported from sklearn in the imports cell.
print('Mean Absolute Error of train:', metrics.mean_absolute_error(Y_shear_train, Y_shear_pred_train))
print('Mean Absolute Error of test:', metrics.mean_absolute_error(Y_shear_test, Y_shear_pred_test))
# NOTE(review): the two values printed below are R^2 scores (each list holds a
# single entry); the "Max random state" label is misleading.
print('Max random state for train set:', max((train_results)))
print('Max random state for test set:', max((test_results)))


mse_train = mean_squared_error(Y_shear_train, Y_shear_pred_train)
mse_test = mean_squared_error(Y_shear_test, Y_shear_pred_test)

# NOTE(review): mean_squared_error already averages over samples, so these
# values are MSE / n rather than a per-sample error -- confirm this is intended.
mse_per_sample_train = mse_train / len(Y_shear_train)
mse_per_sample_test = mse_test / len(Y_shear_test)

print('MSE per sample on train set:', mse_per_sample_train)
print('MSE per sample on test set:', mse_per_sample_test)



# Predicted-vs-expected scatter for the shear model.
fig, ax = plt.subplots(dpi=200)

# Dashed y = x reference line across the plotted range.
x, y = [0, 1.5], [0, 1.5]
ax.plot(x, y, '--', color='k')
ax.set_xlim(0, 1.5)
ax.set_ylim(0, 1.5)

ax.plot(Y_shear_train, Y_shear_pred_train, 'o', color='#BFEFFF', label="Training set")
ax.plot(Y_shear_test, Y_shear_pred_test, 'o', color='k', label="Test set")

# Both axis labels share the same text styling.
label_style = dict(labelpad=12, fontsize=12, fontname='Times New Roman')
ax.set_xlabel('Expected Shear Modulus (GPa)', **label_style)
ax.set_ylabel('Predicted Shear Modulus(GPa)', **label_style)

# Thicken the frame on all four sides.
for axis in ('top', 'bottom', 'left', 'right'):
    ax.spines[axis].set_linewidth(1.5)

# Inward-pointing ticks on both axes.
ax.tick_params(axis='both', direction='in')

ax.legend(loc='best', fontsize=10)
plt.show()


In [None]:
# Predict dynamic viscosity
# Load the saved model that the dynamic-viscosity model is built on.
base_model = load_model('Base model.h5')
# NOTE(review): in tf.keras `Model.layers` returns a new list on each access, so
# this pop() most likely leaves the model unchanged -- confirm.
base_model.layers.pop()
base_model.trainable = True

# Hold out 30 % of the dynamic-viscosity rows and convert all four pieces to
# float arrays. train_test_split returns them in the order
# X_train, X_test, Y_train, Y_test.
X_dynamic_train, X_dynamic_test, Y_dynamic_train, Y_dynamic_test = (
    np.array(part).astype("float")
    for part in train_test_split(X_dynamic, Y_dynamic, test_size=0.3, random_state=190)
)

# Set random seed for reproducibility
tf.keras.utils.set_random_seed(70)

# Define the model creation function with Optuna
def create_model(trial):
    """Build and compile a fine-tuning model for one Optuna trial.

    The model consists of every layer of the module-level ``base_model`` except
    its last one, followed by ``n_layers`` new Dense layers and a single linear
    output unit.

    Args:
        trial: Object exposing Optuna's ``suggest_*`` API. This notebook passes
            both live trials and ``study.best_trial``.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, MSE loss).
    """
    # Work on a private copy of the pretrained network. Adding base_model's own
    # layer objects would let every trial keep training the same weights, so
    # later trials (and the final retrain) would start from earlier trials' state.
    pretrained = keras.models.clone_model(base_model)
    pretrained.set_weights(base_model.get_weights())

    model = keras.Sequential()

    # Add existing layers from the pretrained copy
    for layer in pretrained.layers[:-1]:  # Exclude the last layer
        model.add(layer)

    # Define the number of additional layers to add
    n_layers = trial.suggest_int('n_layers', 1, 5)  # Choose between 1 and 5 additional layers

    # Add additional layers as specified by Optuna
    for i in range(n_layers):
        n_units = trial.suggest_int(f'units_{i}', 10, 512)  # Choose between 10 and 512 units
        activation = trial.suggest_categorical(f'activation_{i}', ['sigmoid', 'relu', 'tanh', 'linear'])
        model.add(keras.layers.Dense(n_units, activation=activation))

    # Output layer
    model.add(keras.layers.Dense(1, activation='linear'))

    # Compile the model with a learning rate suggested by Optuna;
    # suggest_float(log=True) replaces the deprecated suggest_loguniform and
    # samples the same log-scaled range.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    # 'accuracy' is not meaningful for a regression target; track MAE instead.
    # The loss stays first in the list returned by model.evaluate().
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Optuna objective for the dynamic-viscosity model
def objective(trial):
    """Train one candidate model and return its loss on the dynamic test split.

    NOTE(review): the value being minimised is computed on the test arrays, so
    the hyperparameter search sees the test data.
    """
    candidate = create_model(trial)
    candidate.fit(
        X_dynamic_train,
        Y_dynamic_train,
        epochs=850,
        batch_size=3,
        verbose=0,
    )
    # evaluate() yields [loss, metric, ...]; only the loss is reported to Optuna.
    return candidate.evaluate(X_dynamic_test, Y_dynamic_test, verbose=0)[0]

# Create a study object and optimize
# NOTE(review): `objective` scores each trial on the test split, so the test
# metrics reported below are optimistically biased.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

# Print the best hyperparameters found
print("Best hyperparameters: ", study.best_params)

# Retrain the model with the best hyperparameters
best_model = create_model(study.best_trial)
best_model.fit(X_dynamic_train, Y_dynamic_train, epochs=850, batch_size=3, verbose=1)

# Evaluate the best model on the test set
test_loss = best_model.evaluate(X_dynamic_test, Y_dynamic_test)
print("Test loss of the best model: ", test_loss)

train_results = []
test_results = []

# Predict with the model retrained above. The name used here before
# (NEW_MODEL) is not defined anywhere in this notebook and raised a NameError.
Y_dynamic_pred_train = best_model.predict(X_dynamic_train)
Y_dynamic_pred_test = best_model.predict(X_dynamic_test)
R2train = r2_score(Y_dynamic_train, Y_dynamic_pred_train)
train_results.append(R2train)
R2test = r2_score(Y_dynamic_test, Y_dynamic_pred_test)
test_results.append(R2test)
# `metrics` is already imported from sklearn in the imports cell.
print('Mean Absolute Error of train:', metrics.mean_absolute_error(Y_dynamic_train, Y_dynamic_pred_train))
print('Mean Absolute Error of test:', metrics.mean_absolute_error(Y_dynamic_test, Y_dynamic_pred_test))
# NOTE(review): the two values printed below are R^2 scores (each list holds a
# single entry); the "Max random state" label is misleading.
print('Max random state for train set:', max((train_results)))
print('Max random state for test set:', max((test_results)))


mse_train = mean_squared_error(Y_dynamic_train, Y_dynamic_pred_train)
mse_test = mean_squared_error(Y_dynamic_test, Y_dynamic_pred_test)

# NOTE(review): mean_squared_error already averages over samples, so these
# values are MSE / n rather than a per-sample error -- confirm this is intended.
mse_per_sample_train = mse_train / len(Y_dynamic_train)
mse_per_sample_test = mse_test / len(Y_dynamic_test)

print('MSE per sample on train set:', mse_per_sample_train)
print('MSE per sample on test set:', mse_per_sample_test)



# Predicted-vs-expected scatter for the dynamic-viscosity model.
fig, ax = plt.subplots(dpi=200)

# Dashed y = x reference line across the plotted range.
x, y = [0, 3], [0, 3]
ax.plot(x, y, '--', color='k')
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)

ax.plot(Y_dynamic_train, Y_dynamic_pred_train, 'o', color='#4B0082', label="Training set")
ax.plot(Y_dynamic_test, Y_dynamic_pred_test, 'o', color='k', label="Test set")

# Both axis labels share the same text styling.
label_style = dict(labelpad=12, fontsize=12, fontname='Times New Roman')
ax.set_xlabel('Expected Dynamic Viscosity (Pa.s)', **label_style)
ax.set_ylabel('Predicted Dynamic Viscosity (Pa.s)', **label_style)

# Thicken the frame on all four sides.
for axis in ('top', 'bottom', 'left', 'right'):
    ax.spines[axis].set_linewidth(1.5)

# Inward-pointing ticks on both axes.
ax.tick_params(axis='both', direction='in')

ax.legend(loc='best', fontsize=10)
plt.show()