# Artificial Neural Network

### Importing the libraries

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import GridSearchCV, KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasRegressor

## Part 1 - Data Preprocessing

### Importing the dataset

In [3]:
# Read the Excel workbook into a DataFrame, then print a summary of its
# columns, non-null counts and dtypes.
dataset = pd.read_excel(io='P_Dataset4.xlsx')
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 588 entries, 0 to 587
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Explosive type          588 non-null    object 
 1   Explosive mass          588 non-null    float64
 2   Standoff distance       588 non-null    float64
 3   Peak incident pressure  588 non-null    float64
dtypes: float64(3), object(1)
memory usage: 18.5+ KB


In [4]:
# One-hot encode the categorical 'Explosive type' column: each category gets
# its own indicator column and the original column is replaced by them.
# Note: this overwrites `dataset`, so the cell cannot be re-run on its own
# (the 'Explosive type' column no longer exists after the first run).
dataset = pd.get_dummies(data=dataset, columns=['Explosive type'])
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 588 entries, 0 to 587
Data columns (total 5 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Explosive mass                588 non-null    float64
 1   Standoff distance             588 non-null    float64
 2   Peak incident pressure        588 non-null    float64
 3   Explosive type_Composition B  588 non-null    uint8  
 4   Explosive type_TNT            588 non-null    uint8  
dtypes: float64(3), uint8(2)
memory usage: 15.1 KB


In [5]:
# Preview the first five rows of the encoded frame.
dataset.head(n=5)

Unnamed: 0,Explosive mass,Standoff distance,Peak incident pressure,Explosive type_Composition B,Explosive type_TNT
0,0.5,1.0,597.46,0,1
1,0.5,1.5,283.258,0,1
2,0.5,2.5,163.904,0,1
3,0.5,3.5,135.678,0,1
4,0.5,4.5,124.039,0,1


In [6]:
# Separate the regression target from the remaining feature columns.
X = dataset.drop(columns=['Peak incident pressure'])
y = dataset['Peak incident pressure']
print(X.shape, y.shape)

(588, 4) (588,)


In [7]:
# Convert features and target to float64 NumPy arrays.
# The dtype is forced explicitly: with the uint8 indicator columns shown above
# a plain np.array(X) already yields float64, but on pandas versions where
# get_dummies produces boolean indicators the mixed float/bool frame would
# otherwise be converted to an object array, which the scaler/Keras steps
# downstream are not meant to receive.
X = np.array(X, dtype=np.float64)
y = np.array(y, dtype=np.float64)

### Splitting the dataset into the Training set and Test set

In [8]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples; the remaining 70% form the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=71,
)

In [9]:
# Split the hold-out set in half: one half becomes the validation set, the
# other half replaces X_test / y_test as the final test set.
# Because X_test / y_test are overwritten, re-running only this cell would
# halve the already-halved test set again.
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, random_state=71
)

### Feature scaling

In [10]:
from sklearn.preprocessing import StandardScaler
# Standardise only the first two columns (explosive mass, standoff distance);
# the one-hot indicator columns are left untouched. The scaler statistics are
# learned from the training split alone.
sc = StandardScaler().fit(X_train[:, :2])
X_train[:, :2] = sc.transform(X_train[:, :2])
print(X_train)

[[ 0.55550177 -0.22203742  1.          0.        ]
 [-0.64547091  0.27937676  1.          0.        ]
 [ 0.36587451  0.78079094  1.          0.        ]
 ...
 [-1.15114363  0.44651482  0.          1.        ]
 [-1.53039816  0.1122387   0.          1.        ]
 [-0.39263456 -1.39200384  0.          1.        ]]


In [11]:
# Scale the test split's first two columns with the scaler fitted on the
# training data. The assignment is in place, so re-running this cell would
# scale the already-scaled values a second time.
X_test[:, :2] = sc.transform(X_test[:, :2])
print(X_test)

[[-1.53039816 -0.22203742  1.          0.        ]
 [-0.39263456 -0.22203742  1.          0.        ]
 [-0.1397982  -1.39200384  1.          0.        ]
 [-0.64547091 -1.05772772  1.          0.        ]
 [ 0.61871086  0.1122387   0.          1.        ]
 [ 1.5668472  -0.05489936  1.          0.        ]
 [-0.39263456 -1.5591419   1.          0.        ]
 [ 0.61871086  0.27937676  0.          1.        ]
 [ 0.87154722  0.1122387   0.          1.        ]
 [-0.39263456 -0.89058966  0.          1.        ]
 [-0.64547091  1.53291221  0.          1.        ]
 [ 0.87154722 -0.89058966  0.          1.        ]
 [-1.40397998 -1.05772772  1.          0.        ]
 [-1.40397998  0.27937676  1.          0.        ]
 [ 0.80833813 -1.5591419   1.          0.        ]
 [-0.89830727 -0.7234516   0.          1.        ]
 [ 0.80833813 -0.7234516   1.          0.        ]
 [-0.1397982  -0.55631354  1.          0.        ]
 [-0.39263456 -0.05489936  1.          0.        ]
 [-0.64547091 -1.5591419   1.  

In [12]:
# Scale the validation split's first two columns with the scaler fitted on the
# training data. The assignment is in place, so re-running this cell would
# scale the already-scaled values a second time.
X_val[:, :2] = sc.transform(X_val[:, :2])
print(X_val)

[[ 0.55550177  1.53291221  1.          0.        ]
 [-0.39263456 -0.38917548  1.          0.        ]
 [-0.89830727  0.27937676  1.          0.        ]
 [-1.53039816  1.28220512  1.          0.        ]
 [-0.39263456 -1.05772772  0.          1.        ]
 [-1.40397998 -0.05489936  1.          0.        ]
 [-1.40397998 -0.89058966  0.          1.        ]
 [-1.53039816  1.53291221  1.          0.        ]
 [ 0.55550177  0.1122387   1.          0.        ]
 [-1.53039816 -0.7234516   1.          0.        ]
 [-1.40397998  0.78079094  1.          0.        ]
 [-1.53039816  1.53291221  0.          1.        ]
 [-0.1397982  -1.64271093  1.          0.        ]
 [-1.15114363  0.27937676  1.          0.        ]
 [-0.64547091 -0.55631354  0.          1.        ]
 [ 0.80833813  1.11506706  1.          0.        ]
 [ 0.04982906 -0.05489936  1.          0.        ]
 [-0.39263456 -1.22486578  1.          0.        ]
 [-1.40397998  0.44651482  0.          1.        ]
 [-0.64547091 -1.22486578  1.  

### S1 - Hyperparameter tuning - layers, neurons, activation function

In [186]:
import math
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam


# Fix the global random seeds (TensorFlow and NumPy) before the S1 search.
tf.random.set_seed(seed=71)
np.random.seed(seed=71)

def FindLayerNodesLinear(last_layer_nodes, first_layer_nodes=200, n_layers=3):
    """Return hidden-layer widths interpolated linearly from first to last layer.

    Parameters
    ----------
    last_layer_nodes : int or float
        Width of the final hidden layer.
    first_layer_nodes : int or float, default 200
        Width of the first hidden layer.
    n_layers : int, default 3
        Number of hidden layers to generate (must be at least 2 so that both
        end points exist).

    Returns
    -------
    list of int
        ``n_layers`` widths. Intermediate widths are rounded up with
        ``math.ceil``; the last entry is always ``ceil(last_layer_nodes)``.

    Raises
    ------
    ValueError
        If ``n_layers`` is smaller than 2.
    """
    if n_layers < 2:
        raise ValueError("n_layers must be at least 2")

    nodes_increment = (last_layer_nodes - first_layer_nodes) / (n_layers - 1)

    # Compute each width from the start value instead of accumulating the
    # increment, and pin the final width explicitly, so floating-point drift
    # can never push the last layer past the requested size.
    layers = [
        math.ceil(first_layer_nodes + step * nodes_increment)
        for step in range(n_layers - 1)
    ]
    layers.append(math.ceil(last_layer_nodes))
    return layers

In [187]:
def create_model1(last_layer_nodes, activation_func):
    """Build and compile the three-hidden-layer regression network for the S1 search.

    Hidden-layer widths come from ``FindLayerNodesLinear(last_layer_nodes)``
    and every hidden layer is followed by a 10% dropout layer.

    Parameters
    ----------
    last_layer_nodes : int
        Width of the last hidden layer.
    activation_func : str
        Activation applied to every hidden layer.

    Returns
    -------
    Sequential
        Model compiled with Adam (learning rate 0.01), mean-squared-error loss
        and MAE as an extra metric.

    Notes
    -----
    The input width is read from the notebook-level ``X_train``, so that
    variable must exist before the model is built.
    """
    n_nodes = FindLayerNodesLinear(last_layer_nodes)

    model = Sequential()
    for layer_idx, units in enumerate(n_nodes):
        if layer_idx == 0:
            # Only the first layer declares the input shape.
            model.add(Dense(units=units, input_shape=(X_train.shape[1],), activation=activation_func))
        else:
            model.add(Dense(units, activation=activation_func))
        model.add(Dropout(0.1))

    # Regression head: a single linear unit predicting the continuous target.
    model.add(Dense(1, activation='linear'))
    optimizer1 = Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer1, loss='mean_squared_error', metrics=['mae'])
    return model

# S1 search space: hidden-layer activation and width of the last hidden layer
# (10, 20, ..., 200).
activation_func = ['relu', 'leaky_relu', 'softplus']
last_layer_nodes = list(range(10, 201, 10))

# Keys carry the `model__` prefix so the wrapper forwards them to create_model1.
param_grid = {
    'model__activation_func': activation_func,
    'model__last_layer_nodes': last_layer_nodes,
}

# Wrap the Keras builder as a scikit-learn compatible regressor.
model1 = KerasRegressor(
    model=create_model1,
    epochs=100,
    batch_size=50,
    verbose=0,
    random_state=71,
)

In [188]:
# Exhaustive search over param_grid, scored by R2 under shuffled 5-fold CV.
kf = KFold(n_splits=5, shuffle=True, random_state=71)
grid1 = GridSearchCV(
    estimator=model1,
    param_grid=param_grid,
    scoring='r2',
    cv=kf,
    n_jobs=-1,
)
grid_result1 = grid1.fit(X_train, y_train)

# Report the best configuration, then mean (std) R2 for every candidate.
print("Best: %f using %s" % (grid_result1.best_score_, grid_result1.best_params_))
means, stds, params = (
    grid_result1.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))



Best: 0.958613 using {'model__activation_func': 'relu', 'model__last_layer_nodes': 10}
0.958613 (0.018097) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 10}
0.934811 (0.049576) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 20}
0.934409 (0.028650) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 30}
0.951170 (0.014944) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 40}
0.937980 (0.032260) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 50}
0.946756 (0.022705) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 60}
0.936221 (0.040043) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 70}
0.940796 (0.032936) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 80}
0.938116 (0.027923) with: {'model__activation_func': 'relu', 'model__last_layer_nodes': 90}
0.951490 (0.018479) with: {'model__activation_func': 'relu', 'model__last_layer_nodes

In [189]:
# Tabulate every candidate with the fixed first-layer width (200) and its mean R2.
pd.DataFrame(params).assign(model__first_layer_nodes=200, R2=means)

Unnamed: 0,model__activation_func,model__last_layer_nodes,model__first_layer_nodes,R2
0,relu,10,200,0.958613
1,relu,20,200,0.934811
2,relu,30,200,0.934409
3,relu,40,200,0.95117
4,relu,50,200,0.93798
5,relu,60,200,0.946756
6,relu,70,200,0.936221
7,relu,80,200,0.940796
8,relu,90,200,0.938116
9,relu,100,200,0.95149


### Merge all files

In [190]:
from pathlib import Path

# Folder holding the per-run CSV result files
input_dir = Path("Pressure_hyperparameter_tuning_3layers")
print("1", input_dir)

# Output Excel file, written into the same folder as the inputs
output_excel_file = input_dir / "S1_summary_3layers.xlsx"

# Sort the matches so the merged row order does not depend on the order in
# which the filesystem happens to enumerate the files.
csv_files = sorted(input_dir.glob('*.csv'))
if not csv_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {input_dir.resolve()}")

# Read every CSV into its own DataFrame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]

# Stack the frames row-wise with a fresh 0..n-1 index
merged_df = pd.concat(dfs, ignore_index=True)

# Write the merged DataFrame to an Excel file
merged_df.to_excel(output_excel_file, index=False)

1 Pressure_hyperparameter_tuning_3layers


In [191]:
# Fix the global random seeds (TensorFlow and NumPy) before the second search.
tf.random.set_seed(seed=71)
np.random.seed(seed=71)

# Model builder passed to KerasRegressor for the second grid search
def create_model2():
    """Build the fixed 60-75-90 ReLU network with a single linear output unit.

    The model is returned uncompiled; no loss or optimizer is set here.
    No dropout layers are used in this architecture.

    Returns
    -------
    Sequential
        Uncompiled Keras model whose input width is read from the
        notebook-level ``X_train`` (which must exist when this is called).
    """
    model = Sequential()
    model.add(Dense(units=60, input_shape=(X_train.shape[1],), activation='relu'))
    model.add(Dense(units=75, activation='relu'))
    model.add(Dense(units=90, activation='relu'))
    # Regression head: one linear unit for the continuous target.
    model.add(Dense(units=1, activation='linear'))

    return model

In [192]:
# Wrap the uncompiled builder; loss and metrics are supplied to the wrapper.
model2 = KerasRegressor(
    model=create_model2,
    loss='mean_squared_error',
    metrics=['mae'],
    random_state=71,
    verbose=0,
)

# Candidate training hyperparameters
batch_size = [30, 40, 50]
optimizer = [Adam, Nadam, RMSprop]
learning_rate = [0.001, 0.01, 0.1]
epochs = [100, 200, 300, 400, 500]

# Exhaustive search over all combinations, scored by R2 under shuffled 5-fold CV
param_grid2 = {
    'batch_size': batch_size,
    'optimizer': optimizer,
    'optimizer__learning_rate': learning_rate,
    'epochs': epochs,
}
kf = KFold(n_splits=5, shuffle=True, random_state=71)
grid2 = GridSearchCV(
    estimator=model2,
    param_grid=param_grid2,
    scoring='r2',
    cv=kf,
    n_jobs=-1,
)
grid_result2 = grid2.fit(X_train, y_train)

# Report the best configuration, then mean (std) R2 for every candidate.
print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means, stds, params = (
    grid_result2.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.983931 using {'batch_size': 50, 'epochs': 400, 'optimizer': <class 'keras.src.optimizers.nadam.Nadam'>, 'optimizer__learning_rate': 0.01}
0.709195 (0.165185) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'optimizer__learning_rate': 0.001}
0.960317 (0.013291) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'optimizer__learning_rate': 0.01}
0.835005 (0.066144) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.adam.Adam'>, 'optimizer__learning_rate': 0.1}
0.711179 (0.162899) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.nadam.Nadam'>, 'optimizer__learning_rate': 0.001}
0.956524 (0.030609) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.nadam.Nadam'>, 'optimizer__learning_rate': 0.01}
0.906788 (0.015807) with: {'batch_size': 30, 'epochs': 100, 'optimizer': <class 'keras.src.optimizers.na