In [1]:
# Import necessary libraries
import pandas as pd
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense
from sklearn import preprocessing

from joblib import Parallel, delayed

In [None]:
# Load dataset and separate features and target variable
df = pd.read_csv('concrete_data.csv')  # Read the dataset from a CSV file

# Normalize the entire dataset
d = preprocessing.normalize(df)  # Apply normalization to scale the data between 0 and 1
scaled_df = pd.DataFrame(d, columns=df.columns)  # Convert the normalized data back into a DataFrame

# Split the normalized data into features (X) and target variable (y)
X = scaled_df[scaled_df.columns[:-1]]  # Extract all columns except the last one as features
y = scaled_df[scaled_df.columns[-1:]]  # Extract the last column as the target variable

In [3]:
# Define input and hidden layer sizes
input_neurons = df.columns[:-1].shape[0]  # Number of input features
hidden_layer = 50  # Number of neurons in the hidden layer

# Define regression model
def regression_model():
    # Create the neural network model
    model = Sequential()
    model.add(Dense(hidden_layer, activation='relu', input_shape=(input_neurons,)))  # Hidden layer with ReLU activation
    model.add(Dense(1))  # Output layer with a single neuron for regression
    
    # Compile the model with Adam optimizer and mean squared error loss function
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [11]:
# Define training parameters
epochs = 100         # Number of epochs to train the model
test_size = 0.3     # Proportion of the dataset reserved for testing
n_repeats = 50      # Number of times the model will be trained and evaluated

# Initialize a list to store mean squared errors from each repeat
mse_list = [0] * n_repeats


In [12]:
# Build the regression model
model = regression_model()

# Function to compute the mean squared error (MSE) for an iteration
def mean_and_standardDeviation_of_MSE(iteration):
    print(f"Iteration: {iteration}")
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=iteration)
    
    # Train the model
    model = regression_model()  # Create a fresh model for each iteration
    model.fit(X_train, y_train, epochs=epochs, verbose=0, batch_size=32)
    
    # Predict on the test data
    y_pred = model.predict(X_test).flatten()
    
    # Calculate mean squared error
    mse = mean_squared_error(y_test, y_pred)
    return mse

In [6]:
# Execute the function in parallel and save the results
results = Parallel(n_jobs=-1)(delayed(mean_and_standardDeviation_of_MSE)(i) for i in range(n_repeats))

In [7]:
# Compute the mean and standard deviation of the results
mse_mean = np.mean(results)
mse_std = np.std(results)

# Display the results
mse_mean, mse_std

(5.023929223268045e-05, 1.1253402227062306e-05)

In [13]:
# Execute the function in parallel and save the results
results = Parallel(n_jobs=-1)(delayed(mean_and_standardDeviation_of_MSE)(i) for i in range(n_repeats))

In [14]:
# Compute the mean and standard deviation of the results
mse_mean = np.mean(results)
mse_std = np.std(results)

# Display the results
mse_mean, mse_std

(4.2714098144042946e-05, 8.741100135632135e-06)