In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error


In [None]:

# 1. Load data -------------------------------------------------

# Sample one full sine period at 10 evenly spaced points on [0, 2π]
x = np.linspace(0, 2 * np.pi, num=10)
y = np.sin(x)

# Collect the samples in a two-column table and show it
df = pd.DataFrame({'x': x, 'y': y})
print(df)

# Features as a float32 column vector of shape (n, 1);
# targets as a flat float32 vector of shape (n,)
X_all = df["x"].to_numpy(dtype="float32")[:, np.newaxis]
y_all = df["y"].to_numpy(dtype="float32")

# 2. Split into train / val / test ----------------------------
# Two-stage split giving 60% train, 20% val, 20% test.

# Stage 1: hold out 40% of all samples; the remaining 60% is the training set.
# (random_state=42 here instead of 40 gives a difficult distribution to try.)
(X_train, X_temp,
 y_train, y_temp) = train_test_split(X_all, y_all, random_state=40, test_size=0.4)

# Stage 2: cut the hold-out set in half -> validation and test sets.
(X_val, X_test,
 y_val, y_test) = train_test_split(X_temp, y_temp, random_state=40, test_size=0.5)


In [None]:
# 3. Build a simple regression NN in Keras ---------------------
def build_model(hidden_units=(10, 10), activation="relu"):
    """Create an uncompiled fully connected network for scalar regression.

    Args:
        hidden_units: Width of each hidden Dense layer, in order. The default
            ``(10, 10)`` reproduces the original two hidden layers of 10 units.
        activation: Activation applied to every hidden layer.

    Returns:
        A ``tf.keras.Sequential`` model that takes inputs of shape ``(1,)``
        and ends in a single linear unit. The caller is responsible for
        compiling it.
    """
    hidden_layers = [
        tf.keras.layers.Dense(units, activation=activation)
        for units in hidden_units
    ]
    return tf.keras.Sequential([
        tf.keras.layers.Input(shape=(1,)),
        *hidden_layers,
        tf.keras.layers.Dense(1),  # linear output for regression
    ])

# Single reference model: Adam optimizer, mean squared error loss
model = build_model()
model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3))

# 4. Train -----------------------------------------------------
# The validation set is passed so its loss is recorded per epoch
# alongside the training loss in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_val, y_val),
    verbose=1,  # per-epoch progress output; set to 0 to silence it
)

# Plot training history: training and validation loss per epoch
plt.figure(figsize=(10, 5))
for series_key, series_label in (("loss", "Training Loss"),
                                 ("val_loss", "Validation Loss")):
    plt.plot(history.history[series_key], label=series_label, linewidth=2)

# Axis labelling
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss (MSE)', fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Decoration
plt.legend(fontsize=12, frameon=False)
plt.grid(True, linewidth=0.8, alpha=0.6)
plt.title('Training and Validation Loss', fontsize=14)

plt.tight_layout()
plt.show()

In [None]:
# 5. Evaluate --------------------------------------------------
# Predictions of the reference model on each split and on the full data set,
# flattened from shape (n, 1) to (n,) so they line up with the targets.
y_pred_train, y_pred_val, y_pred_test, y_pred_all = (
    model.predict(inputs).flatten()
    for inputs in (X_train, X_val, X_test, X_all)
)

# Dense grid of 100 points over [0, 2π] used to draw a smooth model curve
x_pred_lin = np.linspace(0, 2 * np.pi, 100)[:, np.newaxis]
y_pred_lin = model.predict(x_pred_lin).flatten()

# Mean absolute error per split
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_val = mean_absolute_error(y_val, y_pred_val)
mae_test = mean_absolute_error(y_test, y_pred_test)

# Labels are padded by hand so the three values line up in one column
for split_tag, split_mae in (("train:", mae_train),
                             ("val:  ", mae_val),
                             ("test: ", mae_test)):
    print(f"MAE {split_tag} {split_mae:.3f}")
# 6. Plot Experimental Data vs Model Data NN ------------------
# Order the samples by ascending x so they can be drawn as a continuous line
sort_idx = X_all.flatten().argsort()
t_sorted = X_all.flatten()[sort_idx]
y_sorted = y_all[sort_idx]
y_pred_sorted = y_pred_all[sort_idx]

plt.figure(figsize=(10, 5))

# One marker-only series per data split:
# (inputs, targets, legend label, marker, marker size, color)
split_series = (
    (X_train, y_train, "Training Data", 'o', 10, 'blue'),
    (X_val, y_val, "Validation Data", 's', 10, 'green'),
    (X_test, y_test, "Test Data", 'x', 12, 'red'),
)
for inputs, targets, legend_label, marker, marker_size, marker_color in split_series:
    plt.plot(inputs.flatten(), targets,
             label=legend_label,
             marker=marker,
             linestyle='',
             markersize=marker_size,
             color=marker_color)

# Model prediction on the dense grid, drawn as a dashed curve
plt.plot(x_pred_lin, y_pred_lin,
         label="Model Data NN",
         linewidth=2,
         linestyle="--",
         color='black')

plt.xlabel("x [-]", fontsize=12)
plt.ylabel("y [-]", fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.legend(fontsize=12, frameon=False)
plt.grid(True, linewidth=0.8, alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
# 7. Train 4 instances of the model and store predictions -----
num_models = 4
models = []           # trained Keras models, in training order
all_predictions = []  # one flat prediction array per model, on the x_pred_lin grid

print("Training multiple model instances...")

for model_number in range(1, num_models + 1):
    print(f"Training model {model_number}/{num_models}")

    # New network object with the same architecture as the reference model;
    # note that this loop trains with MAE loss.
    member = build_model()
    member.compile(
        loss="mae",  # mean absolute error
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    )

    # Same data, epochs and batch size as the reference run; output suppressed
    member_history = member.fit(
        x=X_train,
        y=y_train,
        batch_size=32,
        epochs=200,
        validation_data=(X_val, y_val),
        verbose=0,
    )
    models.append(member)

    # Store this member's curve on the dense plotting grid (x_pred_lin),
    # not on the original data points
    member_curve = member.predict(x_pred_lin, verbose=0).flatten()
    all_predictions.append(member_curve)

    # Report this member's error on the held-out test split
    member_test_pred = member.predict(X_test, verbose=0).flatten()
    member_test_mae = mean_absolute_error(y_test, member_test_pred)
    print(f"  Model {model_number} Test MAE: {member_test_mae:.3f}")

print(f"\nCompleted training {num_models} model instances.")
print("Models stored in 'models' list")
print("Predictions stored in 'all_predictions' list")
print(f"Shape of each prediction array: {all_predictions[0].shape}")

In [None]:
# 8. Plot individual predictions and calculate variance ----------
# Stack the per-model prediction vectors into one 2-D array:
# one row per model, one column per evaluated x value.
all_predictions_array = np.asarray(all_predictions)

# Spread between the models at each x value, then its average over all x
prediction_variance = all_predictions_array.var(axis=0)
mean_variance = prediction_variance.mean()

# Plot the results
plt.figure(figsize=(12, 8))

# Plot experimental data (drawn on top of the model curves via zorder)
plt.plot(X_all, y_all,
         label="Experimental Data",
         linewidth=3,
         color='black',
         zorder=10)

# Plot individual model predictions.
# Iterate over the stored prediction rows and cycle the palette, so that
# training more than len(colors) models no longer raises an IndexError.
colors = ['red', 'blue', 'green', 'orange']
for i, member_prediction in enumerate(all_predictions_array):
    plt.plot(x_pred_lin, member_prediction,
             label=f"Model {i+1}",
             linewidth=2,
             linestyle="--",
             color=colors[i % len(colors)],
             alpha=0.8)

# Calculate and plot ensemble mean (average over models at each grid point)
ensemble_mean = np.mean(all_predictions_array, axis=0)
plt.plot(x_pred_lin, ensemble_mean,
         label="Ensemble Mean",
         linewidth=3,
         linestyle="-.",
         color='purple',
         zorder=5)

# The horizontal axis is the dimensionless input x, not a time in seconds;
# label it the same way as the data-vs-model figure.
plt.xlabel("x [-]", fontsize=20)
plt.ylabel("y [-]", fontsize=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.legend(fontsize=14, frameon=False)
plt.grid(True, linewidth=0.8, alpha=0.6)
plt.title(f"Individual Model Predictions\nMean Variance: {mean_variance:.6f}", fontsize=16)
plt.tight_layout()
plt.show()

# Print variance statistics: summary of the per-x variance across models
print("Variance Statistics:")
for caption, value in (
    ("Mean variance across all x values", mean_variance),
    ("Min variance", prediction_variance.min()),
    ("Max variance", prediction_variance.max()),
    ("Std of variances", prediction_variance.std()),
):
    print(f"{caption}: {value:.6f}")

# Show where the models disagree most: grid position, variance there,
# and each model's prediction at that position
max_var_idx = prediction_variance.argmax()
print(f"\nHighest variance at x = {x_pred_lin[max_var_idx][0]:.3f}")
print(f"Variance value: {prediction_variance[max_var_idx]:.6f}")
print(f"Predictions at this point: {all_predictions_array[:, max_var_idx]}")

# Calculate ensemble MAE
# ensemble_mean holds predictions on the 100-point x_pred_lin grid, so it
# cannot be indexed with positions from the 10-sample data set: doing so
# picks grid values near x = 0 instead of the predictions at the test inputs.
# Evaluate every trained model directly on X_test and average those
# predictions, which pairs each ensemble prediction with its y_test target.
ensemble_mean_test = np.mean(
    [member.predict(X_test, verbose=0).flatten() for member in models],
    axis=0,
)
ensemble_mae = mean_absolute_error(y_test, ensemble_mean_test)
print(f"\nEnsemble Test MAE: {ensemble_mae:.3f}")