In [None]:
import pandas as pd
import numpy as np
import os
from scipy.stats import kurtosis, skew
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.signal import find_peaks, welch
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, mean_squared_error, r2_score,mean_absolute_error, root_mean_squared_error

In [None]:
# Inputs: the Sobol sample table and the folder holding one voltage CSV per sample ID.
# Output: a single CSV with every voltage row tagged with its sample's ID, height, x and y.
main_file = "simulation_results/results/Sobol_Simulation_postsurrogate_x_y_h.csv"
voltage_folder = "simulation_results/results"
output_file = "merged_dataset_simulation.csv"

# Column labels used in the voltage files, mapped to readable sensor names.
sensor_columns = {'2': 'sensorA', '4': 'sensorB', '5': 'sensorC', '6': 'sensorD'}

main_df = pd.read_csv(main_file)

combined_data = []

# NOTE(review): iterrows() returns each row as a Series with a single common dtype,
# so an integer ID may come back as a float when the other columns are floats.
# Confirm the generated file names match the files on disk.
for _, row in main_df.iterrows():
    # Named sample_id (not `id`) so the builtin id() is not shadowed in the kernel.
    sample_id = row['<ID>']
    voltage_file = os.path.join(voltage_folder, f"voltages_{sample_id}.csv")

    if not os.path.exists(voltage_file):
        print(f"File not found: {voltage_file}")
        continue

    # Tag every voltage row with the sample's metadata, then rename the sensor columns.
    voltage_df = (
        pd.read_csv(voltage_file)
        .assign(ID=sample_id, height=row['h'], x=row['x'], y=row['y'])
        .rename(columns=sensor_columns)
    )
    combined_data.append(voltage_df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not combined_data:
    raise ValueError(
        f"No voltage files for the IDs in {main_file} were found in {voltage_folder}"
    )

final_df = pd.concat(combined_data, ignore_index=True)

final_df.to_csv(output_file, index=False)

print(f"Combined file saved as {output_file}")

print(len(final_df))
final_df.head()


In [None]:
def extract_features(group):
    """Condense one group of sensor readings into a flat feature row.

    For each of sensorA, sensorB, sensorC and sensorD the row holds, in this
    order: mean, std, min, max, skew, kurtosis and energy (sum of squares),
    under keys of the form ``<sensor>_<statistic>``. The group's first ``x``
    and ``y`` values are appended at the end.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows providing the four sensor columns plus ``x`` and ``y``.

    Returns
    -------
    pandas.Series
        Feature values indexed by feature name.
    """
    # (suffix, reducer) pairs; the tuple order fixes the order of the output keys.
    summaries = (
        ('mean', lambda readings: readings.mean()),
        ('std', lambda readings: readings.std()),
        ('min', lambda readings: readings.min()),
        ('max', lambda readings: readings.max()),
        ('skew', skew),
        ('kurtosis', kurtosis),
        ('energy', lambda readings: np.sum(readings**2)),
    )

    features = {
        f'{sensor}_{suffix}': reduce_fn(group[sensor])
        for sensor in ('sensorA', 'sensorB', 'sensorC', 'sensorD')
        for suffix, reduce_fn in summaries
    }

    # Only x and y are carried over from the group; 'height' is not part of the row.
    for target in ('x', 'y'):
        features[target] = group[target].iloc[0]

    return pd.Series(features)

# One feature row per simulation ID; reset_index(drop=True) discards the ID index.
features_df = final_df.groupby('ID').apply(extract_features).reset_index(drop=True)

# Inputs are the per-sensor statistics; targets are the x and y coordinates.
X = features_df.drop(columns=['x', 'y'])
y = features_df[['x', 'y']]

# Split BEFORE scaling so the scaler's mean/variance come from the training rows only.
# Fitting the scaler on the full matrix would leak test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix under the training-set scaling, kept under its original name
# for any code that still refers to X_scaled.
X_scaled = scaler.transform(X)

print("Training Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)
print("Training Target Shape:", y_train.shape)
print("Testing Target Shape:", y_test.shape)


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Hyperparameters
n_estimators = 50
max_depth = 8
# scikit-learn rejects an integer max_samples larger than the number of training rows,
# so cap the requested 280 at the size of the training set.
max_samples = min(280, len(X_train))
n_iterations = 10

# Per-iteration metrics, one list per target coordinate.
rmse_X, rmse_Y = [], []
mae_X, mae_Y = [], []
r2_X, r2_Y = [], []

# Train and score the forest n_iterations times, changing only the random seed.
for i in range(n_iterations):
    print(f"Iteration {i+1}:")

    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_samples=max_samples,
        random_state=i,
    )
    rf.fit(X_train, y_train)

    # Prediction columns follow the target column order: 0 -> x, 1 -> y.
    y_pred = rf.predict(X_test)

    rmse_x = root_mean_squared_error(y_test['x'], y_pred[:, 0])
    rmse_y = root_mean_squared_error(y_test['y'], y_pred[:, 1])
    rmse_X.append(rmse_x)
    rmse_Y.append(rmse_y)

    mae_x = mean_absolute_error(y_test['x'], y_pred[:, 0])
    mae_y = mean_absolute_error(y_test['y'], y_pred[:, 1])
    mae_X.append(mae_x)
    mae_Y.append(mae_y)

    r2_x = r2_score(y_test['x'], y_pred[:, 0])
    r2_y = r2_score(y_test['y'], y_pred[:, 1])
    r2_X.append(r2_x)
    r2_Y.append(r2_y)

    # Both values are RMSE; the y label previously said "MSE".
    print(f"RMSE for x: {rmse_x:.4f}, RMSE for y: {rmse_y:.4f}")
    print(f"MAE for x: {mae_x:.4f}, MAE for y: {mae_y:.4f}")
    print(f"R2 for x: {r2_x:.4f}, R2 for y: {r2_y:.4f}")

# Earlier names of the RMSE lists, kept as aliases for any code that still uses them.
mse_X, mse_Y = rmse_X, rmse_Y

average_RMSE_X = np.mean(rmse_X)
average_RMSE_Y = np.mean(rmse_Y)

average_MAE_X = np.mean(mae_X)
average_MAE_Y = np.mean(mae_Y)

average_R2_X = np.mean(r2_X)
average_R2_Y = np.mean(r2_Y)

print("\nSummary of Metrics:")
print(f"Average RMSE over {n_iterations} iterations for x: {average_RMSE_X:.4f}")
print(f"Average RMSE over {n_iterations} iterations for y: {average_RMSE_Y:.4f}")
print(f"Average MAE over {n_iterations} iterations for x: {average_MAE_X:.4f}")
print(f"Average MAE over {n_iterations} iterations for y: {average_MAE_Y:.4f}")
print(f"Average R2 over {n_iterations} iterations for x: {average_R2_X:.4f}")
print(f"Average R2 over {n_iterations} iterations for y: {average_R2_Y:.4f}")


In [None]:
# Scatter every (x, y) row of the Sobol results table over the 5 x 5 plate grid,
# coloured by the h column.
main_file = "simulation_results/results/Sobol_Simulation_postsurrogate_x_y_h.csv"

grid_rows = 5
grid_cols = 5

plate_width = 5 * 64  # 320 mm
plate_height = 5 * 63.4  # 317 mm

main_df = pd.read_csv(main_file)
x_coords, y_coords, h_values = main_df['x'], main_df['y'], main_df['h']

fig, ax = plt.subplots(figsize=(8, 8))

# Points are drawn above the grid lines (zorder 5 vs 0).
scatter = ax.scatter(
    x_coords,
    y_coords,
    c=h_values,
    cmap="viridis",
    s=20,
    edgecolor="black",
    zorder=5,
)

# Interior cell boundaries: one dashed line every 64 along x and every 63.4 along y.
for i in range(1, grid_cols):
    ax.axvline(x=i * 64, color="gray", linestyle="--", zorder=0)
for j in range(1, grid_rows):
    ax.axhline(y=j * 63.4, color="gray", linestyle="--", zorder=0)

ax.set_xlim(0, plate_width)
ax.set_ylim(0, plate_height)

cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label("h Values", rotation=270, labelpad=15)

ax.set_xlabel("X (mm)")
ax.set_ylabel("Y (mm)")
ax.set_title("Sensor Locations on a 5x5 Grid Plate with h Gradient")

ax.grid(visible=False)
ax.set_aspect("equal", adjustable="box")  # same scale on both axes
plt.show()



In [None]:
# Size of one grid cell and of the full 5 x 5 plate.
block_width = 64
block_height = 63.4
plate_width = 5 * block_width
plate_height = 5 * block_height

# Extent of the first and last column along x, and of the first and last row along y.
x_spans = {1: (0, block_width), 5: (plate_width - block_width, plate_width)}
y_spans = {1: (0, block_height), 5: (plate_height - block_height, plate_height)}

# Bounding boxes of the four corner cells, keyed "<row>_<col>" in the order
# 1_1, 1_5, 5_1, 5_5.
corner_blocks = {
    f"{row}_{col}": {"x_min": x_lo, "x_max": x_hi, "y_min": y_lo, "y_max": y_hi}
    for row, (y_lo, y_hi) in y_spans.items()
    for col, (x_lo, x_hi) in x_spans.items()
}

def is_in_corner_block(x, y, blocks=None):
    """Return True when the point (x, y) lies inside any of the given blocks.

    Block edges count as inside: the comparison is inclusive on all four sides.

    Parameters
    ----------
    x, y : number
        Coordinates of the point to test.
    blocks : mapping, optional
        Maps a block name to a dict with the keys "x_min", "x_max", "y_min"
        and "y_max". When omitted, the module-level ``corner_blocks`` is used.

    Returns
    -------
    bool
        True if at least one block contains the point, otherwise False
        (including when ``blocks`` is empty).
    """
    # `is None` rather than truthiness, so an explicitly passed empty mapping is honoured.
    if blocks is None:
        blocks = corner_blocks
    return any(
        bounds["x_min"] <= x <= bounds["x_max"] and bounds["y_min"] <= y <= bounds["y_max"]
        for bounds in blocks.values()
    )
# Reload the merged dataset from disk.
final_df = pd.read_csv("merged_dataset_simulation.csv")

# Vectorised equivalent of calling is_in_corner_block on every row: a row is flagged
# when its (x, y) falls inside any corner block, edges included (Series.between is
# inclusive on both ends by default). Built as a separate mask so final_df itself is
# not modified.
in_corner = pd.Series(False, index=final_df.index)
for bounds in corner_blocks.values():
    in_corner |= (
        final_df['x'].between(bounds["x_min"], bounds["x_max"])
        & final_df['y'].between(bounds["y_min"], bounds["y_max"])
    )

filtered_df = final_df[~in_corner]

# index=False matches how the merged dataset is written and keeps the leftover row
# numbers from turning into an unnamed column when the file is read back.
filtered_df.to_csv("simulation_inside_merged.csv", index=False)

In [None]:
# Scatter the points that remain after the corner blocks were removed, coloured by height.
# This cell plots filtered_df only, so the Sobol results CSV is not re-read here.
plate_width = 5 * 64  # 320 mm
plate_height = 5 * 63.4  # 317 mm

grid_rows = 5
grid_cols = 5

# In the merged data each ID's x, y and height are repeated on every row of its
# voltage file; one row per ID gives the same picture without redrawing each point
# once per voltage row.
points_df = filtered_df.drop_duplicates(subset='ID')

x_coords = points_df['x']
y_coords = points_df['y']
h_values = points_df['height']

plt.figure(figsize=(8, 8))
# Points are drawn above the grid lines (zorder 5 vs 0).
scatter = plt.scatter(
    x_coords,
    y_coords,
    c=h_values,
    cmap="viridis",
    s=20,
    edgecolor="black",
    zorder=5
)

# Interior cell boundaries: one dashed line every 64 along x and every 63.4 along y.
for i in range(1, grid_cols):
    plt.axvline(x=i * 64, color="gray", linestyle="--", zorder=0)
for j in range(1, grid_rows):
    plt.axhline(y=j * 63.4, color="gray", linestyle="--", zorder=0)

plt.xlim(0, plate_width)
plt.ylim(0, plate_height)

cbar = plt.colorbar(scatter)
cbar.set_label("h Values", rotation=270, labelpad=15)

plt.xlabel("X (mm)")
plt.ylabel("Y (mm)")
plt.title("Sensor Locations on a 5x5 Grid Plate with h Gradient")

plt.grid(visible=False)
plt.gca().set_aspect("equal", adjustable="box")  # same scale on both axes
plt.show()
