In [22]:
import pandas as pd
from sklearn.utils import resample # Augment small dataset
from scipy.stats import ks_2samp # KS Test for goodness of fit

original_df = pd.read_excel("tensile_fatigue_data_ong_raw.xlsx")

n_original = len(original_df)
n_target = 300 # No. of samples needed
bootstrap_df = pd.DataFrame()

In [23]:
while len(bootstrap_df) < n_target:
    sample = resample(original_df, replace=True, n_samples=n_original, random_state=42)
    bootstrap_df = pd.concat([bootstrap_df, sample], ignore_index=True)

bootstrap_df = bootstrap_df.iloc[:n_target] #Obtain exactly n_target amount

bootstrap_df

Unnamed: 0,BHN,σu,True Fracture σ,RA,True Fracture Strain,E,σf',b,εf',c
0,350,1240,1655,57,0.84,195,1655,-0.076,0.73,-0.062
1,310,1060,1117,29,0.35,200,1450,-0.100,0.22,-0.051
2,225,620,1048,60,0.93,200,1540,-0.140,0.61,-0.057
3,280,1000,1220,33,0.41,195,1220,-0.073,0.41,-0.060
4,430,1640,1779,38,0.89,195,1780,-0.067,0.32,-0.056
...,...,...,...,...,...,...,...,...,...,...
295,310,1060,1117,29,0.35,200,1450,-0.100,0.22,-0.051
296,225,620,1048,60,0.93,200,1540,-0.140,0.61,-0.057
297,280,1000,1220,33,0.41,195,1220,-0.073,0.41,-0.060
298,430,1640,1779,38,0.89,195,1780,-0.067,0.32,-0.056


In [24]:
ks_results = {}

for col in bootstrap_df.columns:
    stat, p_value = ks_2samp(original_df[col], bootstrap_df[col])
    ks_results[col] = {
        "KS_statistic": round(stat, 4),
        "p_value": round(p_value, 4),
        "same_distribution": p_value > 0.05
    }

bootstrap_df.to_excel("tensile_fatigue_data_ong_bootstrap.xlsx", index = False)

print("K-S Test Results (Original vs Bootstrapped):")

for col, result in ks_results.items():
    print(f"{col:20} -> KS_stat={result['KS_statistic']:.4f}, p={result['p_value']:.4f}, "
          f"{'Same' if result['same_distribution'] else 'Different'}")

K-S Test Results (Original vs Bootstrapped):
BHN                  -> KS_stat=0.0641, p=0.9898, Same
σu                   -> KS_stat=0.0641, p=0.9898, Same
True Fracture σ      -> KS_stat=0.1040, p=0.7078, Same
RA                   -> KS_stat=0.0861, p=0.8830, Same
True Fracture Strain -> KS_stat=0.1049, p=0.6981, Same
E                    -> KS_stat=0.0861, p=0.8836, Same
σf'                  -> KS_stat=0.0832, p=0.9053, Same
b                    -> KS_stat=0.1222, p=0.5109, Same
εf'                  -> KS_stat=0.0637, p=0.9904, Same
c                    -> KS_stat=0.1069, p=0.6758, Same
