In [1]:
import pandas as pd
from sklearn.utils import resample # Augment small dataset
from scipy.stats import ks_2samp # KS Test for goodness of fit

original_df = pd.read_excel("AM_dataset_v2_ALL.xlsx")

n_original = len(original_df)
n_target = 50 # No. of samples needed
bootstrap_df = pd.DataFrame()

In [2]:
while len(bootstrap_df) < n_target:
    sample = resample(original_df, replace=True, n_samples=n_original, random_state=42)
    bootstrap_df = pd.concat([bootstrap_df, sample], ignore_index=True)

bootstrap_df = bootstrap_df.iloc[:n_target] #Obtain exactly n_target amount

bootstrap_df

Unnamed: 0,E (GPa),EL (%),σy (MPa),σu (MPa),σf' (MPa),b,εf' (%),c
0,77.0,27.21,557,439,52.25,-0.05,1.31,-0.22
1,119.0,3.8,908,1038,2310.0,-0.135,0.03,-0.53
2,70.0,14.1,110,180,291.43,-0.084,0.4766,-0.703
3,153.0,38.0,568,596,714.95,-0.056,0.2413,-0.503
4,77.0,58.8,561,343,87.13,-0.1,2.96,-0.31
5,112.0,3.7,959,1049,4687.0,-0.21,0.0,0.0
6,77.0,27.21,557,439,52.25,-0.05,1.31,-0.22
7,106.0,6.2,1088,1137,2017.5,-0.08428,1.0541,-0.85695
8,78.0,12.0,246,366,585.0,-0.124,0.635,-0.663
9,116.2,17.1,964,1115,1302.0,-0.0509,0.236,-0.582


In [3]:
ks_results = {}

for col in bootstrap_df.columns:
    stat, p_value = ks_2samp(original_df[col], bootstrap_df[col])
    ks_results[col] = {
        "KS_statistic": round(stat, 4),
        "p_value": round(p_value, 4),
        "same_distribution": p_value > 0.05
    }

bootstrap_df.to_excel("AM_dataset_v2_ALL_bootstrap.xlsx", index = False)

print("K-S Test Results (Original vs Bootstrapped):")

for col, result in ks_results.items():
    print(f"{col:10} -> KS_stat={result['KS_statistic']:.4f}, p={result['p_value']:.4f}, "
          f"{'Same' if result['same_distribution'] else 'Different'}")

K-S Test Results (Original vs Bootstrapped):
E (GPa)    -> KS_stat=0.1938, p=0.4652, Same
EL (%)     -> KS_stat=0.1215, p=0.9306, Same
σy (MPa)   -> KS_stat=0.3415, p=0.0270, Different
σu (MPa)   -> KS_stat=0.1508, p=0.7607, Same
σf' (MPa)  -> KS_stat=0.1015, p=0.9844, Same
b          -> KS_stat=0.2062, p=0.3963, Same
εf' (%)    -> KS_stat=0.2677, p=0.1379, Same
c          -> KS_stat=0.3400, p=0.0279, Different
