In [73]:
import pandas as pd
from sklearn.utils import resample # Augment small dataset
from scipy.stats import ks_2samp # KS Test for goodness of fit

original_df = pd.read_excel("tensile_fatigue_data_genel_raw.xlsx")

n_original = len(original_df)
n_target = 200 # No. of samples needed
bootstrap_df = pd.DataFrame()

In [74]:
while len(bootstrap_df) < n_target:
    sample = resample(original_df, replace=True, n_samples=n_original, random_state=42)
    bootstrap_df = pd.concat([bootstrap_df, sample], ignore_index=True)

bootstrap_df = bootstrap_df.iloc[:n_target] #Obtain exactly n_target amount

bootstrap_df

Unnamed: 0,E,σy,σu,RA,BHN,σf',b,εf',c
0,200,1372,1470,38,409,2000,-0.091,0.480,-0.600
1,216,457,771,57,223,1168,-0.097,0.257,-0.464
2,210,795,951,66,300,1100,-0.067,0.996,-0.708
3,205,331,530,72,156,1005,-0.100,0.850,-0.610
4,206,786,889,60,260,1276,-0.071,0.930,-0.650
...,...,...,...,...,...,...,...,...,...
195,200,634,725,65,225,1225,-0.095,1.000,-0.660
196,200,1365,1450,51,410,1860,-0.073,0.600,-0.700
197,200,1896,2035,20,475,2070,-0.082,0.020,-0.770
198,220,493,789,47,229,1326,-0.103,0.602,-0.580


In [75]:
ks_results = {}

for col in bootstrap_df.columns:
    stat, p_value = ks_2samp(original_df[col], bootstrap_df[col])
    ks_results[col] = {
        "KS_statistic": round(stat, 4),
        "p_value": round(p_value, 4),
        "same_distribution": p_value > 0.05
    }

bootstrap_df.to_excel("tensile_fatigue_data_genel_bootstrap.xlsx", index = False)

print("K-S Test Results (Original vs Bootstrapped):")

for col, result in ks_results.items():
    print(f"{col:5} -> KS_stat={result['KS_statistic']:.4f}, p={result['p_value']:.4f}, "
          f"{'Same' if result['same_distribution'] else 'Different'}")

K-S Test Results (Original vs Bootstrapped):
E     -> KS_stat=0.0693, p=0.9393, Same
σy    -> KS_stat=0.0783, p=0.8660, Same
σu    -> KS_stat=0.1149, p=0.4401, Same
RA    -> KS_stat=0.1140, p=0.4499, Same
BHN   -> KS_stat=0.0738, p=0.9066, Same
σf'   -> KS_stat=0.0603, p=0.9817, Same
b     -> KS_stat=0.0981, p=0.6378, Same
εf'   -> KS_stat=0.0647, p=0.9649, Same
c     -> KS_stat=0.1777, p=0.0589, Same
