# This code is intended solely for testing purposes and is not to be used for any commercial activities.

## 产生数据

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

# Gaussian line shape, normalised so that A is the integrated area of the peak.
def gaussian(x, A, cen, sigma):
    """Evaluate an area-normalised Gaussian peak at ``x``.

    The expression is ``A * sqrt(4 ln 2) / (sigma * sqrt(pi))
    * exp(-4 ln 2 * (x - cen)**2 / sigma**2)``.  With this form the curve
    integrates to ``A`` and drops to half of its maximum at
    ``cen +/- sigma / 2``, i.e. ``sigma`` acts as the full width at half
    maximum rather than as a standard deviation.

    Args:
        x: Scalar or NumPy array of positions.
        A: Integrated area of the peak.
        cen: Position of the maximum.
        sigma: Width of the peak (full width at half maximum).

    Returns:
        The peak evaluated at ``x``, with the same shape as ``x``.
    """
    four_ln2 = 4 * np.log(2)
    prefactor = A * np.sqrt(four_ln2) / (sigma * np.sqrt(np.pi))
    exponent = -four_ln2 * (x - cen)**2 / (sigma**2)
    return prefactor * np.exp(exponent)


"""
def generate_data(num_samples, num_peaks, area, center, sigma, noise_level):
    # 生成数据容器
    X_data = []
    y_data = []
    labels = []
    x = np.linspace(0, 100, 100) 

    for _ in range(num_samples):
        # x轴范围和点数

        # 随机选择多个浓度
        #concentrations = np.random.choice(concentration_list, num_peaks)

        # 叠加多个高斯峰
        y_total = np.zeros_like(x)
        label = []
        for area in area:
            for center in center:
                y_total += gaussian(x, area, center, sigma)
                label.append({
                    "area": area, 
                    "center": center, 
                    "sigma": sigma, 
                    "noise_level": noise_level
                })
        
        # 加入噪声
        y_total += np.random.normal(0,  noise_level, x.shape)
    
        # 添加到数据容器中
        X_data.append(x)
        y_data.append(y_total)
        labels.append(label)
    
    return np.array(X_data), np.array(y_data), labels
"""

def generate_data(areas, centers, sigma, noise_level):
    """Synthesise one noisy spectrum for every assignment of areas to centres.

    Every element of ``itertools.product(areas, repeat=len(centers))`` becomes
    one sample, so ``len(areas) ** len(centers)`` samples are produced.  In a
    sample, the k-th chosen area is paired with ``centers[k]``; the resulting
    Gaussian peaks are summed on a fixed grid of 100 points spanning
    [0, 100], and zero-mean normal noise with standard deviation
    ``noise_level`` is added.

    Args:
        areas: Candidate peak areas.
        centers: Peak positions; one peak is placed at each.
        sigma: Width passed unchanged to ``gaussian`` for every peak.
        noise_level: Standard deviation of the additive noise.

    Returns:
        ``(X_data, y_data, labels)``: arrays of the x grid and of the noisy
        signal for each sample, plus a list holding, per sample, one dict per
        peak with the keys ``area``, ``center``, ``sigma`` and ``noise_level``.
    """
    grid = np.linspace(0, 100, 100)  # shared x axis: 100 points on [0, 100]

    X_data, y_data, labels = [], [], []

    # One tuple per sample; element k is the area used for centers[k].
    for area_choice in itertools.product(areas, repeat=len(centers)):
        peaks = list(zip(area_choice, centers))

        # Superimpose the individual peaks.
        signal = np.zeros_like(grid)
        for area, center in peaks:
            signal += gaussian(grid, area, center, sigma)

        # Additive measurement noise.
        signal += np.random.normal(0, noise_level, grid.shape)

        X_data.append(grid)
        y_data.append(signal)
        labels.append([
            {
                "area": area,
                "center": center,
                "sigma": sigma,
                "noise_level": noise_level,
            }
            for area, center in peaks
        ])

    return np.array(X_data), np.array(y_data), labels

"""
def generate_data(areas, centers, sigma, noise_level):
    X_data = []
    y_data = []
    labels = []
    x = np.linspace(0, 100, 100)  # x轴范围和点数

    # 指定10组较为合理的组合
    combinations = [
        [(30, 20), (100, 30), (60, 40), (30, 60), (100, 70)]  # 混合组合
    ]
    
    for sigma_test in sigma:
        for combination in combinations:
            y_total = np.zeros_like(x)
            label = []
            for area_center in combination:
                    y_total += gaussian(x, area_center[0], area_center[1], sigma_test)
                    label.append({
                        "area": area_center[0], 
                        "center": area_center[1], 
                        "sigma": sigma_test, 
                        "noise_level": noise_level
                    })

        # 加入噪声
        y_total += np.random.normal(0, noise_level, x.shape)
    
        # 添加到数据容器中
        X_data.append(x)
        y_data.append(y_total)
        labels.append(label)
    
    return np.array(X_data), np.array(y_data), labels
"""



'\ndef generate_data(areas, centers, sigma, noise_level):\n    X_data = []\n    y_data = []\n    labels = []\n    x = np.linspace(0, 100, 100)  # x轴范围和点数\n\n    # 指定10组较为合理的组合\n    combinations = [\n        [(30, 20), (100, 30), (60, 40), (30, 60), (100, 70)]  # 混合组合\n    ]\n    \n    for sigma_test in sigma:\n        for combination in combinations:\n            y_total = np.zeros_like(x)\n            label = []\n            for area_center in combination:\n                    y_total += gaussian(x, area_center[0], area_center[1], sigma_test)\n                    label.append({\n                        "area": area_center[0], \n                        "center": area_center[1], \n                        "sigma": sigma_test, \n                        "noise_level": noise_level\n                    })\n\n        # 加入噪声\n        y_total += np.random.normal(0, noise_level, x.shape)\n    \n        # 添加到数据容器中\n        X_data.append(x)\n        y_data.append(y_total)\n        labels.append(la

In [14]:
"""
#Test

# 参数设置
Training_area = [0,30,60,100]
Training_center = [20, 30, 40, 60, 70]
sigma = 8
noise_level = 1 / 300

X_data, y_data, labels = generate_data(Training_area, Training_center, sigma, noise_level)
print(X_data.shape)


# 分别绘制每组数据
for i in range(len(X_data)):  # 绘制所有组合的数据
    plt.figure()
    plt.plot(X_data[i], y_data[i])
    plt.title(f"Combination {i+1}")
    for lbl in labels[i]:
        print(f"Area={lbl['area']}, Center={lbl['center']}, Sigma={lbl['sigma']}, Noise Level={lbl['noise_level']}")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.grid(True)
    plt.show()


"""

'\n#Test\n\n# 参数设置\nTraining_area = [0,30,60,100]\nTraining_center = [20, 30, 40, 60, 70]\nsigma = 8\nnoise_level = 1 / 300\n\nX_data, y_data, labels = generate_data(Training_area, Training_center, sigma, noise_level)\nprint(X_data.shape)\n\n\n# 分别绘制每组数据\nfor i in range(len(X_data)):  # 绘制所有组合的数据\n    plt.figure()\n    plt.plot(X_data[i], y_data[i])\n    plt.title(f"Combination {i+1}")\n    for lbl in labels[i]:\n        print(f"Area={lbl[\'area\']}, Center={lbl[\'center\']}, Sigma={lbl[\'sigma\']}, Noise Level={lbl[\'noise_level\']}")\n    plt.xlabel("X")\n    plt.ylabel("Y")\n    plt.grid(True)\n    plt.show()\n\n\n'

## 产生数据并存入excel

In [15]:
import os
def generate_data_set(area, center, sigma, noise_level,file_name="gaussian_peaks.xlsx"):
    """Generate the synthetic spectra and export them to an Excel workbook.

    The workbook gets a ``Metadata`` sheet (one row per peak of every sample)
    and one or more ``Data_<n>`` sheets holding the ``Sample``/``X``/``Y``
    points.  Samples are split across data sheets so that no sheet exceeds
    Excel's row limit.  An existing file called ``file_name`` is deleted
    first.

    Args:
        area: Candidate peak areas, forwarded to ``generate_data``.
        center: Peak positions, forwarded to ``generate_data``.
        sigma: Peak width, forwarded to ``generate_data``.
        noise_level: Noise standard deviation, forwarded to ``generate_data``.
        file_name: Path of the workbook to write.

    Returns:
        The ``(X_data, y_data, labels)`` triple returned by ``generate_data``.
    """
    # Remove the output of a previous run.
    if os.path.exists(file_name):
        os.remove(file_name)

    X_data, y_data, labels = generate_data(area, center, sigma, noise_level)

    # Preview at most the first three samples; slicing avoids an IndexError
    # when fewer than three samples were generated.
    for sample_number, sample_labels in enumerate(labels[:3], start=1):
        print(f"Labels for Sample {sample_number}:")
        for peak_number, label in enumerate(sample_labels, start=1):
            print(f"  Peak {peak_number}: {label}")

    # Excel allows 1,048,576 rows per sheet and one of them is used by the
    # header, so budget the remainder using the actual points per sample.
    max_rows_per_sheet = 1048576
    rows_per_sample = X_data.shape[1] if X_data.ndim == 2 else 100
    samples_per_sheet = max(1, (max_rows_per_sheet - 1) // max(1, rows_per_sample))

    # One metadata row per peak.
    df_meta = pd.DataFrame([
        {
            "Sample": sample_number,
            "Peak": peak_number,
            "Area": label["area"],
            "Center": label["center"],
            "Sigma": label["sigma"],
            "Noise Level": label["noise_level"],
        }
        for sample_number, sample_labels in enumerate(labels, start=1)
        for peak_number, label in enumerate(sample_labels, start=1)
    ])

    # One data row per (sample, point).
    df_data = pd.DataFrame([
        {"Sample": sample_number, "X": x_value, "Y": y_value}
        for sample_number, (xs, ys) in enumerate(zip(X_data, y_data), start=1)
        for x_value, y_value in zip(xs, ys)
    ])

    with pd.ExcelWriter(file_name) as writer:
        df_meta.to_excel(writer, sheet_name="Metadata", index=False)

        starts = range(0, len(X_data), samples_per_sheet)
        for sheet_number, start in enumerate(starts, start=1):
            end = min(start + samples_per_sheet, len(X_data))
            # Sample numbers are 1-based and `between` is inclusive.
            df_data_subset = df_data[df_data["Sample"].between(start + 1, end)]
            df_data_subset.to_excel(writer, sheet_name=f"Data_{sheet_number}", index=False)

    # file_name already carries its extension; do not append another one.
    print(f"Data successfully exported to {file_name}")

    return X_data, y_data, labels



In [16]:
# Training set: candidate peak areas and the peak positions
Training_area = [0, 30, 60, 100]
Training_center = [20, 30, 40, 60, 70]

# Test set: same positions, different candidate areas
Test_area = [0, 10, 90, 100]
Test_center = [20, 30, 40, 60, 70]

# Shared by both sets
sigma = 8  # fixed full width at half maximum
noise_level = 1 / 300  # noise level

def test_combinations(Training_area, Training_center, sigma, noise_level):
    """Print every assignment of areas to peak centres, one tuple per line.

    The tuples are the elements of
    ``itertools.product(Training_area, repeat=len(Training_center))``; element
    k of a tuple is the area that belongs to ``Training_center[k]``.

    Earlier versions also summed the Gaussian peaks of every combination and
    built a label list, but neither result was used, so that work has been
    dropped; the printed output is unchanged.

    Args:
        Training_area: Candidate peak areas.
        Training_center: Peak positions; only their number matters here.
        sigma: Unused; kept so existing calls keep working.
        noise_level: Unused; kept so existing calls keep working.
    """
    for combination in itertools.product(Training_area, repeat=len(Training_center)):
        print(combination)
#test_combinations(Training_area, Training_center, sigma, noise_level)

#X_data, y_data, labels = generate_data_set(Training_area, Training_center, sigma, noise_level,file_name="gaussian_peaks.xlsx")


"""
# 分别画出后10个样本
for i in range(10):
    plt.plot(X_data[i], y_data[i])
    plt.title(f"Generated Data with Peaks {i+1}")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()
"""




'\n# 分别画出后10个样本\nfor i in range(10):\n    plt.plot(X_data[i], y_data[i])\n    plt.title(f"Generated Data with Peaks {i+1}")\n    plt.xlabel("X")\n    plt.ylabel("Y")\n    plt.show()\n'

## 筛选出类别1

In [17]:
def normal_peak_picking(x, y_total, user_scale, noise_level,peak_diag=2):
    """Pick class-1 peaks (strict local maxima) in every row of ``y_total``.

    An interior point ``i`` of a row is a candidate when its intensity is
    strictly greater than both direct neighbours and greater than
    ``noise_level * user_scale``.  The candidate then gets a score: each of
    the two second neighbours (``i - 2`` and ``i + 2``, where they exist)
    that lies below the candidate adds 2, so the score is 0, 2 or 4.  The
    candidate is accepted when the score is at least ``peak_diag``.

    Args:
        x: Sequence of rows of x coordinates.
        y_total: Sequence of rows of intensities, parallel to ``x``.
        user_scale: Factor multiplied with ``noise_level`` to get the
            intensity threshold.
        noise_level: Noise level used for the threshold.
        peak_diag: Minimum score a candidate needs to be accepted.

    Returns:
        ``(all_p1, all_p2, all_p_type, count_finish)``: per row the x
        positions, the intensities and the class ids (always 1) of the
        accepted peaks, followed by the number of rows in which exactly five
        peaks were accepted.
    """
    min_intensity = noise_level * user_scale
    print(f"Minimal peak intensity is set to {min_intensity}")

    all_p1, all_p2, all_p_type = [], [], []
    count_finish = 0

    for row_x, row_y in zip(x, y_total):
        n_points = len(row_x)
        peak_xs, peak_ys = [], []

        for i in range(1, n_points - 1):
            height = row_y[i]
            is_local_max = height > row_y[i - 1] and height > row_y[i + 1]
            if not (is_local_max and height > min_intensity):
                continue

            # Second neighbours only exist away from the row ends.
            above_left = i > 1 and height > row_y[i - 2]
            above_right = i < n_points - 2 and height > row_y[i + 2]
            # Each satisfied comparison is worth two points.
            ndiag = 2 * (int(above_left) + int(above_right))

            if ndiag >= peak_diag:
                peak_xs.append(row_x[i])
                peak_ys.append(height)

        # Rows with exactly five accepted peaks are counted separately.
        if len(peak_xs) == 5:
            count_finish += 1

        all_p1.append(peak_xs)
        all_p2.append(peak_ys)
        all_p_type.append([1] * len(peak_xs))

    print(f"Class1 has been successfully classified")

    return all_p1, all_p2, all_p_type, count_finish

In [18]:
"""
## test normal_peak_picking
X_data, y_data, labels = generate_data_set(Training_area, Training_center, sigma, noise_level,file_name="gaussian_peaks.xlsx")
p1, p2, p_type,count_finish = normal_peak_picking(X_data, y_data, user_scale, noise_level)
print(count_finish)
"""

'\n## test normal_peak_picking\nX_data, y_data, labels = generate_data_set(Training_area, Training_center, sigma, noise_level,file_name="gaussian_peaks.xlsx")\np1, p2, p_type,count_finish = normal_peak_picking(X_data, y_data, user_scale, noise_level)\nprint(count_finish)\n'

## 筛选出类别2

In [19]:
from scipy.ndimage import gaussian_laplace

def get_median_width_x(sigmas):
    """Return ``numpy.median`` of the supplied peak width(s).

    Args:
        sigmas: A single width or a collection of widths.

    Returns:
        The median as computed by ``numpy.median``.
    """
    median_width = np.median(sigmas)
    return median_width


def laplacing_of_gaussian_convolution(data, sigma):
    """Filter ``data`` with SciPy's Laplacian-of-Gaussian.

    Thin wrapper around ``scipy.ndimage.gaussian_laplace``; all other filter
    options keep their SciPy defaults.

    Args:
        data: Array to filter.
        sigma: Standard deviation of the Gaussian kernel.

    Returns:
        The filtered array, as returned by ``gaussian_laplace``.
    """
    filtered = gaussian_laplace(data, sigma=sigma)
    return filtered

def shoulder_peak_picking(x, y_total, user_scale, noise_level, median_width_x, peak_diag=2):
    """Pick class-2 ("shoulder") points from the Laplacian-of-Gaussian response.

    Every row of ``y_total`` is filtered with
    ``laplacing_of_gaussian_convolution`` using a kernel width of
    ``median_width_x / sqrt(2)``.  An interior point ``i`` is a candidate when
    the filtered response is strictly greater than at both direct neighbours
    and the unfiltered intensity exceeds ``noise_level * user_scale``.  Each
    second neighbour of the response (``i - 2`` / ``i + 2``, where it exists)
    that lies below the candidate adds 2 to its score, and the candidate is
    accepted when the score reaches ``peak_diag``.

    The previously present ``nshoulder1`` value and ``peak_map`` array were
    never used or written (the ``peak_map[i] == 0`` guard was always true),
    so they have been removed without affecting the result.

    NOTE(review): SciPy's Laplacian-of-Gaussian is negative at the crest of a
    positive peak, so maxima of the response are not peak crests -- confirm
    that this sign convention is the intended one.

    Args:
        x: Sequence of rows of x coordinates.
        y_total: Sequence of rows of intensities, parallel to ``x``.
        user_scale: Factor multiplied with ``noise_level`` to get the
            intensity threshold.
        noise_level: Noise level used for the threshold.
        median_width_x: Peak width from which the filter width is derived.
        peak_diag: Minimum score a candidate needs to be accepted.

    Returns:
        ``(all_p1, all_p2, all_p_type)``: per row the x positions, the
        unfiltered intensities and the class ids (always 2) of the accepted
        points.
    """
    min_intensity = noise_level * user_scale
    print(f"Minimal peak for shoulder intensity is set to {min_intensity}")

    # The kernel width does not depend on the row, so compute it once.
    log_sigma = median_width_x / np.sqrt(2)

    all_p1, all_p2, all_p_type = [], [], []

    for row_x, row_y in zip(x, y_total):
        xdim = len(row_x)
        p1, p2 = [], []

        shoulder = laplacing_of_gaussian_convolution(row_y, log_sigma)

        for i in range(1, xdim - 1):
            response = shoulder[i]
            is_local_max = response > shoulder[i - 1] and response > shoulder[i + 1]
            if not (is_local_max and row_y[i] > min_intensity):
                continue

            # Second neighbours only exist away from the row ends.
            above_left = i > 1 and response > shoulder[i - 2]
            above_right = i < xdim - 2 and response > shoulder[i + 2]
            # Each satisfied comparison is worth two points, which keeps the
            # meaning of `peak_diag` identical to the former doubled checks.
            ndiag = 2 * (int(above_left) + int(above_right))

            if ndiag >= peak_diag:
                p1.append(row_x[i])
                p2.append(row_y[i])

        all_p1.append(p1)
        all_p2.append(p2)
        all_p_type.append([2] * len(p1))

    print(f"Class2 has been successfully classified")

    return all_p1, all_p2, all_p_type

In [20]:
def _mark_peaks(classes, row_x, peak_xs, peak_types):
    """Write each peak's class id into ``classes``; return True if any was written."""
    grid = np.asarray(row_x)
    marked = False
    for px, pt in zip(peak_xs, peak_types):
        # Peak positions were taken from row_x, so an exact match is expected.
        hits = np.flatnonzero(grid == px)
        if hits.size:
            classes[hits[0]] = pt
            marked = True
    return marked


def save_peaks_to_excel(writer, sheet_name, x, y_total, class1_results, class2_results):
    """Label every point of every sample and write one sheet per sample.

    All points start as class 0.  Points picked as class 1 are marked first,
    then points picked as class 2; a point present in both ends up with the
    class-2 id.  Each sample is written to the sheet
    ``f'{sheet_name}_{index}'`` (1-based index) with the columns ``X``, ``Y``
    and ``Class``.

    The returned ``train_y`` rows are taken after both marking passes.  They
    used to be copied before the class-2 pass, so the returned labels never
    contained class 2 and disagreed with the ``Class`` column that was
    written to the workbook.

    Args:
        writer: Open ``pandas.ExcelWriter`` to write the sheets to.
        sheet_name: Prefix of the generated sheet names.
        x: Sequence of rows of x coordinates.
        y_total: Sequence of rows of intensities, parallel to ``x``.
        class1_results: Result of ``normal_peak_picking``; the first three
            items (positions, intensities, class ids per row) are used.
        class2_results: Result of ``shoulder_peak_picking`` with the same
            three items.

    Returns:
        ``(class1_count, class2_count, count_all, train_x, train_y)``: the
        number of samples containing at least one class-1 / class-2 point,
        the number of samples written, and per sample the x values and the
        class labels as plain lists.
    """
    # The pick results do not change between samples; unpack them once.
    p1_class1, _, p_type_class1 = class1_results[:3]
    p1_class2, _, p_type_class2 = class2_results[:3]

    class1_count = 0
    class2_count = 0
    count_all = 0

    train_x = []
    train_y = []

    for idx, (row_x, row_y) in enumerate(zip(x, y_total)):
        # Every point is class 0 until a picker claims it.
        classes = np.zeros_like(row_y, dtype=int)

        if _mark_peaks(classes, row_x, p1_class1[idx], p_type_class1[idx]):
            class1_count += 1

        if _mark_peaks(classes, row_x, p1_class2[idx], p_type_class2[idx]):
            class2_count += 1

        # Snapshot the labels only once both classes have been marked.
        train_x.append(list(row_x))
        train_y.append(list(classes))

        df = pd.DataFrame({
            'X': row_x,
            'Y': row_y,
            'Class': classes
        })
        df.to_excel(writer, sheet_name=f'{sheet_name}_{idx+1}', index=False)
        count_all += 1

    return class1_count,class2_count,count_all,train_x, train_y

##  Find user_scale and  create training and test set

In [21]:
# Training set: candidate peak areas and the peak positions
Training_area = [0, 30, 60, 100]
Training_center = [20, 30, 40, 60, 70]

# Test set: same positions, different candidate areas
Test_area = [0, 10, 90, 100]
Test_center = [20, 30, 40, 60, 70]

# Shared by both sets
sigma = 8  # fixed full width at half maximum
noise_level = 1 / 300  # noise level

def test_combinations(Training_area, Training_center, sigma, noise_level):
    """Print every assignment of areas to peak centres, one tuple per line.

    The tuples are the elements of
    ``itertools.product(Training_area, repeat=len(Training_center))``; element
    k of a tuple is the area that belongs to ``Training_center[k]``.

    Earlier versions also summed the Gaussian peaks of every combination and
    built a label list, but neither result was used, so that work has been
    dropped; the printed output is unchanged.

    Args:
        Training_area: Candidate peak areas.
        Training_center: Peak positions; only their number matters here.
        sigma: Unused; kept so existing calls keep working.
        noise_level: Unused; kept so existing calls keep working.
    """
    for combination in itertools.product(Training_area, repeat=len(Training_center)):
        print(combination)
#test_combinations(Training_area, Training_center, sigma, noise_level)

# Training set: export the spectra, then use the largest intensity as the
# user scale factor for peak picking.
X_data, y_data, labels = generate_data_set(
    Training_area,
    Training_center,
    sigma,
    noise_level,
    file_name="gaussian_peaks_train.xlsx",
)
train_user_scale = y_data.max()


# Test set: same procedure with the test areas and centres.
X_test_data, y_test_data, test_labels = generate_data_set(
    Test_area,
    Test_center,
    sigma,
    noise_level,
    file_name="gaussian_peaks_test.xlsx",
)
test_user_scale = y_test_data.max()



"""
# 分别画出后10个样本
for i in range(10):
    plt.plot(X_data[i], y_data[i])
    plt.title(f"Generated Data with Peaks {i+1}")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()
"""

Labels for Sample 1:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 2: {'area': 0, 'center': 30, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 3: {'area': 0, 'center': 40, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 4: {'area': 0, 'center': 60, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 5: {'area': 0, 'center': 70, 'sigma': 8, 'noise_level': 0.0033333333333333335}
Labels for Sample 2:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 2: {'area': 0, 'center': 30, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 3: {'area': 0, 'center': 40, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 4: {'area': 0, 'center': 60, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 5: {'area': 30, 'center': 70, 'sigma': 8, 'noise_level': 0.0033333333333333335}
Labels for Sample 3:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.00333333333

'\n# 分别画出后10个样本\nfor i in range(10):\n    plt.plot(X_data[i], y_data[i])\n    plt.title(f"Generated Data with Peaks {i+1}")\n    plt.xlabel("X")\n    plt.ylabel("Y")\n    plt.show()\n'

In [22]:
# Run both peak pickers on the training spectra.
median_width_x = get_median_width_x(sigma)
class1_results = normal_peak_picking(X_data, y_data, train_user_scale, noise_level)
class2_results = shoulder_peak_picking(
    X_data, y_data, train_user_scale, noise_level, median_width_x
)

# Remove the workbook of a previous run before writing a new one.
if os.path.exists("Train_Classification.xlsx"):
    os.remove("Train_Classification.xlsx")

with pd.ExcelWriter("Train_Classification.xlsx") as writer:
    count1, count2, count_all, train_x, train_y = save_peaks_to_excel(
        writer, "Peak", X_data, y_data, class1_results, class2_results
    )

# Summary, one value per line: samples with class-1 points, samples with
# class-2 points, samples written, and the sizes of the returned lists.
print(count1, count2, count_all, len(train_x), len(train_y), sep="\n")


Minimal peak intensity is set to 0.040069446877984104
Class1 has been successfully classified
Minimal peak for shoulder intensity is set to 0.040069446877984104
Class2 has been successfully classified
1023
972
1024
1024
1024


## 产生训练数据


In [23]:
import pandas as pd

def create_training_set(train_x, train_y, file_name="train_data.xlsx"):
    """Write the training samples to an Excel workbook, one sheet per sample.

    An existing file called ``file_name`` is deleted first.  Sample ``i``
    (1-based) goes to the sheet ``Sample_<i>`` with the columns ``X`` and
    ``Y``, filled from ``train_x[i - 1]`` and ``train_y[i - 1]``.

    Args:
        train_x: Sequence of per-sample value lists for column ``X``.
        train_y: Sequence of per-sample value lists for column ``Y``.
        file_name: Path of the workbook to write.

    Returns:
        None.
    """
    if os.path.exists(file_name):
        os.remove(file_name)

    with pd.ExcelWriter(file_name) as writer:
        samples = zip(train_x, train_y)
        for sample_number, (xs, ys) in enumerate(samples, start=1):
            sheet = pd.DataFrame({'X': xs, 'Y': ys})
            sheet.to_excel(writer, sheet_name=f'Sample_{sample_number}', index=False)



# Write train_data.xlsx. create_training_set has no return statement, so
# `creation` is None; the call is made for its file-writing side effect.
creation = create_training_set(train_x, train_y, file_name="train_data.xlsx")

## 产生测试数据

In [24]:
# Build the test set and label it with both peak pickers.
X_test_data, y_test_data, test_labels = generate_data_set(Test_area, Test_center, sigma, noise_level,file_name="gaussian_peaks_test.xlsx")
# The spectra were regenerated with freshly drawn noise, so refresh the scale
# factor to match the data that is actually picked below.
test_user_scale = np.max(y_test_data)

median_width_x = get_median_width_x(sigma)
test_class1_results = normal_peak_picking(X_test_data, y_test_data, test_user_scale, noise_level)
# Shoulder picking must read the test intensities; the training array
# `y_data` was passed here before, which labelled the wrong spectra.
test_class2_results = shoulder_peak_picking(X_test_data, y_test_data,test_user_scale, noise_level,median_width_x)

# Remove the workbook of a previous run before writing a new one.
if os.path.exists("Test_Classification.xlsx"):
    os.remove("Test_Classification.xlsx")

with pd.ExcelWriter("Test_Classification.xlsx") as writer:
    count1,count2,count_all,test_x,test_y = save_peaks_to_excel(writer, "Peak", X_test_data, y_test_data, test_class1_results , test_class2_results)

# Summary: samples with class-1 points, samples with class-2 points, samples
# written, and the sizes of the returned lists.
print(count1)
print(count2)
print(count_all)
print(len(test_x))
print(len(test_y))

Labels for Sample 1:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 2: {'area': 0, 'center': 30, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 3: {'area': 0, 'center': 40, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 4: {'area': 0, 'center': 60, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 5: {'area': 0, 'center': 70, 'sigma': 8, 'noise_level': 0.0033333333333333335}
Labels for Sample 2:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 2: {'area': 0, 'center': 30, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 3: {'area': 0, 'center': 40, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 4: {'area': 0, 'center': 60, 'sigma': 8, 'noise_level': 0.0033333333333333335}
  Peak 5: {'area': 10, 'center': 70, 'sigma': 8, 'noise_level': 0.0033333333333333335}
Labels for Sample 3:
  Peak 1: {'area': 0, 'center': 20, 'sigma': 8, 'noise_level': 0.00333333333

In [25]:
def create_test_set(test_x, test_y, file_name="test_data.xlsx"):
    """Write the test samples to an Excel workbook, one sheet per sample.

    An existing file called ``file_name`` is deleted first.  Sample ``i``
    (1-based) goes to the sheet ``Sample_<i>`` with the columns ``X`` and
    ``Y``, filled from ``test_x[i - 1]`` and ``test_y[i - 1]``.

    Args:
        test_x: Sequence of per-sample value lists for column ``X``.
        test_y: Sequence of per-sample value lists for column ``Y``.
        file_name: Path of the workbook to write.

    Returns:
        None.
    """
    if os.path.exists(file_name):
        os.remove(file_name)

    with pd.ExcelWriter(file_name) as writer:
        samples = zip(test_x, test_y)
        for sample_number, (xs, ys) in enumerate(samples, start=1):
            sheet = pd.DataFrame({'X': xs, 'Y': ys})
            sheet.to_excel(writer, sheet_name=f'Sample_{sample_number}', index=False)

# Write test_data.xlsx. create_test_set has no return statement, so
# `creation` is None; the call is made for its file-writing side effect.
creation = create_test_set(test_x, test_y, file_name="test_data.xlsx")