# This code is intended solely for testing purposes and is not to be used for any commercial activities.

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


## 产生数据

In [12]:
import numpy as np
import itertools

def gaussian(x, A, cen, sigma):
    """Evaluate a Gaussian peak profile at ``x``.

    The profile is ``A * sqrt(4 ln2) / (sigma * sqrt(pi)) *
    exp(-4 ln2 * (x - cen)**2 / sigma**2)``.  With this form the value drops
    to half of its maximum at ``|x - cen| == sigma / 2`` (so ``sigma`` acts
    as the full width at half maximum), and the continuous integral over
    ``x`` equals ``A``.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Position(s) at which the profile is evaluated.
    A : scalar
        Scale factor of the peak (its integrated area).
    cen : scalar
        Position of the peak maximum.
    sigma : scalar
        Width parameter; must be non-zero because it is used as a divisor.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Profile value(s), same shape as ``x``.
    """
    log_two = np.log(2)
    # Normalisation factor in front of the exponential.
    prefactor = A * np.sqrt(4 * log_two) / (sigma * np.sqrt(np.pi))
    # Argument of the exponential, evaluated element-wise for array input.
    exponent = -4 * log_two * (x - cen)**2 / (sigma**2)
    return prefactor * np.exp(exponent)



def generate_data(centers, areas, sigma, noise_level, x_size=100, seed=None):
    """Build synthetic multi-peak signals for every combination of peak areas.

    One sample is produced for each element of the Cartesian product
    ``itertools.product(areas, repeat=len(centers))``.  For a sample, one
    Gaussian peak per centre is evaluated on the grid ``0 .. x_size-1``,
    zero-mean normal noise is added to *each* peak individually, and the
    noisy peaks are summed into a single signal.

    Parameters
    ----------
    centers : sequence of int
        Peak positions.  Each value is used both as the Gaussian centre and
        as an index into the label vector, so it must be an integer that is
        a valid index for an array of length ``x_size``.
    areas : sequence of numbers
        Candidate ``A`` values handed to ``gaussian``; ``0`` means the peak
        is absent and is not marked in the classification target.
    sigma : float
        Width parameter forwarded to ``gaussian``.
    noise_level : float
        Standard deviation of the normal noise added to every peak.
    x_size : int, optional
        Number of points per signal (default 100).
    seed : int or None, optional
        When given, noise is drawn from a private
        ``np.random.RandomState(seed)`` so repeated calls return identical
        data.  ``None`` (default) keeps drawing from NumPy's global random
        state, exactly as before this parameter existed.

    Returns
    -------
    X_data : numpy.ndarray, shape (n_samples, x_size)
        Noisy summed signals.
    y_classification : numpy.ndarray, shape (n_samples, x_size)
        0/1 vectors marking the peaks whose area is non-zero.
    y_regression : numpy.ndarray, shape (n_samples, len(centers))
        The area combination used for each sample.
    """
    # A private RandomState gives repeatable noise without disturbing the
    # global stream; without a seed the global np.random functions are used.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Sampling grid shared by every peak of every sample (loop-invariant).
    x_axis = np.arange(x_size)

    X_data = []
    y_classification = []
    y_regression = []

    # Iterate lazily over every possible assignment of an area to each centre.
    for area_comb in itertools.product(areas, repeat=len(centers)):
        labels = np.zeros(x_size)    # classification target for this sample
        signal = np.zeros(x_size)    # accumulated noisy signal

        for center, area in zip(centers, area_comb):
            if area != 0:
                # The mark is placed at index center-1 (falling back to
                # ``center`` when that would be negative).
                # NOTE(review): the peak itself is centred at index
                # ``center``, so the label sits one sample to the left --
                # confirm this offset is intended before changing it.
                label_idx = center if center - 1 < 0 else center - 1
                labels[label_idx] = 1

            # Noise is drawn once per peak, so the summed signal carries
            # len(centers) independent noise realisations.
            peak = gaussian(x_axis, area, center, sigma)
            peak += rng.normal(0, noise_level, x_size)
            signal += peak

        X_data.append(signal)
        y_classification.append(labels)
        # Kept as a plain list so the stacked array's dtype follows the
        # area values themselves.
        y_regression.append(list(area_comb))

    return np.array(X_data), np.array(y_classification), np.array(y_regression)

# Smoke-test the data generator on a small configuration and report the
# shapes of the three returned arrays.
centers = [20, 30, 40, 60, 70]
areas = [0, 5, 40, 60, 95, 100]
sigma = 8
noise_level = 0.1
X_data, y_classification, y_regression = generate_data(
    centers=centers,
    areas=areas,
    sigma=sigma,
    noise_level=noise_level,
    x_size=100,
)
print(f'X_data shape: {X_data.shape}')
print(f'y_classification shape: {y_classification.shape}')
print(f'y_regression shape: {y_regression.shape}')


# Show the area combination of one arbitrary sample.
print(y_regression[45])

X_data shape: (7776, 100)
y_classification shape: (7776, 100)
y_regression shape: (7776, 5)
[ 0  0  5  5 60]


In [9]:
import pandas as pd
import os
import concurrent.futures

def generate_data_frame(x, y):
    """Assemble one sample into a three-column DataFrame.

    Parameters
    ----------
    x : array-like
        Values for the ``X`` column.
    y : sequence
        ``y[0]`` supplies the ``y_classification`` column and ``y[1]`` the
        ``y_regression`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``X``, ``y_classification`` and ``y_regression`` placed side
        by side.  The columns are aligned on their default integer index, so
        a column shorter than the others is padded with NaN.
    """
    # One single-column frame per field, in output column order.
    parts = [
        pd.DataFrame(x, columns=['X']),
        pd.DataFrame(y[0], columns=['y_classification']),
        pd.DataFrame(y[1], columns=['y_regression']),
    ]
    # Join the columns side by side; nothing is written to disk here.
    return pd.concat(parts, axis=1)

def create_set(train_x, train_y, file_name="train_data.xlsx"):
    """Export a data set to an Excel workbook, one worksheet per sample.

    Parameters
    ----------
    train_x : iterable
        Per-sample inputs; each item becomes the first argument of
        ``generate_data_frame``.
    train_y : iterable
        Per-sample targets; each item becomes the second argument of
        ``generate_data_frame``.
    file_name : str, optional
        Path of the workbook to write.  An existing file at this path is
        deleted first.

    Notes
    -----
    Samples are paired positionally; worksheets are named ``Sample_1``,
    ``Sample_2``, ... in input order.  A confirmation line is printed when
    the workbook has been written.
    """
    # Drop any workbook left over from a previous run.
    if os.path.exists(file_name):
        os.remove(file_name)

    # Build every per-sample frame up front; map() yields results in input order.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        frames = list(pool.map(generate_data_frame, train_x, train_y))

    with pd.ExcelWriter(file_name, engine='xlsxwriter') as workbook:
        for sheet_no, frame in enumerate(frames, start=1):
            frame.to_excel(workbook, sheet_name=f'Sample_{sheet_no}', index=False)

    print(f"Data successfully exported to {file_name}")

"""
# Example usage
centers = [20,30,40,60,70]
areas = [0,30,60,100,0,0]
sigma = 8
noise_level = 0.1
X_data, y_classification, y_regression = generate_data(centers, areas, sigma, noise_level)

train_x = [X_data[i] for i in range(X_data.shape[0])]
train_y = [[y_classification[i], y_regression[i]] for i in range(y_classification.shape[0])]

create_set(train_x, train_y, file_name="train_data.xlsx")
"""

'\n# 示例调用\ncenters = [20,30,40,60,70]\nareas = [0,30,60,100,0,0]\nX_data, y_classification, y_regression = generate_data(centers, areas)\n\ntrain_x = [X_data[i] for i in range(X_data.shape[0])]\ntrain_y = [[y_classification[i], y_regression[i]] for i in range(y_classification.shape[0])]\n\ncreate_training_set(train_x, train_y, file_name="train_data.xlsx")\n'

## 产生数据并存入excel

## Find user_scale and create training and test set

In [10]:
# Peak-shape settings shared by the training and test sets.
sigma = 8            # fixed full width at half maximum (FWHM) of every peak
noise_level = 1/300  # standard deviation of the added noise

# Training set configuration
Training_area = [0,1,5,50,95,500,800]
Training_center = [20,30,40,60,70]

# Test set configuration
Test_area = [0,2,30,60,750]
Test_center = [20,30,40,60,70]


# Training set
# sigma and noise_level are passed by keyword on purpose: generate_data's
# positional order is (centers, areas, sigma, noise_level), so passing
# (noise_level, sigma) positionally would silently swap the two values.
X_data, y_classification, y_regression = generate_data(
    Training_center, Training_area, sigma=sigma, noise_level=noise_level)
train_x = list(X_data)
train_y = [[labels, targets] for labels, targets in zip(y_classification, y_regression)]

create_set(train_x, train_y, file_name="train_data.xlsx")


# Test set
X_data, y_classification, y_regression = generate_data(
    Test_center, Test_area, sigma=sigma, noise_level=noise_level)
test_x = list(X_data)
test_y = [[labels, targets] for labels, targets in zip(y_classification, y_regression)]
create_set(test_x, test_y, file_name="test_data.xlsx")


Data successfully exported to train_data.xlsx
Data successfully exported to test_data.xlsx
