In [109]:
import numpy as np
import pandas as pd

In [110]:
# Intended sample counts for the training and test datasets.
n_train = 10000
n_test = 5000

# Extent of the integer coordinate grid: x is drawn from [0, width) and
# y from [0, height) when these values are passed to create_data.
width = 100
height = 100

# Number of reference points generated for every target point.
n_refs = 12

# Seed NumPy's global RNG once, up front, so that restarting the kernel and
# running every cell regenerates exactly the same datasets.
random_seed = 42
np.random.seed(random_seed)

In [111]:
def create_data(n_samples, n_refs, width = 100, height = 100):
    target_x = np.random.randint(0, width, size=(n_samples, 1))
    target_y = np.random.randint(0, height, size=(n_samples, 1))
    
    reference_x = np.random.randint(0, width,  size=(n_samples, n_refs))
    reference_y = np.random.randint(0, height, size=(n_samples, n_refs))
    
    distances = np.zeros(shape=(n_samples, n_refs))
    
    for ip in range(n_refs):
        distances[:, ip:ip+1] = np.sqrt(np.square(reference_x[:, ip:ip+1] - target_x) + np.square(reference_y[:, ip:ip+1] - target_y))
        
    target_x_cols = [ "target_x" ]
    target_y_cols = [ "target_y" ]
    reference_x_cols = [ "ref_x_{}".format(i+1) for i in range(n_refs) ]
    reference_y_cols = [ "ref_y_{}".format(i+1) for i in range(n_refs) ]
    distances_cols = [ "distance_{}".format(i+1) for i in range(n_refs) ]
    
    target_x_df = pd.DataFrame(target_x, columns=target_x_cols)
    target_y_df = pd.DataFrame(target_y, columns=target_y_cols)
    reference_x_df = pd.DataFrame(reference_x, columns=reference_x_cols)
    reference_y_df = pd.DataFrame(reference_y, columns=reference_y_cols)
    distances_df = pd.DataFrame(distances, columns=distances_cols)
    
    data = pd.concat([target_x_df, target_y_df, reference_x_df, reference_y_df, distances_df], axis=1)

    target_cols = [ "target_x", "target_y" ]
    reference_cols = []
    for x_col, y_col in zip(reference_x_cols, reference_y_cols):
        reference_cols += [x_col, y_col]
    cols = target_cols + reference_cols + distances_cols

    data = data[cols]

    return data

In [112]:
# Build the noise-free training and test datasets from separate random draws.
train_data = create_data(n_samples=n_train, n_refs=n_refs, width=width, height=height)
# Sized by n_test (not n_train) so the test set has its own configured row count.
test_data = create_data(n_samples=n_test, n_refs=n_refs, width=width, height=height)

In [113]:
def add_noise(data_df, cols, mu=0, sigma=1, min_noise=0, max_noise=5, n_b=1, p_b=0.1, R_NLoS=5, seed=None):
    """Append noisy copies of the selected columns to ``data_df``.

    For every column in ``cols`` a column named ``"<column>_with_noise"`` is
    added. Its values are the original values plus the sum of three
    independently drawn noise terms, with negative results replaced by 0:

    * Gaussian noise with mean ``mu`` and standard deviation ``sigma``;
    * uniform noise on ``[min_noise, max_noise)``;
    * ``R_NLoS`` times a binomial draw with ``n_b`` trials and success
      probability ``p_b`` (a Bernoulli draw for the default ``n_b=1``).

    Parameters
    ----------
    data_df : pandas.DataFrame
        Input frame; it is not modified.
    cols : list of str
        Names of the columns that receive a noisy counterpart.
    mu, sigma : float
        Mean and standard deviation of the Gaussian term.
    min_noise, max_noise : float
        Bounds of the uniform term.
    n_b, p_b : int, float
        Number of trials and success probability of the binomial term.
    R_NLoS : float
        Amount added per binomial success (presumably a non-line-of-sight
        range bias -- confirm with the author).
    seed : int or None
        When given, a private ``numpy.random.RandomState(seed)`` is used so
        the noise is reproducible. When ``None`` (default) the global
        ``numpy.random`` state is used.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by one ``"_with_noise"`` column per
        entry of ``cols``, sharing the index of ``data_df``.
    """
    # numpy.random (module) and RandomState expose the same sampling API, so
    # the unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    n_samples = len(data_df)
    df_list = [data_df]
    for item in data_df[cols]:
        distance = data_df[[item]].to_numpy()

        # Normally distributed noise.
        noise_with_std = sigma * rng.randn(n_samples, 1) + mu

        # Uniformly distributed noise on [min_noise, max_noise). Sampling the
        # interval directly avoids piling probability mass onto min_noise,
        # and yields the same values as rand() * max_noise when min_noise is 0.
        noise_with_uniform = rng.uniform(min_noise, max_noise, size=(n_samples, 1))

        # Bernoulli/binomially distributed noise, scaled by R_NLoS.
        noise_with_binomial = R_NLoS * rng.binomial(n=n_b, p=p_b, size=(n_samples, 1))

        total_noise = noise_with_std + noise_with_uniform + noise_with_binomial

        distance_with_noise = distance + total_noise

        # Negative results are replaced by 0.
        distance_with_noise[distance_with_noise < 0] = 0

        # Reuse the input index: concat(axis=1) aligns on index labels, so a
        # default RangeIndex here would misalign with a filtered or
        # re-indexed data_df and introduce NaN rows.
        df_list.append(
            pd.DataFrame(distance_with_noise, index=data_df.index, columns=[item + "_with_noise"])
        )

    return pd.concat(df_list, axis=1)


In [114]:
# Names of the noise-free distance columns: distance_1 ... distance_<n_refs>.
distances_cols = list(map("distance_{}".format, range(1, n_refs + 1)))

# Append a "<column>_with_noise" column for every distance column,
# training set first so the order of random draws stays the same.
train_data = add_noise(data_df=train_data, cols=distances_cols)
test_data = add_noise(data_df=test_data, cols=distances_cols)

# Write both datasets to CSV, leaving out the row index.
train_data.to_csv(path_or_buf="train.csv", index=False)
test_data.to_csv(path_or_buf="test.csv", index=False)