In [None]:
import numpy as np
import pandas as pd

def augment_continuous_feature(X, n, *, spread=0.25, rng=None):
    """Grow a sample of a continuous feature to ``n`` values.

    The values are binned with ``np.histogram(..., bins='doane')``. For every
    value in a bin, one synthetic value is drawn from a normal distribution
    centred on the bin centre with standard deviation ``spread * bin_width``.
    Each pass therefore doubles the pool; passes repeat until the pool holds
    at least ``n`` values. The pool (originals plus synthetic values) is then
    shuffled and truncated to ``n``.

    NOTE: every pass after the first builds its histogram from the pool that
    already contains synthetic values, so the smoothing compounds from pass
    to pass.

    Args:
        X: Array-like of numeric values; it is flattened to 1-D.
        n: Desired number of output values (expected to be a non-negative
            integer). If ``n`` does not exceed the number of input values,
            the result is a random subset of the input.
        spread: Standard deviation of the sampling noise, as a fraction of
            the bin width.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) used
            for sampling and shuffling. Defaults to the global ``np.random``
            state, so ``np.random.seed`` keeps working.

    Returns:
        A list of ``n`` values in random order.

    Raises:
        ValueError: If ``X`` is empty while ``n > 0`` (there is nothing to
            estimate a distribution from). ``np.histogram`` itself raises
            ``ValueError`` when the data range is not finite (NaN/inf).
    """
    data = np.asarray(X).ravel()
    pool = data.tolist()

    if not pool:
        if n > 0:
            # An empty histogram has only zero-count bins, so the loop below
            # would never make progress.
            raise ValueError("cannot augment an empty feature to n > 0 samples")
        return []

    sampler = np.random if rng is None else rng

    source = data  # first pass bins the caller's array as given
    while len(pool) < n:
        counts, bin_edges = np.histogram(source, bins='doane')
        bin_widths = np.diff(bin_edges)
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        # One draw per existing value, using its bin's centre and scale.
        # Every bin is always sampled in full: stopping part-way through a
        # pass would over-represent the lowest-valued bins in the result.
        centers = np.repeat(bin_centers, counts)
        scales = np.repeat(bin_widths * spread, counts)
        pool.extend(sampler.normal(centers, scales).tolist())

        source = pool  # later passes bin originals + synthetic values

    sampler.shuffle(pool)
    return pool[:n]

# Example usage: read the source CSV, grow every column to n_samples rows,
# and write the result next to the input file.
df = pd.read_csv('/content/Jubarte_SEx_Ex_Ez_EW01_2002_Poly3_AI_MCD.csv')
columns_to_augment = [
    'RxGridE',
    'RxGridN',
    'RxElev',
    'Altitude Receiver',
    'Freq',
    'SourceGridE',
    'SourceGridN',
    'SourceElev',
    'Altitude Source [positive from Seabottom]',
    'Offset',
    'AmpObs',
    'PhaseObs',
    'AmpErr',
    'PhaseErr',
    'AmpMod',
    'PhaseMod',
]
n_samples = 200000  # Desired number of samples after augmentation

# NOTE: every column is resampled on its own, so a row of the output does not
# correspond to any particular row of df.
augmented_data = {}
for column in columns_to_augment:
    augmented_data[column] = augment_continuous_feature(df[column].values, n_samples)

# Build a new dataframe from the augmented continuous columns
augmented_df = pd.DataFrame(augmented_data)

# Add the discrete features by drawing n_samples values (with replacement)
# from each original column
for column in ['Stn', 'SourceParameters', 'StnID', 'LineID']:
    augmented_df[column] = np.random.choice(df[column], n_samples)

# Save the augmented dataframe
augmented_df.to_csv(
    '/content/Jubarte_SEx_Ex_Ez_EW01_2002_Poly3_AI_MCD_augmented.csv',
    index=False,
)