In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import resample
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from tqdm import tqdm

In [11]:
data = pd.read_csv("signals.csv")

X = data.iloc[:,1:-1]
y = data.iloc[:,-1:]

In [12]:
y = y['y'].apply(lambda x: 1 if x == 1 else 0)
y = pd.DataFrame(data=y)

In [13]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, shuffle=False)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((10350, 178), (1150, 178), (10350, 1), (1150, 1))

In [14]:
augmented_data = []

for idx, row in x_train.iterrows():
    data = row.values
    label = y.iloc[idx]

    noise_level = np.random.uniform(0, 0.1)
    noisy_data = data + noise_level * np.random.normal(size=len(data))

    factor = np.random.uniform(0.9, 1.1)
    x = np.arange(len(noisy_data))
    f = interp1d(x, noisy_data)
    new_x = np.linspace(0, len(noisy_data) - 1, int(len(noisy_data) * factor))
    interpolated_data = f(new_x)

    resampled_data = resample(interpolated_data, len(data))

    augmented_data.append(np.append(resampled_data, label))

augmented_data = np.array(augmented_data)

np.random.shuffle(augmented_data)

X_augmented_noise = pd.DataFrame(augmented_data[:, :-1], columns=X.columns)
y_augmented_noise = pd.DataFrame(augmented_data[:, -1], columns=['y'])

X_augmented_noise.shape, y_augmented_noise.shape

((10350, 178), (10350, 1))

In [15]:
rus = RandomUnderSampler(random_state=42)
X_under_resampled, y_under_resampled = rus.fit_resample(x_train, y_train)

ros = RandomOverSampler(random_state=42)
X_over_resampled, y_over_resampled = ros.fit_resample(x_train, y_train)

X_under_resampled.shape, y_under_resampled.shape, X_over_resampled.shape, y_over_resampled.shape

((4126, 178), (4126, 1), (16574, 178), (16574, 1))

In [16]:
x_columns= x_train.columns
y_columns = y_train.columns

x_train = x_train.values
y_train = y_train.values

X_augmented_noise = X_augmented_noise.values
y_augmented_noise = y_augmented_noise.values

X_under_resampled = X_under_resampled.values
y_under_resampled = y_under_resampled.values

X_over_resampled = X_over_resampled.values
y_over_resampled = y_over_resampled.values

x_train = np.vstack((X, X_augmented_noise, X_under_resampled, X_over_resampled))
y_train = np.concatenate((y, y_augmented_noise, y_under_resampled, y_over_resampled))

x_train = pd.DataFrame(x_train, columns=x_columns)
y_train = pd.DataFrame(y_train, columns=y_columns)

x_train.shape, y_train.shape

((42550, 178), (42550, 1))

In [21]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((42550, 178), (1150, 178), (42550, 1), (1150, 1))

In [23]:
x_train.to_csv('Data/x_train.csv', index=False)
x_test.to_csv('Data/x_test.csv', index=False)
y_train.to_csv('Data/y_train.csv', index=False)
y_test.to_csv('Data/y_test.csv', index=False)

In [None]:
# Random Sampling
# Kappa Coefficient