In [1]:
import random
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

# Seed NumPy's global (legacy) random state so that the np.random.* draws made
# in the cells below produce the same values on every fresh run of the notebook.
np.random.seed(0)

In [2]:
def generate_sin_signal(n_time_points, n_points_in_ones):
    """Draw one noisy sine wave.

    The time axis holds ``n_time_points`` evenly spaced samples covering
    ``[0, n_time_points / n_points_in_ones]``. Each sample gets its own
    frequency factor (normal, mean 1, standard deviation 0.01) and its own
    phase offset (uniform on [-0.1, 0.1)), both drawn from NumPy's global
    random state.

    Parameters
    ----------
    n_time_points : int
        Number of samples in the returned signal.
    n_points_in_ones : int or float
        Divisor applied to ``n_time_points`` to get the end of the time axis.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_time_points`` holding ``sin(t * w + phi)``.
    """
    t_end = n_time_points / n_points_in_ones
    time_axis = np.linspace(0, t_end, n_time_points)
    # Draw order matters for seeded reproducibility: frequencies, then phases.
    freq_jitter = np.random.normal(1, 0.01, size=(n_time_points))
    phase_jitter = np.random.uniform(-0.1, 0.1, size=n_time_points)
    return np.sin(time_axis * freq_jitter + phase_jitter)

def generate_disc_signal(signal, win_disc):
    """Return a piecewise-constant ("discretised") copy of ``signal``.

    The signal is cut into consecutive windows of ``win_disc`` samples and
    every sample is replaced by the mean of its window. The last window may
    be shorter than ``win_disc``; it is averaged over the samples it has.

    Parameters
    ----------
    signal : array-like
        Input samples; converted with ``np.asarray`` so lists are accepted.
    win_disc : int
        Window length in samples; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``signal``.

    Raises
    ------
    ValueError
        If ``win_disc`` is smaller than 1.
    """
    if win_disc < 1:
        raise ValueError("win_disc must be a positive integer")

    signal = np.asarray(signal)
    n_time_points = len(signal)
    new_signal = np.zeros(n_time_points)

    # Stop at n_time_points (not n_time_points - 1): otherwise a final window
    # that starts exactly on the last sample is skipped and that sample is
    # left at its zero initial value.
    for start in range(0, n_time_points, win_disc):
        stop = start + win_disc
        new_signal[start:stop] = signal[start:stop].mean()

    return new_signal

In [3]:
# Dataset configuration.
n_samples = 2000          # total rows; written in (sine, discretised) pairs
min_n_time_points = 50    # shortest signal length (inclusive)
max_n_time_points = 100   # row width; drawn lengths stay strictly below this
n_points_in_ones = 3
win_disc = 7

# Rows are filled two at a time (i_samp and i_samp + 1), so an odd count would
# index past the end of X_synth.
assert n_samples % 2 == 0, "n_samples must be even"

# Signals shorter than max_n_time_points are right-padded with zeros.
X_synth = np.zeros((n_samples, max_n_time_points))
# Alternating labels 0, 1, 0, 1, ... as a column vector: 0 for the raw sine
# row, 1 for its discretised twin. Derived from n_samples so the label and
# feature arrays always have the same number of rows.
y_synth = np.tile([0, 1], n_samples // 2).reshape(-1, 1)

for i_samp in range(0, n_samples, 2):

    # uniform() samples from [min, max) and int() truncates, so the length is
    # in min_n_time_points .. max_n_time_points - 1.
    n_time_points = int(np.random.uniform(min_n_time_points, max_n_time_points))

    sin_signal = generate_sin_signal(n_time_points, n_points_in_ones)
    disc_signal = generate_disc_signal(sin_signal, win_disc)

    X_synth[i_samp, :n_time_points] = sin_signal
    X_synth[i_samp + 1, :n_time_points] = disc_signal

In [4]:
# Put the label in column 0 followed by the padded signal, shuffle all rows,
# renumber the index, and cast the label column back to integers (the
# concatenation with the float signal matrix turned it into floats).
synth_dataset = (
    pd.DataFrame(np.concatenate([y_synth, X_synth], axis=1))
    .sample(frac=1)
    .reset_index(drop=True)
    .astype({0: int})
)
synth_dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,0,-0.023927,0.332728,0.626686,0.82864,0.98044,0.9888,0.931743,0.765654,0.541405,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,-0.030262,0.26289,0.628357,0.872193,0.962486,0.99072,0.903754,0.626032,0.49918,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0.667132,0.667132,0.667132,0.667132,0.667132,0.667132,0.667132,-0.185633,-0.185633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,-0.035087,0.375331,0.638837,0.837876,0.99188,0.981919,0.889204,0.654002,0.515431,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0.097725,0.253609,0.6189,0.788993,0.961905,0.980734,0.858261,0.729795,0.353422,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Fraction of the (already shuffled) rows that goes into the training split.
train_size = 0.8
n_train = int(train_size * synth_dataset.shape[0])

# Positional split: the first n_train rows train, the remainder tests.
train_synth, test_synth = synth_dataset.iloc[:n_train], synth_dataset.iloc[n_train:]

In [6]:
# Destination folder for the generated dataset files.
dir_path = '../data/TS2Vec/UCR/MySynth'
# exist_ok=True makes this idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dir_path, exist_ok=True)

# Tab-separated values without a header row or index column, so each line is
# the integer label (column 0) followed by the zero-padded signal values.
train_synth.to_csv(os.path.join(dir_path, 'MySynth_TRAIN.tsv'), index=False, header=False, sep='\t')
test_synth.to_csv(os.path.join(dir_path, 'MySynth_TEST.tsv'), index=False, header=False, sep='\t')