In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Folder that receives every .npy file written by this notebook.
PREPROCESSED_FOLDER = "data/google_stocks/preprocessed"
# np.save does not create missing parent directories, so make sure the
# output folder exists before any later cell tries to write into it.
os.makedirs(PREPROCESSED_FOLDER, exist_ok=True)

# Load the raw stock prices and collapse the four OHLC columns into a single
# per-row mean (axis=1 averages across columns, giving one value per row).
df = pd.read_csv("data/google_stocks/stock_data.csv")
df_mean = df[["Open", "Close", "High", "Low"]].mean(axis=1)

# Min-max scaling helper.
def normalize(dataframe: pd.DataFrame):
    """Rescale ``dataframe`` linearly so its minimum maps to 0 and its maximum to 1.

    The minimum and maximum come from ``dataframe.min()`` / ``dataframe.max()``,
    so a Series is scaled as a whole while a DataFrame is scaled per column.
    No guard is applied when the maximum equals the minimum.
    """
    lowest = dataframe.min()
    highest = dataframe.max()
    shifted = dataframe - lowest
    value_range = highest - lowest

    return shifted / value_range

# Min-max scale the per-row OHLC mean so the series lies in [0, 1].
df_normalized = normalize(df_mean)

In [None]:
def train_test_split(dset:np.ndarray, train_size=0.7):
    """Split ``dset`` along its first axis into a leading and a trailing part.

    The array is first reshaped to ``(n, 1, last_dim)`` and cast to float32,
    where ``n`` is the size of the first axis and ``last_dim`` the size of the
    last axis. The first ``int(n * train_size)`` entries form the train part
    and the remainder the test part. No shuffling is done here.

    Returns:
        A ``(train, test)`` tuple of float32 arrays.
    """
    n_samples, last_dim = dset.shape[0], dset.shape[-1]
    cut = int(n_samples*train_size)

    # Insert a singleton middle axis and switch to float32 in one go.
    expanded = dset.reshape((n_samples, 1, last_dim)).astype(np.float32)

    return expanded[:cut], expanded[cut:]

In [None]:
def make_dataset(dset: np.ndarray, window_length: int = 30, seed=None):
    """Cut ``dset`` into overlapping windows and return them in shuffled order.

    Windows start at every index of the first axis (stride 1), including the
    final one that ends exactly on the last element, so an input of length
    ``n`` yields ``n - window_length + 1`` windows.

    Args:
        dset: Array whose first axis is the time axis.
        window_length: Number of consecutive steps per window (default 30).
        seed: Optional seed for the shuffle; ``None`` gives a fresh,
            non-reproducible permutation on every call.

    Returns:
        Array of shape ``(n_windows, window_length, ...)`` shuffled along the
        first axis. An input shorter than ``window_length`` gives an empty array.

    Raises:
        ValueError: If ``window_length`` is smaller than 1.
    """
    if window_length < 1:
        raise ValueError("window_length must be at least 1")

    rng = np.random.default_rng(seed)

    # "+ 1" keeps the last full window, dset[n - window_length:n].
    n_windows = dset.shape[0] - window_length + 1
    sequences = [dset[start:start + window_length] for start in range(n_windows)]

    return rng.permutation(np.array(sequences))


In [None]:
# Plot some sequences.
def plot_several_sequence(dset, count):
    """Draw the first ``count`` entries of ``dset``, one figure per entry.

    For each index ``i`` in ``range(count)`` the element ``dset[i][0]`` is
    plotted with the '.-' line style on an 18x3 figure with a grid. Each
    figure is shown and then closed before the next one is created.
    """
    for index in range(count):
        figure = plt.figure(figsize=(18, 3))
        axes = figure.gca()

        axes.plot(dset[index][0], '.-')
        axes.grid()
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(figure)

### Make the style dataset.

In [None]:
# Cut the normalized series into windows (returned in shuffled order).
style_dataset = make_dataset(df_normalized.values)

# Split the windows into train and test partitions.
# NOTE(review): the windows overlap and are shuffled before this split, so
# neighbouring windows can end up on both sides — confirm this is intended.
style_train, style_test= train_test_split(style_dataset)

# Persist both partitions as .npy files under PREPROCESSED_FOLDER.
np.save(f"{PREPROCESSED_FOLDER}/style_train.npy", style_train)
np.save(f"{PREPROCESSED_FOLDER}/style_test.npy", style_test)

## Make the "In Sample Dataset"

As stated in the paper, the in-sample content dataset comes from the same dataset as the style data, so a simple permutation is enough.

In [None]:
# NOTE(review): the generator is created without a seed, so the permutations
# below differ on every run.
rng = np.random.default_rng() # Uses the new-style numpy Generator API.

# The in-sample content sets are the style sets re-shuffled along the first axis.
content_train = rng.permutation(style_train)
content_test = rng.permutation(style_test)

# Persist both content partitions as .npy files under PREPROCESSED_FOLDER.
np.save(f"{PREPROCESSED_FOLDER}/normal_content_train.npy", content_train)
np.save(f"{PREPROCESSED_FOLDER}/normal_content_test.npy", content_test)

In [None]:
# Visual sanity check: plot the first five content training sequences.
plot_several_sequence(content_train, 5)

## Make the "perturbed" Dataset.

The "perturbed" dataset is a modification of the content part of Style Time.

"""
For example, one can add a randomly shifted and scaled unit-step function to each example in the training dataset. 
"""


In [None]:
def make_step_function(
    x: np.ndarray,
    step_time: int,
    step_value: float):
    """Return ``x`` plus a step that switches on at ``step_time``.

    The step is zero before index ``step_time`` and equal to ``step_value``
    from that index onwards, measured along the LAST axis of ``x``.

    Args:
        x: Sequence to perturb; the last axis is treated as time.
        step_time: Integer index on the last axis where the step begins.
        step_value: Height added from ``step_time`` onwards.

    Returns:
        A new array with the same shape as ``x``; ``x`` is not modified.
    """
    _step = np.zeros_like(x)
    # Slice the last axis: for 1-D input this equals ``_step[step_time:]``,
    # and for (1, T) sequences it places the step inside the sequence instead
    # of selecting (or skipping) the singleton leading axis.
    _step[..., step_time:] = step_value

    return x + _step


def make_perturbed_dataset(dset: np.ndarray, seed=None):
    """Add a randomly placed, randomly scaled step to every sequence in ``dset``.

    For each sequence the step start is drawn uniformly from the indices of
    its last axis, and the step height uniformly from ``[-std, std)`` where
    ``std`` is the standard deviation of that sequence. The perturbed
    sequences are returned in shuffled order.

    Args:
        dset: Iterable of sequences (e.g. an array of shape ``(n, 1, T)``).
        seed: Optional seed for all random draws and the final shuffle;
            ``None`` gives non-reproducible results.

    Returns:
        Array of perturbed sequences, shuffled along the first axis.
    """
    # One generator drives every draw so a single seed reproduces the output.
    rng = np.random.default_rng(seed)
    perturbed_dataset = []

    for sequence in dset:
        # Use the last axis length: sequences are stored as (1, T), so
        # shape[0] would always be 1 and the step would always start at 0.
        random_time = int(rng.integers(0, sequence.shape[-1]))
        std_value = np.std(sequence)
        random_value = rng.uniform(-std_value, std_value)

        perturbed_dataset.append(make_step_function(sequence, random_time, random_value))

    return rng.permutation(np.array(perturbed_dataset))

In [None]:
# Build the perturbed content sets from the style partitions.
perturbed_train = make_perturbed_dataset(style_train)
perturbed_test = make_perturbed_dataset(style_test)

# Visual sanity check: plot the first five perturbed training sequences.
plot_several_sequence(perturbed_train, 5)

# Persist both perturbed partitions as .npy files under PREPROCESSED_FOLDER.
np.save(f"{PREPROCESSED_FOLDER}/perturbed_content_train.npy", perturbed_train)
np.save(f"{PREPROCESSED_FOLDER}/perturbed_content_test.npy", perturbed_test)