# In [1]:
import os
import numpy as np
import pandas as pd
from codebase.generate_datasets import generate_friedmen_ranges
from codebase.generic_processing import dataset_name_generator

# Experiment configuration. These values are forwarded to the dataset
# generator below and are read again when naming and saving the output.
RANDOM_STATE = 42      # seed for NumPy's global random generator
N_SAMPLES = 1000       # passed to the generator as n_samples
NUMBER_DATASETS = 10   # passed to the generator as number_datasets
NOISE = 0.2            # passed to the generator as noise
WINDOW_SIZE = 2        # passed to the generator as window_size
SHIFT_DATASET = 0      # passed to the generator as shift_dataset

# Seed NumPy's global RNG before generating any data.
# NOTE(review): this only makes the run reproducible if the generator draws
# from NumPy's global random state - confirm in codebase.generate_datasets.
np.random.seed(RANDOM_STATE)

# Generate the Friedman datasets; X and y are iterated pairwise further down.
X, y = generate_friedmen_ranges(
    number_datasets=NUMBER_DATASETS,
    n_samples=N_SAMPLES,
    window_size=WINDOW_SIZE,
    noise=NOISE,
    shift_dataset=SHIFT_DATASET,
)
# Convert every (features, target) pair into a DataFrame ready for export.
# Each frame holds the first four feature columns, labelled '0'..'3', plus a
# 'target' column.
all_datasets = []
for data, target in zip(X, y):
    feature_columns = {str(col): data[:, col] for col in range(4)}
    dataset = pd.DataFrame(feature_columns).assign(target=target)
    all_datasets.append(dataset)
    
# Name built from the generation parameters; it is used below both as the
# output sub-directory and as the prefix of every CSV file name.
dataset_name = dataset_name_generator(
    noise=NOISE,
    window_size=WINDOW_SIZE,
    shift_dataset=SHIFT_DATASET,
    n_samples=N_SAMPLES,
    number_datasets=NUMBER_DATASETS,
)

# Save each DataFrame as its own CSV under toy_datasets/<dataset_name>/.
directory = 'toy_datasets/' + dataset_name
# exist_ok=True creates the directory (and parents) if needed and is a no-op
# when it already exists, so no separate existence check is required.
os.makedirs(directory, exist_ok=True)
# Iterate over the frames that were actually built rather than indexing with
# range(NUMBER_DATASETS + 1): indexing by a fixed count raises IndexError
# whenever all_datasets holds fewer than NUMBER_DATASETS + 1 entries.
for i, frame in enumerate(all_datasets):
    file_name = dataset_name + '_seed_' + str(i) + '.csv'
    # The DataFrame index is written as the first CSV column (pandas default).
    frame.to_csv(directory + '/' + file_name)
