In [1]:
import numpy as np
import numbers
import os
import pandas as pd

In [2]:
def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance.

    Parameters
    ----------
    seed : None | int | instance of RandomState
        If seed is None (or the ``np.random`` module itself), return the
        RandomState singleton used by np.random.
        If seed is an int, return a new RandomState instance seeded with seed.
        If seed is already a RandomState instance, return it.
        Otherwise raise ValueError.

    Returns
    -------
    numpy.random.RandomState
        The generator selected according to the rules above.

    Raises
    ------
    ValueError
        If seed is none of the accepted kinds.
    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, numbers.Integral):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    # Wrap seed in a 1-tuple: with a bare ``% seed`` a tuple-valued seed would
    # be unpacked as the format arguments and raise TypeError (or print the
    # wrong value) instead of the documented ValueError.
    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
                     ' instance' % (seed,))

In [3]:

def generate_friedmen_ranges(amount_breaks):
    """Generate ``amount_breaks + 1`` feature/target datasets.

    For every index ``i`` in ``0..amount_breaks`` a RandomState seeded with
    ``i`` draws a uniform ``(n_samples, 4)`` sample, which is divided by
    ``window_size``. Unless ``shift_dataset > 0``, the sample is then offset by
    ``i / amount_breaks * (window_size - 1) / window_size`` before being
    rescaled by ``generate_X_values``; with ``shift_dataset > 0`` the
    un-offset sample is rescaled instead. Targets come from ``generate_y``.

    Reads the module-level globals ``n_samples``, ``window_size`` and
    ``shift_dataset``; they must be defined before this function is called.

    Parameters
    ----------
    amount_breaks : int
        Number of breaks; ``amount_breaks + 1`` datasets are produced.
        ``0`` yields a single dataset with no offset.

    Returns
    -------
    datasets_feature : list of ndarray
        One rescaled ``(n_samples, 4)`` feature array per dataset.
    datasets_target : list of ndarray
        The matching target vector for each feature array.
    """
    datasets_feature = []
    datasets_target = []
    for i in range(amount_breaks + 1):
        generator = check_random_state(i)
        X = generator.rand(n_samples, 4)
        X /= window_size
        if shift_dataset > 0:
            # Shift mode: every dataset uses the same (un-offset) feature
            # window; the difference between datasets is applied to the
            # target inside generate_y.
            dataset_range = generate_X_values(X)
        else:
            # Guard the single-dataset case, where i / amount_breaks would be
            # 0 / 0 and raise ZeroDivisionError.
            window_position = i / amount_breaks if amount_breaks else 0.0
            offset = window_position * (window_size - 1) / window_size
            # X + offset is a new array, so X itself is left untouched here.
            dataset_range = generate_X_values(X + offset)
        datasets_feature.append(dataset_range)
        datasets_target.append(generate_y(dataset_range, i))
    return datasets_feature, datasets_target

def generate_X_values(X):
    """Rescale columns 0, 1 and 3 of ``X`` in place and return ``X``.

    Column 0 is multiplied by 100; column 1 is multiplied by ``520 * pi`` and
    then shifted by ``40 * pi``; column 3 is multiplied by 10 and then shifted
    by 1. Column 2 is left as is. The input array itself is modified.
    """
    # (column index, multiplicative scale, additive shift or None)
    column_transforms = (
        (0, 100, None),
        (1, 520 * np.pi, 40 * np.pi),
        (3, 10, 1),
    )
    for col_idx, scale, shift in column_transforms:
        column = X[:, col_idx]  # a view: in-place ops below write through to X
        column *= scale
        if shift is not None:
            column += shift
    return X

def generate_y(X, i):
    """Compute the noisy arctan target for the feature array ``X``.

    The noise-free part is
    ``arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0])``
    (the same expression scikit-learn's ``make_friedman3`` uses). Gaussian
    noise scaled by the module-level ``noise`` is added, followed by a
    constant offset of ``i * shift_dataset`` (also a module-level global).

    Parameters
    ----------
    X : ndarray with at least 4 columns
        Feature array; columns 0-3 are read.
    i : int
        Dataset index; seeds the noise generator and scales the target shift.

    Returns
    -------
    ndarray
        One target value per row of ``X``.
    """
    # NOTE(review): generate_friedmen_ranges seeds its feature generator with
    # the same ``i``, so this noise stream restarts from the same state the
    # features were drawn from -- confirm that coupling is intended.
    generator = check_random_state(i)

    signal = np.arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0])
    # Draw one noise value per row of X rather than relying on the global
    # n_samples matching the array that was actually passed in.
    y = signal + noise * generator.randn(X.shape[0])

    y += i * shift_dataset
    return y



In [4]:
# Configuration. n_samples, noise, window_size and shift_dataset are read as
# module-level globals by generate_friedmen_ranges / generate_y.
n_samples=10000
random_state=42
# Seeds NumPy's global generator. The datasets below are drawn from
# RandomState instances seeded per dataset index, so this seed does not
# influence them.
np.random.seed(random_state)
amount_datasets=10
noise=0.1
window_size=2
shift_dataset=0

# X and y are lists holding amount_datasets + 1 feature arrays / target vectors.
X,y = generate_friedmen_ranges(amount_datasets)

# One DataFrame per dataset: feature columns named '0'..'3' plus 'target'.
all_datasets=[]
for data, target in zip(X, y):
    dataset = pd.DataFrame({str(col): data[:, col] for col in range(data.shape[1])})
    dataset['target']=target
    all_datasets.append(dataset)

# Encode the active settings in the dataset/directory name.
# NOTE(review): the '10' in the base name is a literal and does not follow
# amount_datasets -- confirm whether it should.
dataset_name='friedman_10'
if noise>0:
    dataset_name+='_noise_'+str(noise)
if window_size>0:
    dataset_name+='_window_size_'+str(window_size)
if shift_dataset>0:
    dataset_name+='_shift_dataset_'+str(shift_dataset)
directory='toy_datasets/'+dataset_name
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(directory, exist_ok=True)
# Write each dataset to its own CSV; the file suffix is the dataset index,
# which is also the seed used to generate it.
for i, dataset in enumerate(all_datasets):
    dataset.to_csv(directory+'/'+dataset_name+'_seed_'+str(i)+'.csv')