In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
def split(df, sample_amount):
    """Cut the values of a pandas object into rows of ``sample_amount`` entries.

    Trailing values that do not fill a complete row are discarded.

    Parameters
    ----------
    df : pandas.Series
        Values to cut up; ``extract_data`` passes a single CSV column.
    sample_amount : int
        Number of values per output row.

    Returns
    -------
    numpy.ndarray
        Array reshaped to ``(-1, sample_amount)``.
    """
    values = df.values
    leftover = len(values) % sample_amount
    # Keep only the leading part whose length is a multiple of sample_amount.
    usable = values[:len(values) - leftover]
    return usable.reshape((-1, sample_amount))

In [3]:
def extract_data(data_folder, filename, dim, sample_amount):
    """Read one headerless CSV and stack its first ``dim`` columns as windows.

    Each of the first ``dim`` columns is cut into rows of ``sample_amount``
    values by ``split``; the per-column results are stacked vertically in
    column order.

    Parameters
    ----------
    data_folder : str
        Directory containing the file.
    filename : str
        Name of the CSV file inside ``data_folder``.
    dim : int
        Number of leading columns to read.
    sample_amount : int
        Number of values per output row.

    Returns
    -------
    numpy.ndarray or None
        Stacked windows with ``sample_amount`` values per row, or ``None``
        when the file contains any missing value or ``dim`` is less than 1.
    """
    file_path = os.path.join(data_folder, filename)
    df = pd.read_csv(file_path, header=None)

    # A file with any missing value is rejected as a whole.
    if df.isnull().values.any():
        return None

    windows = [split(df.iloc[:, column], sample_amount) for column in range(dim)]
    if not windows:
        return None
    # Stack once at the end rather than re-copying the accumulated array
    # for every additional column.
    return np.vstack(windows)

In [4]:
def sample_amount(samples_param):
    """Build the labelled window dataset and write it to CSV.

    Every file in the five source folders under ``<parent of cwd>/data`` is
    turned into rows of ``samples_param`` values by ``extract_data``. Rows
    coming from the ``chaotic`` folders are labelled 1.0, rows from the
    ``non-chaotic`` folders 0.0. Files for which ``extract_data`` returns
    ``None`` are skipped.

    Parameters
    ----------
    samples_param : int
        Number of values per row (window length).

    Returns
    -------
    pandas.DataFrame or None
        One row per window, columns ``0 .. samples_param - 1`` plus
        ``"labels"``. ``None`` when no file in any folder produced data.
        As a side effect the frame is written to
        ``../data/data_proccesed_<samples_param>.csv``.
    """
    data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')
    # (class folder, dimension folder, number of columns to read, label)
    sources = (
        ("chaotic", "dim2", 2, 1.0),
        ("chaotic", "dim3", 3, 1.0),
        ("non-chaotic", "dim1", 1, 0.0),
        ("non-chaotic", "dim2", 2, 0.0),
        ("non-chaotic", "dim2_2", 2, 0.0),
    )

    data_chunks = []
    label_chunks = []
    for class_dir, dim_dir, dim, label in sources:
        data_folder = os.path.join(data_root, class_dir, dim_dir)
        # os.listdir gives names in arbitrary order; sorting keeps the row
        # order of the output reproducible between runs and machines.
        for filename in sorted(os.listdir(data_folder)):
            extracted_data = extract_data(data_folder, filename, dim, samples_param)
            if extracted_data is None:
                continue
            data_chunks.append(extracted_data)
            label_chunks.append(np.full(len(extracted_data), label))

    # Only give up when nothing at all was loaded, so one empty folder does
    # not discard the data found in the others.
    if not data_chunks:
        return None

    # Stack once instead of re-copying the growing array for every file.
    df = pd.DataFrame(np.vstack(data_chunks))
    # Attach labels before dropping rows so each label stays with its row.
    df["labels"] = np.concatenate(label_chunks)
    df = df.dropna()
    file_name = "../data/data_proccesed_" + str(samples_param) + ".csv"
    df.to_csv(file_name, index=False)
    return df

In [6]:
# Each call writes its own CSV as a side effect; `df` ends up holding the
# frame of the last window length. The 50-sample variant is currently disabled.
for window_length in (100, 200):
    df = sample_amount(window_length)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,191,192,193,194,195,196,197,198,199,labels
0,2.276861,2.227629,2.117613,1.984616,1.829651,1.624213,1.336045,0.955136,0.505665,0.037093,...,0.710122,0.446387,0.157005,-0.104288,-0.299817,-0.425890,-0.515863,-0.621327,-0.783584,1.0
1,-1.010176,-1.268003,-1.496421,-1.636484,-1.661526,-1.590556,-1.477616,-1.379780,-1.326969,-1.307750,...,1.240163,1.251893,1.184174,1.081262,0.999649,0.975752,1.007713,1.057354,1.070649,1.0
2,1.008290,0.867220,0.683757,0.512635,0.399515,0.356901,0.358896,0.355512,0.299448,0.171800,...,-0.300633,-0.207724,-0.186228,-0.221135,-0.267164,-0.273067,-0.209127,-0.083499,0.062539,1.0
3,0.176592,0.223475,0.203601,0.153505,0.125829,0.160638,0.264482,0.406478,0.534645,0.603570,...,-0.386973,-0.275500,-0.217251,-0.230515,-0.295089,-0.363063,-0.384476,-0.334402,-0.226374,1.0
4,-0.104951,-0.021777,-0.008086,-0.057559,-0.130133,-0.173186,-0.150340,-0.060435,0.061464,0.162957,...,0.312495,0.401863,0.476847,0.492740,0.436587,0.333678,0.234483,0.186864,0.210540,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143465,-0.052827,-0.051430,-0.050033,-0.048636,-0.047195,-0.045744,-0.044292,-0.042810,-0.041311,-0.039811,...,-0.000436,-0.001890,-0.003341,-0.004782,-0.006223,-0.007664,-0.009094,-0.010511,-0.011928,0.0
143466,-0.013346,-0.014761,-0.016142,-0.017524,-0.018905,-0.020287,-0.021639,-0.022973,-0.024308,-0.025643,...,0.047113,0.047880,0.048648,0.049345,0.050027,0.050709,0.051385,0.051986,0.052588,0.0
143467,0.053189,0.053762,0.054279,0.054797,0.055315,0.055781,0.056212,0.056644,0.057076,0.057435,...,-0.052138,-0.051923,-0.051707,-0.051443,-0.051158,-0.050873,-0.050570,-0.050217,-0.049865,0.0
143468,-0.049512,-0.049103,-0.048685,-0.048267,-0.047820,-0.047339,-0.046857,-0.046374,-0.045832,-0.045290,...,0.023807,0.023012,0.022217,0.021423,0.020612,0.019791,0.018970,0.018149,0.017307,0.0
