In [6]:
import os
import pandas as pd
import numpy as np

In [7]:
def split(df, sample_amount):
    """Cut a pandas object into consecutive rows of ``sample_amount`` values.

    Trailing entries that do not fill a complete row are discarded.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Source values; ``extract_data`` passes a single CSV column.
    sample_amount : int
        Number of values per output row.

    Returns
    -------
    numpy.ndarray
        2-D array with ``sample_amount`` columns.
    """
    total = len(df.values)
    # Largest multiple of ``sample_amount`` that fits into the input.
    keep = total - (total % sample_amount)
    trimmed = df.iloc[:keep]
    return np.reshape(trimmed.values, (-1, sample_amount))

In [8]:
def extract_data(data_folder, filename, dim, sample_amount):
    """Read one header-less CSV file and window its first ``dim`` columns.

    Each of the first ``dim`` columns is cut into rows of ``sample_amount``
    values with ``split``; the per-column results are stacked vertically in
    column order.

    Parameters
    ----------
    data_folder : str
        Directory that contains the file.
    filename : str
        Name of the CSV file inside ``data_folder``.
    dim : int
        Number of leading columns to process.
    sample_amount : int
        Number of values per output row.

    Returns
    -------
    numpy.ndarray or None
        Stacked windows, or ``None`` when the file contains any missing
        value or when ``dim`` is 0.
    """
    frame = pd.read_csv(os.path.join(data_folder, filename), header=None)

    # A single missing value anywhere in the file disqualifies it entirely.
    if frame.isnull().values.any():
        return None

    per_column = [split(frame.iloc[:, col], sample_amount) for col in range(dim)]
    if not per_column:
        return None
    return np.vstack(per_column)

In [9]:
# One entry per input folder under ``<parent of cwd>/data``:
# (class sub-folder, dimension sub-folder, number of CSV columns to use, label).
# Chaotic series are labelled 1.0, non-chaotic series 0.0.
_DATASET_FOLDERS = (
    ("chaotic", "dim2", 2, 1.0),
    ("chaotic", "dim3", 3, 1.0),
    ("non-chaotic", "dim1", 1, 0.0),
    ("non-chaotic", "dim2", 2, 0.0),
    ("non-chaotic", "dim2_2", 2, 0.0),
)


def sample_amount(samples_param):
    """Build the labelled, windowed dataset and write it to CSV.

    Every file in each folder listed in ``_DATASET_FOLDERS`` is passed to
    ``extract_data``; files for which it returns ``None`` are skipped. The
    resulting windows are stacked in folder order (files sorted by name within
    a folder, so the row order is reproducible) and each row receives the
    label of the folder it came from.

    Parameters
    ----------
    samples_param : int
        Number of values per row (window length).

    Returns
    -------
    pandas.DataFrame or None
        Frame with ``samples_param`` feature columns plus a ``"labels"``
        column, or ``None`` when no folder produced any data. The frame is
        also written to ``../data/data_proccesed_<samples_param>.csv``.

    Raises
    ------
    FileNotFoundError
        If one of the expected data folders does not exist.
    """
    data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')

    # Collect the pieces and stack once at the end; stacking inside the loop
    # would re-copy the whole accumulated array for every file.
    window_chunks = []
    label_chunks = []
    for category, subfolder, dim, label in _DATASET_FOLDERS:
        data_folder = os.path.join(data_root, category, subfolder)
        # os.listdir returns entries in arbitrary order; sort for determinism.
        for filename in sorted(os.listdir(data_folder)):
            extracted_data = extract_data(data_folder, filename, dim, samples_param)
            if extracted_data is None:
                continue
            window_chunks.append(extracted_data)
            label_chunks.append(np.full(len(extracted_data), label))

    if not window_chunks:
        return None

    df = pd.DataFrame(np.vstack(window_chunks))
    # Attach labels BEFORE dropping rows so features and labels stay aligned
    # (and the lengths always match) even if dropna removes something.
    df["labels"] = np.concatenate(label_chunks)
    df = df.dropna()
    file_name = "../data/data_proccesed_" + str(samples_param) + ".csv"
    df.to_csv(file_name, index=False)
    return df

In [10]:
# Build the labelled dataset with 50 values per row; the call also writes it
# to ../data/data_proccesed_50.csv as a side effect.
df = sample_amount(50)
# Alternative window lengths (uncomment one to generate that dataset instead):
# df = sample_amount(100)
# df = sample_amount(200)
# Bare last expression so the notebook renders the resulting frame.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,labels
0,2.276861,2.227629,2.117613,1.984616,1.829651,1.624213,1.336045,0.955136,0.505665,0.037093,...,0.255472,-0.171191,-0.587297,-0.944519,-1.219609,-1.425525,-1.599660,-1.779223,-1.972281,1.0
1,-2.144068,-2.228635,-2.164510,-1.931253,-1.565629,-1.141457,-0.734575,-0.391784,-0.116258,0.127660,...,-0.715637,-0.509855,-0.306925,-0.058408,0.259867,0.633566,1.015572,1.346327,1.581704,1.0
2,1.714023,1.773047,1.805933,1.845738,1.889318,1.897349,1.817737,1.617480,1.306309,0.933619,...,1.964965,1.844477,1.647567,1.443679,1.278955,1.157424,1.045089,0.893727,0.670831,1.0
3,0.378136,0.053143,-0.251552,-0.493514,-0.663397,-0.787506,-0.915261,-1.089347,-1.320696,-1.576564,...,0.710122,0.446387,0.157005,-0.104288,-0.299817,-0.425890,-0.515863,-0.621327,-0.783584,1.0
4,-1.010176,-1.268003,-1.496421,-1.636484,-1.661526,-1.590556,-1.477616,-1.379780,-1.326969,-1.307750,...,0.176967,-0.044789,-0.301522,-0.540945,-0.718545,-0.821124,-0.873398,-0.924561,-1.020215,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
573875,0.045588,0.045374,0.045142,0.044866,0.044590,0.044314,0.043991,0.043655,0.043318,0.042966,...,0.023807,0.023012,0.022217,0.021423,0.020612,0.019791,0.018970,0.018149,0.017307,0.0
573876,0.016464,0.015621,0.014777,0.013916,0.013055,0.012194,0.011331,0.010458,0.009584,0.008711,...,-0.018484,-0.019233,-0.019982,-0.020730,-0.021479,-0.022198,-0.022898,-0.023599,-0.024300,0.0
573877,-0.025000,-0.025661,-0.026308,-0.026955,-0.027602,-0.028248,-0.028837,-0.029426,-0.030015,-0.030604,...,-0.041146,-0.041229,-0.041267,-0.041289,-0.041311,-0.041329,-0.041290,-0.041251,-0.041212,0.0
573878,-0.041150,-0.041051,-0.040952,-0.040854,-0.040710,-0.040553,-0.040396,-0.040228,-0.040015,-0.039801,...,-0.025645,-0.025013,-0.024379,-0.023714,-0.023049,-0.022385,-0.021712,-0.021018,-0.020324,0.0
