# Imports

In [None]:
import pandas as pd
import numpy as np
from caits.dataset._dataset3 import DatasetArray, DatasetList
from caits.filtering import filter_butterworth
from caits.fe import mean_value, std_value
from caits.fe import melspectrogram

# DatasetArray test

## Dataset

In [None]:
# Read the CSV using ';' as the field separator and ',' as the decimal mark,
# then display the resulting DataFrame.
data = pd.read_csv("data/AirQuality.csv", sep=";", decimal=",")
data

In [None]:
# Features: every column from position 2 up to (not including) the last two.
# Targets: the last two columns.
# NOTE(review): confirm the last two columns of the CSV really hold the
# intended targets (and are not empty trailing columns) — TODO verify.
raw_features = data.iloc[:, 2:-2]
raw_targets = data.iloc[:, -2:]

# Replace missing values with the mean of the column they belong to.
data_X = raw_features.fillna(raw_features.mean())
data_y = raw_targets.fillna(raw_targets.mean())

In [None]:
# Display the feature frame after mean imputation.
data_X

In [None]:
# Display the target frame after mean imputation.
data_y

In [None]:
# Build a DatasetArray from the feature frame and the target frame.
datasetArrayObj = DatasetArray(data_X, data_y)

In [None]:
# Display the length the dataset reports through len().
len(datasetArrayObj)

In [None]:
# Display the dataset object's notebook representation.
datasetArrayObj

In [None]:
# Index the dataset at position 3 and display whatever it returns.
datasetArrayObj[3]

In [None]:
# Display the dataset's `y` attribute (presumably the targets passed at
# construction — confirm against DatasetArray).
datasetArrayObj.y

In [None]:
# Iterate over the dataset directly; only the running index of each yielded
# item is printed, the item itself is ignored.
for i, row in enumerate(datasetArrayObj):
    print(i)

In [None]:
# Iterate over what batch(10) yields (presumably batches of 10 items — confirm)
# and print the running index of each one.
for i, batch in enumerate(datasetArrayObj.batch(10)):
    print(i)

In [None]:
# Split into two datasets using train_test_split's default arguments.
train_obj, test_obj = datasetArrayObj.train_test_split()

In [None]:
# Display the sizes of the two splits side by side.
len(train_obj), len(test_obj)

In [None]:
# Combine the two splits with `+` and display the combined length.
# Presumably this should equal len(datasetArrayObj) — verify.
newDatasetArrayObj = train_obj + test_obj
len(newDatasetArrayObj)

In [None]:
# Split again, this time passing random_state=42 (presumably a fixed seed for
# a reproducible split — confirm). Rebinds train_obj and test_obj.
train_obj, test_obj = datasetArrayObj.train_test_split(random_state=42)

In [None]:
# Display the sizes of the two splits side by side.
len(train_obj), len(test_obj)

In [None]:
# Combine the two splits with `+` and display the combined length.
# Presumably this should equal len(datasetArrayObj) — verify.
newDatasetArrayObj = train_obj + test_obj
len(newDatasetArrayObj)

In [None]:
# Display the result of the dataset's to_dict() conversion.
datasetArrayObj.to_dict()

In [None]:
# Run filter_butterworth through the dataset's apply() with lowpass settings
# (fs=200, cutoff_freq=50) and display the result. The keyword arguments are
# presumably forwarded to the filter function — confirm against apply().
datasetArrayObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

# DatasetList test

## Dataset

In [None]:
from caits.loading import csv_loader

# Load the gesture data from the given path. This rebinds `data`, so any
# value previously held under that name is no longer reachable through it.
data = csv_loader("data/GestureSet_small")

In [None]:
X, y, id = data["X"], data["y"], data["id"]

In [None]:
datasetListObj = DatasetList(X, y, id)
datasetListObj

In [None]:
# Display the length the dataset reports through len().
len(datasetListObj)

In [None]:
# Index the dataset at position 3 and display whatever it returns.
datasetListObj[3]

In [None]:
# Iterate over the dataset directly; only the running index of each yielded
# item is printed, the item itself is ignored.
for i, row in enumerate(datasetListObj):
    print(i)

In [None]:
# Iterate over what batch(10) yields (presumably batches of 10 items — confirm)
# and print the running index of each one.
for i, batch in enumerate(datasetListObj.batch(10)):
    print(i)

In [None]:
# Split into two datasets using train_test_split's default arguments.
train_obj, test_obj = datasetListObj.train_test_split()

In [None]:
# Display the sizes of the two splits side by side.
len(train_obj), len(test_obj)

In [None]:
# Combine the two splits with `+` and display the combined length.
# Presumably this should equal len(datasetListObj) — verify.
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
# Split again, this time passing random_state=42 (presumably a fixed seed for
# a reproducible split — confirm), then display the sizes of both splits.
train_obj, test_obj = datasetListObj.train_test_split(random_state=42)
len(train_obj), len(test_obj)

In [None]:
# Combine the two splits with `+` and display the combined length.
# Presumably this should equal len(datasetListObj) — verify.
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
# Run filter_butterworth through the dataset's apply() with lowpass settings
# (fs=200, cutoff_freq=50) and display the result. The keyword arguments are
# presumably forwarded to the filter function — confirm against apply().
datasetListObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

# Pipeline steps test

## DatasetArray

In [None]:
from caits.transformers._func_transformer_v2 import FunctionTransformer
from caits.transformers._feature_extractor_v2 import FeatureExtractor
from caits.transformers._func_transformer_2d_v2 import FunctionTransformer2D
from caits.transformers._feature_extractor_2d_v2 import FeatureExtractor2D

# Lowpass Butterworth settings handed to the transformer as keyword arguments.
lowpass_kwargs = {"fs": 200, "filter_type": "lowpass", "cutoff_freq": 50}

# Wrap the filter in a FunctionTransformer and run it over the DatasetArray.
functionTransformer = FunctionTransformer(filter_butterworth, **lowpass_kwargs)
transformedArray = functionTransformer.fit_transform(datasetArrayObj)

In [None]:
# Display the `X` attribute of the dataset that was passed to the transformer.
datasetArrayObj.X

In [None]:
# Display the `X` attribute of the transformer's output for comparison.
transformedArray.X

In [None]:
# Each spec pairs a feature function with its "params" dict (presumably the
# keyword arguments for that function — confirm against FeatureExtractor).
mean_spec = {"func": mean_value, "params": {}}
std_spec = {"func": std_value, "params": {"ddof": 0}}

# The extractor receives the specs as a list, mean first and std second.
featureExtractor = FeatureExtractor([mean_spec, std_spec])

In [None]:
# Run the feature extractor over the DatasetArray, then summarise the output:
# its keys, plus the length, type and shape of the "mean_value" entry.
tmp = featureExtractor.fit_transform(datasetArrayObj)
mean_entry = tmp["mean_value"]
tmp.keys(), len(mean_entry), type(mean_entry), mean_entry.shape

In [None]:
# Pass the extractor's output to dict_to_dataset() and display the result.
datasetArrayObj.dict_to_dataset(tmp)

In [None]:
# Wrap melspectrogram (n_fft=100, hop_length=10) in a FunctionTransformer2D.
# NOTE(review): despite the variable name, this is a FunctionTransformer2D,
# not a FeatureExtractor2D — confirm that is intended.
featureExtractor2D = FunctionTransformer2D(melspectrogram, n_fft=100, hop_length=10)

In [None]:
# Run the 2D transformer over the DatasetArray (rebinding `tmp`) and display
# the type and shape of its output.
tmp = featureExtractor2D.fit_transform(datasetArrayObj)
type(tmp), tmp.shape

## DatasetList

In [None]:
# Highpass Butterworth settings handed to the transformer as keyword arguments.
highpass_kwargs = {"fs": 200, "filter_type": "highpass", "cutoff_freq": 50}

# Rebind functionTransformer to the highpass variant, run it over the
# DatasetList and display the transformed dataset.
functionTransformer = FunctionTransformer(filter_butterworth, **highpass_kwargs)
transformedList = functionTransformer.fit_transform(datasetListObj)
transformedList

In [None]:
# Display the `X` attribute of the dataset that was passed to the transformer.
datasetListObj.X

In [None]:
# Display the `X` attribute of the transformer's output for comparison.
transformedList.X

In [None]:
# Run the feature extractor over the DatasetList, then summarise the output:
# its keys, the length of the "mean_value" entry, and the type and shape of
# that entry's first element.
tmp = featureExtractor.fit_transform(datasetListObj)
mean_entries = tmp["mean_value"]
first_mean = mean_entries[0]
tmp.keys(), len(mean_entries), type(first_mean), first_mean.shape

In [None]:
# Pass the extractor's output to dict_to_dataset() and keep the result.
tmpToDataset = datasetListObj.dict_to_dataset(tmp)

In [None]:
# Display the first item of the converted dataset.
tmpToDataset[0]

In [None]:
# Rebind featureExtractor2D to a FunctionTransformer2D wrapping melspectrogram
# with n_fft=10 and hop_length=10.
# NOTE(review): the DatasetArray section uses n_fft=100 — confirm that the
# smaller n_fft here is intentional.
featureExtractor2D = FunctionTransformer2D(melspectrogram, n_fft=10, hop_length=10)

In [None]:
# Run the 2D transformer over the DatasetList, then summarise the output:
# its type and length, plus the type and shape of its first element.
tmp = featureExtractor2D.fit_transform(datasetListObj)
first_output = tmp[0]
type(tmp), len(tmp), type(first_output), first_output.shape