# Imports

In [None]:
import numpy as np

import pandas as pd
from sklearn.preprocessing import StandardScaler

from caits.dataset._dataset3 import CaitsArray, DatasetArray, DatasetList
from caits.filtering import filter_butterworth
from caits.fe import mean_value, std_value
from caits.fe import melspectrogram, stft, istft

# CaitsArray test

In [None]:
# Load the air-quality CSV (semicolon-separated fields, comma as the decimal
# mark) and preview the two columns at positions 6 and 7.
data = pd.read_csv("data/AirQuality.csv", sep=";", decimal=",")
print(data.iloc[:, 6:8])

In [None]:
# Name every column on axis 1 by pairing each column label with its position.
axis_names = {"axis_1": dict(zip(data.columns, range(len(data.columns))))}
vals = data.values

# Wrap the raw values and the label mapping in a CaitsArray and display it.
caitsArr = CaitsArray(values=vals, axis_names=axis_names)
caitsArr

In [None]:
# Show the values held by the CaitsArray.
caitsArr.values

In [None]:
# Show the axis-name mapping attached to the CaitsArray.
caitsArr.axis_names

In [None]:
# Positional indexing through .iloc: every row, column positions 3:6.
caitsArr.iloc[:, 3:6]

In [None]:
# Label-based indexing through .loc: rows 1:7 and the column range from
# "NOx(GT)" to "T" (endpoint handling is defined by CaitsArray.loc — not
# visible here, TODO confirm whether it is inclusive).
caitsArr.loc[1:7, "NOx(GT)":"T"]

In [None]:
# Length reported by the CaitsArray (presumably the number of rows — TODO confirm).
len(caitsArr)

# DatasetArray test

## Dataset

In [None]:
# Features: every column except the first two and the last two.
# Targets: the last two columns.
# In both frames, missing entries are replaced by the mean of their own column.
# NOTE(review): if the last two CSV columns are entirely empty, their mean is
# NaN and the fill leaves data_y as NaN — confirm these are the intended targets.
data_X = data.iloc[:, 2:-2].pipe(lambda frame: frame.fillna(frame.mean()))
data_y = data.iloc[:, -2:].pipe(lambda frame: frame.fillna(frame.mean()))

In [None]:
# Display the feature frame after the mean fill.
data_X

In [None]:
# Display the target frame after the mean fill.
data_y

In [None]:
# Pull out the raw values and build a label -> position mapping for axis 1
# of both the feature and the target frame.
data_X_vals, data_y_vals = data_X.values, data_y.values
data_X_axis_names = {"axis_1": {label: pos for pos, label in enumerate(data_X.columns)}}
data_y_axis_names = {"axis_1": {label: pos for pos, label in enumerate(data_y.columns)}}

# NOTE(review): data_X and data_y are rebound from DataFrames to CaitsArray
# objects here, so re-running this cell likely requires re-running the cell
# that builds the DataFrames first.
data_X, data_y = (
    CaitsArray(values=data_X_vals, axis_names=data_X_axis_names),
    CaitsArray(values=data_y_vals, axis_names=data_y_axis_names),
)

# Pair the two arrays in a single DatasetArray.
datasetArrayObj = DatasetArray(data_X, data_y)

In [None]:
# Length reported by the DatasetArray.
len(datasetArrayObj)

In [None]:
# Display the DatasetArray representation.
datasetArrayObj

## Indexing

In [None]:
# Index the DatasetArray with a single integer.
datasetArrayObj[3]

In [None]:
# Access the y attribute (presumably the data_y array passed to the constructor).
datasetArrayObj.y

In [None]:
# Check that the DatasetArray is iterable: only the running index is printed.
for i, row in enumerate(datasetArrayObj):
    print(i)
    # Uncomment to also inspect the item yielded at each step:
    # print(row)

In [None]:
# Iterate over batch(10) and print every batch it yields
# (10 is presumably the batch size — TODO confirm).
for i, batch in enumerate(datasetArrayObj.batch(10)):
    print(batch)
    # Uncomment to print the batch index instead:
    # print(i)

In [None]:
# Split into two objects using train_test_split() with its default arguments.
train_obj, test_obj = datasetArrayObj.train_test_split()

In [None]:
# Sizes of the two splits.
len(train_obj), len(test_obj)

In [None]:
# Combine the two splits with "+" and check the length of the result
# (expected to match len(datasetArrayObj) — TODO confirm).
newDatasetArrayObj = train_obj + test_obj
len(newDatasetArrayObj)

In [None]:
# Split again, this time with a fixed random_state (presumably for a
# reproducible split).
train_obj, test_obj = datasetArrayObj.train_test_split(random_state=42)

In [None]:
# X part of the first split.
train_obj.X

In [None]:
# y part of the first split.
train_obj.y

In [None]:
# Recombine the seeded splits with "+" and display the result.
newDatasetArrayObj = train_obj + test_obj
newDatasetArrayObj

In [None]:
# X part of the recombined dataset.
newDatasetArrayObj.X

In [None]:
# Convert the DatasetArray with to_dict() and display the result.
datasetArrayObj.to_dict()

In [None]:
# Run filter_butterworth through apply(), passing the filter settings
# (fs=200, low-pass, cutoff 50) as keyword arguments.
datasetArrayObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

# DatasetList

## Dataset

In [None]:
from caits.loading import csv_loader

# Load the gesture data set. NOTE: this rebinds `data`, which earlier held the
# air-quality DataFrame; the result is read below through the keys "X", "y"
# and "id".
data = csv_loader("data/GestureSet_small")

In [None]:
X, y, id = data["X"], data["y"], data["id"]
caitsX = [CaitsArray(values=x.values, axis_names={
    "axis_1": {
        col: i for i, col in enumerate(x.columns)
    }
}) for x in X]
type(caitsX[0]), type(y[0]), type(id[0])

In [None]:
datasetListObj = DatasetList(caitsX, y, id)
datasetListObj

In [None]:
# Length reported by the DatasetList.
len(datasetListObj)

## Indexing

In [None]:
# Index with a single integer.
datasetListObj[3]

In [None]:
# Index with a slice.
datasetListObj[3:15]

In [None]:
# Index with a list of integer positions.
datasetListObj[[3,8,16,107]]

In [None]:
# Two-part key: integer first element, integer second element. In the cells
# below the second element is also given as column names, so it presumably
# selects along axis 1 — TODO confirm.
datasetListObj[1, 4]

In [None]:
# Two-part key with a positional slice as second element; show the result and
# the shape of its first X entry.
tmp = datasetListObj[1, 2:5]
tmp, tmp.X[0].shape

In [None]:
# Two-part key with a list of positions as second element.
tmp = datasetListObj[1, [3,4]]
tmp, tmp.X[0].shape

In [None]:
# Axis-1 name mapping of the first instance; these names are used as keys in
# the name-based indexing cells that follow.
datasetListObj.X[0].axis_names["axis_1"]

In [None]:
# Two-part key with a single column name as second element; inspect the
# result together with its X entry, shape, y and _id.
tmp = datasetListObj[1, "acc_x_axis_g"]
tmp, tmp.X[0].shape, tmp.X[0], tmp.y, tmp._id

In [None]:
# Two-part key with a list of column names as second element.
tmp = datasetListObj[1, ["acc_x_axis_g", "acc_z_axis_g"]]
tmp, tmp.X[0].shape

In [None]:
# Two-part key with a slice between two column names as second element.
tmp = datasetListObj[1, "acc_x_axis_g":"gyr_x_axis_deg/s"]
tmp, tmp.X[0].shape, tmp.X[0]

In [None]:
# Slice as first element, single position as second element.
datasetListObj[1:4, 1]

In [None]:
# Slice as first element, positional slice as second element.
datasetListObj[1:4, 3:5]

In [None]:
# Slice as first element, list of positions as second element.
datasetListObj[1:4, [1,5]]

In [None]:
# Slice as first element, single column name as second element.
datasetListObj[1:4, "acc_x_axis_g"]

In [None]:
# Slice as first element, list of column names as second element.
datasetListObj[1:4, ["acc_z_axis_g", "gyr_z_axis_deg/s"]]

In [None]:
# Slice as first element, slice between two column names as second element.
tmp = datasetListObj[1:4, "acc_x_axis_g":"gyr_x_axis_deg/s"]
tmp, tmp.X[0].shape, tmp.X[0]

In [None]:
# Take the first 100 entries twice, each time with a different named column range.
tmp1 = datasetListObj[:100, "acc_x_axis_g":"acc_z_axis_g"]
tmp2 = datasetListObj[:100, "gyr_x_axis_deg/s":"gyr_y_axis_deg/s"]

# Summary: lengths, shape of the first X entry, and the number of names
# registered per axis, for both selections.
(
    len(tmp1),
    len(tmp2),
    tmp1.X[0].shape,
    tmp2.X[0].shape,
    {axis_key: len(labels) for axis_key, labels in tmp1.X[0].axis_names.items()},
    {axis_key: len(labels) for axis_key, labels in tmp2.X[0].axis_names.items()},
)

In [None]:
# Merge the axis-1 name mappings of both selections into one dict; if a name
# occurred in both, the entry from tmp2 would win. NOTE: this rebinds the
# `axis_names` variable created in the CaitsArray section.
axis_names = {**tmp1.X[0].axis_names["axis_1"], **tmp2.X[0].axis_names["axis_1"]}
axis_names

In [None]:
# Combine tmp1 with tmp2 through unify() along axis 1 and inspect the result.
tmp = tmp1.unify([tmp2], axis=1)
tmp, tmp.X[0].shape, tmp.X[0]

In [None]:
# Three selections over the first 100 entries: one column, one column, and
# two columns (four columns in total).
tmp1 = datasetListObj[:100, ["acc_x_axis_g"]]
tmp2 = datasetListObj[:100, ["acc_y_axis_g"]]
tmp3 = datasetListObj[:100, ["acc_z_axis_g", "gyr_z_axis_deg/s"]]
tmp1.X[0], tmp2.X[0], tmp3.X[0]

In [None]:
# Combine the three selections along axis 1, supplying explicit names
# col1..col4 for the four resulting positions.
tmp = tmp1.unify([tmp2, tmp3], axis_names={"axis_1": {"col1": 0, "col2": 1, "col3": 2, "col4": 3}}, axis=1)
tmp, tmp.X[0].shape, tmp.X[0].axis_names

In [None]:
# Index the unified dataset by two of the newly supplied column names.
tmp[:, ["col1", "col3"]].X

In [None]:
# Check that the DatasetList is iterable: only the running index is printed.
for i, row in enumerate(datasetListObj):
    print(i)

In [None]:
# Iterate over batch(10) and print every batch it yields.
for i, batch in enumerate(datasetListObj.batch(10)):
    print(batch)

In [None]:
# Split into two objects using train_test_split() with its default arguments.
train_obj, test_obj = datasetListObj.train_test_split()

In [None]:
# Sizes of the two splits.
len(train_obj), len(test_obj)

In [None]:
# X part of the first split.
train_obj.X

In [None]:
# Combine the two splits with "+" and check the length of the result
# (expected to match len(datasetListObj) — TODO confirm).
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
# Split again with a fixed random_state and report the split sizes.
train_obj, test_obj = datasetListObj.train_test_split(random_state=42)
len(train_obj), len(test_obj)

In [None]:
# Recombine the seeded splits with "+" and check the resulting length.
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
# Run filter_butterworth through apply(), passing the filter settings
# (fs=200, low-pass, cutoff 50) as keyword arguments.
datasetListObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

# Pipeline steps test

## DatasetArray

In [None]:
# Transformer classes exercised in the remainder of the notebook.
from caits.transformers._func_transformer_v2 import FunctionTransformer
from caits.transformers._feature_extractor_v2 import FeatureExtractor
from caits.transformers._func_transformer_2d_v2 import FunctionTransformer2D
from caits.transformers._feature_extractor_2d_v2 import FeatureExtractor2D
from caits.transformers._sliding_window_v2 import SlidingWindow

# Wrap the Butterworth low-pass filter in a FunctionTransformer and run it on
# the DatasetArray through fit_transform().
functionTransformer = FunctionTransformer(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)
transformedArray = functionTransformer.fit_transform(datasetArrayObj)

In [None]:
# Values of the source dataset, for comparison with the transformed output below.
datasetArrayObj.X.values

In [None]:
# Values after the transformer has been applied.
transformedArray.X.values

In [None]:
# Configure the extractor with two features; every entry pairs the callable
# ("func") with the keyword arguments supplied for it ("params").
featureExtractor = FeatureExtractor(
    [
        dict(func=mean_value, params=dict()),
        dict(func=std_value, params=dict(ddof=0)),
    ]
)

In [None]:
# Run the extractor on the DatasetArray and inspect the result: its keys plus
# the length, type and shape of the "mean_value" entry.
tmp = featureExtractor.fit_transform(datasetArrayObj)
tmp.keys(), len(tmp["mean_value"]), type(tmp["mean_value"]), tmp["mean_value"].shape

In [None]:
# Convert the extractor output with dict_to_dataset() and show its values.
datasetArrayObj.dict_to_dataset(tmp).values

In [None]:
# Mel-spectrogram extraction on the DatasetArray (n_fft=100, hop_length=10).
featureExtractor2D = FeatureExtractor2D(melspectrogram, n_fft=100, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetArrayObj)

In [None]:
# Shape of the mel-spectrogram output.
tmp.X.shape

In [None]:
# STFT extraction on the DatasetArray (n_fft=100, hop_length=10); the result
# is kept in tmp1 so that it can be inverted below.
featureExtractor2D = FeatureExtractor2D(stft, n_fft=100, hop_length=10)
tmp1 = featureExtractor2D.fit_transform(datasetArrayObj)

In [None]:
# Shape of the STFT output.
tmp1.X.shape

In [None]:
# Apply istft to the STFT output, using the same n_fft and hop_length as the
# forward transform. NOTE: this rebinds `functionTransformer`.
functionTransformer = FunctionTransformer2D(istft, n_fft=100, hop_length=10)
tmp2 = functionTransformer.fit_transform(tmp1)

In [None]:
# Shape after the inverse transform.
tmp2.X.shape

In [None]:
# Apply SlidingWindow with window_size=10 and overlap=5 to the DatasetArray.
slidingWindow = SlidingWindow(window_size=10, overlap=5)
tmp = slidingWindow.fit_transform(datasetArrayObj)

In [None]:
# Length of the windowed result and the shape of its first X entry.
len(tmp), tmp.X[0].shape

## DatasetList

In [None]:
# Wrap the Butterworth filter (high-pass this time) in a FunctionTransformer
# and run it on the DatasetList.
functionTransformer = FunctionTransformer(filter_butterworth, fs=200, filter_type='highpass', cutoff_freq=50)
transformedList = functionTransformer.fit_transform(datasetListObj)
transformedList

In [None]:
# Values of the first source instance, for comparison with the filtered output below.
datasetListObj.X[0].values

In [None]:
# Values of the first instance after the transformer has been applied.
transformedList.X[0].values

In [None]:
# Reuse the extractor configured earlier on the DatasetList and inspect the
# result: its keys, the number of "mean_value" entries, and the type and
# shape of the first one.
tmp = featureExtractor.fit_transform(datasetListObj)
tmp.keys(), len(tmp["mean_value"]), type(tmp["mean_value"][0]), tmp["mean_value"][0].shape

In [None]:
# Convert the extractor output with dict_to_dataset().
tmpToDataset = datasetListObj.dict_to_dataset(tmp)

In [None]:
# First entry of the converted feature dataset.
tmpToDataset[0]

In [None]:
# Mel-spectrogram extraction on the DatasetList (n_fft=10, hop_length=10).
featureExtractor2D = FeatureExtractor2D(melspectrogram, n_fft=10, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetListObj)

In [None]:
# Display the mel-spectrogram result.
tmp

In [None]:
# STFT extraction on the DatasetList (n_fft=10, hop_length=10).
featureExtractor2D = FeatureExtractor2D(stft, n_fft=10, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetListObj)

In [None]:
# Shape of the STFT output for the entry at index 900 (assumes the data set
# holds at least 901 entries — TODO confirm for the "small" gesture set).
tmp.X[900].values.shape

In [None]:
# Apply istft to the STFT output; `tmp` is overwritten with the result.
# NOTE(review): n_fft is not passed here, although the forward STFT above used
# n_fft=10 and the DatasetArray section passes n_fft to istft explicitly —
# confirm that relying on the default is intended.
functionTransformer = FunctionTransformer2D(istft, hop_length=10)
tmp = functionTransformer.fit_transform(tmp)

In [None]:
# Compare the shape of entry 100 after the STFT/ISTFT round trip with the
# shape of the original entry.
tmp.X[100].shape, datasetListObj.X[100].shape

In [None]:
# SlidingWindow was already imported in the "Pipeline steps test" cell; the
# repeated import lets this cell run on its own.
from caits.transformers._sliding_window_v2 import SlidingWindow

# Apply SlidingWindow with window_size=10 and overlap=5 to the DatasetList.
slidingWindow = SlidingWindow(window_size=10, overlap=5)
tmp = slidingWindow.fit_transform(datasetListObj)

In [None]:
# The X, y and _id collections of the windowed dataset should have equal lengths.
len(tmp.X), len(tmp.y), len(tmp._id)

In [None]:
# Convert the windowed dataset with to_numpy() and display the result.
tmp.to_numpy()

In [None]:
from caits.transformers._data_converters_v2 import DatasetToArray

# Converter configured with flatten=True and a float64 dtype.
dataConverter = DatasetToArray(flatten=True, dtype=np.float64)

# Fit the converter on the windowed dataset.
dataConverter.fit(tmp)


In [None]:
# Convert the windowed dataset and show the result together with its shape.
tmp_conv = dataConverter.transform(tmp)
tmp_conv, tmp_conv.shape

In [None]:
from caits.transformers._data_converters_v2 import ArrayToDataset

# Shape of one window, taken from the first X entry.
shape = tmp.X[0].shape

# Inverse converter: it is given the per-window shape, the dataset's
# numpy_to_dataset method as the constructor callback, the dtype, and the
# axis-1 names of the first window.
dataConverterInv = ArrayToDataset(
    shape=shape,
    data_class_fun=tmp.numpy_to_dataset,
    dtype=np.float64,
    axis_names={"axis_1": tmp.X[0].axis_names["axis_1"]}
)

# Fit on the windowed dataset.
dataConverterInv.fit(tmp)

# Convert the array produced by dataConverter back through the inverse converter.
tmp_conv_inv = dataConverterInv.transform(tmp_conv)


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Two-stage pipeline: the dataset-to-array converter created earlier,
# followed by a fresh StandardScaler.
pipeline = Pipeline(steps=[("conv", dataConverter), ("scaler", StandardScaler())])

In [None]:
# Split the windowed dataset with a fixed random_state and display both parts.
tmp_train, tmp_test = tmp.train_test_split(random_state=42)
tmp_train, tmp_test

In [None]:
# Fit the pipeline on the training part only.
pipeline.fit(tmp_train)

In [None]:
# Transform both parts with the pipeline fitted on the training part.
final_train = pipeline.transform(tmp_train)
final_test = pipeline.transform(tmp_test)

# Shapes of the two transformed outputs.
final_train.shape, final_test.shape


In [None]:
# Pass the pipeline outputs through the inverse converter.
# NOTE(review): fit_transform() re-fits dataConverterInv on the pipeline
# output here, whereas it was previously fitted on the dataset `tmp` —
# confirm that re-fitting on an array is intended.
final_train_dataset = dataConverterInv.fit_transform(final_train)
final_test_dataset = dataConverterInv.transform(final_test)

In [None]:
# Display the two reconstructed datasets.
final_train_dataset, final_test_dataset