# Pipeline steps test

In [1]:
import pandas as pd
from caits.filtering import filter_butterworth
from caits.fe import mean_value, std_value, stft, istft, melspectrogram
from caits.dataset._dataset3 import CaitsArray, DatasetArray
from caits.transformers._func_transformer_v2 import FunctionTransformer
from caits.transformers._feature_extractor_v2 import FeatureExtractor
from caits.transformers._func_transformer_2d_v2 import FunctionTransformer2D
from caits.transformers._feature_extractor_2d_v2 import FeatureExtractor2D
from caits.transformers._sliding_window_v2 import SlidingWindow

## Dataset

In [2]:
data = pd.read_csv("data/AirQuality.csv", sep=";", decimal=",")
print(data.iloc[:, 6:8])

      PT08.S2(NMHC)  NOx(GT)
0            1046.0    166.0
1             955.0    103.0
2             939.0    131.0
3             948.0    172.0
4             836.0    131.0
...             ...      ...
9466            NaN      NaN
9467            NaN      NaN
9468            NaN      NaN
9469            NaN      NaN
9470            NaN      NaN

[9471 rows x 2 columns]


In [3]:
data_X = data.iloc[:, 2:-2]
data_X = data_X.fillna(data_X.mean())
data_y = data.iloc[:, -2:]
data_y = data_y.fillna(data_y.mean())

In [4]:
data_X_vals = data_X.values
data_X_axis_names = {"axis_1": {name: i for i, name in enumerate(list(data_X.columns))}}
data_y_vals = data_y.values
data_y_axis_names = {"axis_1": {name: i for i, name in enumerate((data_y.columns))}}
data_X = CaitsArray(values=data_X_vals, axis_names=data_X_axis_names)
data_y = CaitsArray(values=data_y_vals, axis_names=data_y_axis_names)
datasetArrayObj = DatasetArray(data_X, data_y)

In [5]:
functionTransformer = FunctionTransformer(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)
transformedArray = functionTransformer.fit_transform(datasetArrayObj)

In [6]:
datasetArrayObj.X.values

array([[ 2.60000000e+00,  1.36000000e+03,  1.50000000e+02, ...,
         1.36000000e+01,  4.89000000e+01,  7.57800000e-01],
       [ 2.00000000e+00,  1.29200000e+03,  1.12000000e+02, ...,
         1.33000000e+01,  4.77000000e+01,  7.25500000e-01],
       [ 2.20000000e+00,  1.40200000e+03,  8.80000000e+01, ...,
         1.19000000e+01,  5.40000000e+01,  7.50200000e-01],
       ...,
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00]])

In [7]:
transformedArray.X.values

array([[ 2.60027232e+00,  1.36000482e+03,  1.50014594e+02, ...,
         1.36002492e+01,  4.88987796e+01,  7.57799735e-01],
       [-1.42059319e+00,  1.34921145e+03,  1.14210394e+02, ...,
         1.29907604e+01,  4.92156870e+01,  7.34216981e-01],
       [ 2.11795883e+00,  1.36625793e+03,  9.01143291e+01, ...,
         1.20434872e+01,  5.34476279e+01,  7.47262977e-01],
       ...,
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00]])

In [8]:
featureExtractor = FeatureExtractor([
    {
        "func": mean_value,
        "params": {}
    },
    {
        "func": std_value,
        "params": {
            "ddof": 0
        }
    }
])

In [9]:
tmp = featureExtractor.fit_transform(datasetArrayObj)
tmp.keys(), len(tmp["mean_value"]), type(tmp["mean_value"]), tmp["mean_value"].shape

ValueError: zero-dimensional arrays cannot be concatenated

In [10]:
datasetArrayObj.dict_to_dataset(tmp).values

NameError: name 'tmp' is not defined

In [11]:
featureExtractor2D = FeatureExtractor2D(melspectrogram, n_fft=100, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetArrayObj)

  mel_basis = mel_filter(sr=sr, n_fft=n_fft, **kwargs)


In [12]:
tmp.X.shape

(13, 128, 948)

In [13]:
featureExtractor2D = FeatureExtractor2D(stft, n_fft=100, hop_length=10)
tmp1 = featureExtractor2D.fit_transform(datasetArrayObj)

In [14]:
tmp1.X.shape

(13, 51, 948)

In [15]:
functionTransformer = FunctionTransformer2D(istft, n_fft=100, hop_length=10)
tmp2 = functionTransformer.fit_transform(tmp1)

In [16]:
tmp2.X.shape

(9470, 13)

In [17]:
slidingWindow = SlidingWindow(window_size=10, overlap=5)
tmp = slidingWindow.fit_transform(datasetArrayObj)

In [18]:
len(tmp), tmp.X[0].shape

(10, (13,))