# Imports

In [1]:
import numpy as np
import pandas as pd
from caits.dataset._dataset3 import CaitsArray, DatasetArray, DatasetList
from caits.filtering import filter_butterworth
from caits.fe import mean_value, std_value
from caits.fe import melspectrogram, stft, istft

## CaitsArray test

In [2]:
data = pd.read_csv("data/AirQuality.csv", sep=";", decimal=",")
print(data.iloc[:, 6:8])

      PT08.S2(NMHC)  NOx(GT)
0            1046.0    166.0
1             955.0    103.0
2             939.0    131.0
3             948.0    172.0
4             836.0    131.0
...             ...      ...
9466            NaN      NaN
9467            NaN      NaN
9468            NaN      NaN
9469            NaN      NaN
9470            NaN      NaN

[9471 rows x 2 columns]


In [3]:
vals = data.values
axis_names = {"axis_1": {name: i for i, name in enumerate(data.columns)}}

caitsArr = CaitsArray(values=vals, axis_names=axis_names)
caitsArr

            Date      Time  CO(GT)  PT08.S1(CO)  NMHC(GT)  C6H6(GT)  \
   0  10/03/2004  18.00.00     2.6       1360.0     150.0      11.9  
   1  10/03/2004  19.00.00     2.0       1292.0     112.0       9.4  
   2  10/03/2004  20.00.00     2.2       1402.0      88.0       9.0  
   3  10/03/2004  21.00.00     2.2       1376.0      80.0       9.2  
   4  10/03/2004  22.00.00     1.6       1272.0      51.0       6.5  
 ...         ...       ...     ...          ...       ...       ...  
9466         nan       nan     nan          nan       nan       nan  
9467         nan       nan     nan          nan       nan       nan  
9468         nan       nan     nan          nan       nan       nan  
9469         nan       nan     nan          nan       nan       nan  
9470         nan       nan     nan          nan       nan       nan  

      PT08.S2(NMHC)  NOx(GT)  PT08.S3(NOx)  NO2(GT)  PT08.S4(NO2)  PT08.S5(O3)  \
   0         1046.0    166.0        1056.0    113.0        1692.0       1268

In [4]:
caitsArr.values

array([['10/03/2004', '18.00.00', 2.6, ..., 0.7578, nan, nan],
       ['10/03/2004', '19.00.00', 2.0, ..., 0.7255, nan, nan],
       ['10/03/2004', '20.00.00', 2.2, ..., 0.7502, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [5]:
caitsArr.axis_names

{'axis_0': {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  9: 9,
  10: 10,
  11: 11,
  12: 12,
  13: 13,
  14: 14,
  15: 15,
  16: 16,
  17: 17,
  18: 18,
  19: 19,
  20: 20,
  21: 21,
  22: 22,
  23: 23,
  24: 24,
  25: 25,
  26: 26,
  27: 27,
  28: 28,
  29: 29,
  30: 30,
  31: 31,
  32: 32,
  33: 33,
  34: 34,
  35: 35,
  36: 36,
  37: 37,
  38: 38,
  39: 39,
  40: 40,
  41: 41,
  42: 42,
  43: 43,
  44: 44,
  45: 45,
  46: 46,
  47: 47,
  48: 48,
  49: 49,
  50: 50,
  51: 51,
  52: 52,
  53: 53,
  54: 54,
  55: 55,
  56: 56,
  57: 57,
  58: 58,
  59: 59,
  60: 60,
  61: 61,
  62: 62,
  63: 63,
  64: 64,
  65: 65,
  66: 66,
  67: 67,
  68: 68,
  69: 69,
  70: 70,
  71: 71,
  72: 72,
  73: 73,
  74: 74,
  75: 75,
  76: 76,
  77: 77,
  78: 78,
  79: 79,
  80: 80,
  81: 81,
  82: 82,
  83: 83,
  84: 84,
  85: 85,
  86: 86,
  87: 87,
  88: 88,
  89: 89,
  90: 90,
  91: 91,
  92: 92,
  93: 93,
  94: 94,
  95: 95,
  96: 96,
  97: 97,
  98: 98,
  99: 99,
  100: 100

In [6]:
caitsArr.iloc[:, 3:6]

      PT08.S1(CO)  NMHC(GT)  C6H6(GT)  
   0       1360.0     150.0      11.9  
   1       1292.0     112.0       9.4  
   2       1402.0      88.0       9.0  
   3       1376.0      80.0       9.2  
   4       1272.0      51.0       6.5  
 ...          ...       ...       ...  
9466          nan       nan       nan  
9467          nan       nan       nan  
9468          nan       nan       nan  
9469          nan       nan       nan  
9470          nan       nan       nan  

CaitsArray with shape (9471, 3)

In [7]:
caitsArr.loc[1:7, "NOx(GT)":"T"]

   NOx(GT)  PT08.S3(NOx)  NO2(GT)  PT08.S4(NO2)  PT08.S5(O3)     T  
1    103.0        1174.0     92.0        1559.0        972.0  13.3  
2    131.0        1140.0    114.0        1555.0       1074.0  11.9  
3    172.0        1092.0    122.0        1584.0       1203.0  11.0  
4    131.0        1205.0    116.0        1490.0       1110.0  11.2  
5     89.0        1337.0     96.0        1393.0        949.0  11.2  
6     62.0        1462.0     77.0        1333.0        733.0  11.3  
7     62.0        1453.0     76.0        1333.0        730.0  10.7  

CaitsArray with shape (7, 6)

In [8]:
len(caitsArr)

9471

# Dataset Array test

## Dataset

In [9]:
data_X = data.iloc[:, 2:-2]
data_X = data_X.fillna(data_X.mean())
data_y = data.iloc[:, -2:]
data_y = data_y.fillna(data_y.mean())

In [10]:
data_X

Unnamed: 0,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,2.600000,1360.000000,150.000000,11.900000,1046.000000,166.000000,1056.000000,113.000000,1692.000000,1268.000000,13.600000,48.90000,0.757800
1,2.000000,1292.000000,112.000000,9.400000,955.000000,103.000000,1174.000000,92.000000,1559.000000,972.000000,13.300000,47.70000,0.725500
2,2.200000,1402.000000,88.000000,9.000000,939.000000,131.000000,1140.000000,114.000000,1555.000000,1074.000000,11.900000,54.00000,0.750200
3,2.200000,1376.000000,80.000000,9.200000,948.000000,172.000000,1092.000000,122.000000,1584.000000,1203.000000,11.000000,60.00000,0.786700
4,1.600000,1272.000000,51.000000,6.500000,836.000000,131.000000,1205.000000,116.000000,1490.000000,1110.000000,11.200000,59.60000,0.788800
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9466,-34.207524,1048.990061,-159.090093,1.865683,894.595276,168.616971,794.990168,58.148873,1391.479641,975.072032,9.778305,39.48538,-6.837604
9467,-34.207524,1048.990061,-159.090093,1.865683,894.595276,168.616971,794.990168,58.148873,1391.479641,975.072032,9.778305,39.48538,-6.837604
9468,-34.207524,1048.990061,-159.090093,1.865683,894.595276,168.616971,794.990168,58.148873,1391.479641,975.072032,9.778305,39.48538,-6.837604
9469,-34.207524,1048.990061,-159.090093,1.865683,894.595276,168.616971,794.990168,58.148873,1391.479641,975.072032,9.778305,39.48538,-6.837604


In [11]:
data_y

Unnamed: 0,Unnamed: 15,Unnamed: 16
0,,
1,,
2,,
3,,
4,,
...,...,...
9466,,
9467,,
9468,,
9469,,


In [12]:
data_X_vals = data_X.values
data_X_axis_names = {"axis_1": {name: i for i, name in enumerate(list(data_X.columns))}}
data_y_vals = data_y.values
data_y_axis_names = {"axis_1": {name: i for i, name in enumerate((data_y.columns))}}
data_X = CaitsArray(values=data_X_vals, axis_names=data_X_axis_names)
data_y = CaitsArray(values=data_y_vals, axis_names=data_y_axis_names)
datasetArrayObj = DatasetArray(data_X, data_y)

In [13]:
len(datasetArrayObj)

9471

In [14]:
datasetArrayObj

DatasetArray object with 9471 instances.

## Indexing

In [15]:
datasetArrayObj[3]

(       CO(GT)     2.2
   PT08.S1(CO)  1376.0
      NMHC(GT)    80.0
      C6H6(GT)     9.2
 PT08.S2(NMHC)   948.0
       NOx(GT)   172.0
  PT08.S3(NOx)  1092.0
       NO2(GT)   122.0
  PT08.S4(NO2)  1584.0
   PT08.S5(O3)  1203.0
             T    11.0
            RH    60.0
            AH  0.7867
 
 CaitsArray with shape (13,),
 Unnamed: 15  nan
 Unnamed: 16  nan
 
 CaitsArray with shape (2,))

In [16]:
datasetArrayObj.y

      Unnamed: 15  Unnamed: 16  
   0          nan          nan  
   1          nan          nan  
   2          nan          nan  
   3          nan          nan  
   4          nan          nan  
 ...          ...          ...  
9466          nan          nan  
9467          nan          nan  
9468          nan          nan  
9469          nan          nan  
9470          nan          nan  

CaitsArray with shape (9471, 2)

In [17]:
for i, row in enumerate(datasetArrayObj):
    print(i)
    # print(row)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [18]:
for i, batch in enumerate(datasetArrayObj.batch(10)):
    print(batch)
    # print(i)

(   CO(GT)  PT08.S1(CO)  NMHC(GT)  C6H6(GT)  PT08.S2(NMHC)  NOx(GT)  \
0     2.6       1360.0     150.0      11.9         1046.0    166.0  
1     2.0       1292.0     112.0       9.4          955.0    103.0  
2     2.2       1402.0      88.0       9.0          939.0    131.0  
3     2.2       1376.0      80.0       9.2          948.0    172.0  
4     1.6       1272.0      51.0       6.5          836.0    131.0  
5     1.2       1197.0      38.0       4.7          750.0     89.0  
6     1.2       1185.0      31.0       3.6          690.0     62.0  
7     1.0       1136.0      31.0       3.3          672.0     62.0  
8     0.9       1094.0      24.0       2.3          609.0     45.0  
9     0.6       1010.0      19.0       1.7          561.0   -200.0  

   PT08.S3(NOx)  NO2(GT)  PT08.S4(NO2)  PT08.S5(O3)     T    RH  \
0        1056.0    113.0        1692.0       1268.0  13.6  48.9  
1        1174.0     92.0        1559.0        972.0  13.3  47.7  
2        1140.0    114.0        1555.0 

In [19]:
train_obj, test_obj = datasetArrayObj.train_test_split()

In [20]:
len(train_obj), len(test_obj)

(7576, 1895)

In [21]:
newDatasetArrayObj = train_obj + test_obj
len(newDatasetArrayObj)

9471

In [22]:
train_obj, test_obj = datasetArrayObj.train_test_split(random_state=42)

In [23]:
train_obj.X

      CO(GT)  PT08.S1(CO)  NMHC(GT)  C6H6(GT)  PT08.S2(NMHC)  NOx(GT)  \
 774     0.7        840.0      31.0       1.6          556.0     41.0  
4625     2.1       1083.0    -200.0      10.2          984.0    251.0  
6214     3.4       1374.0    -200.0      17.4         1222.0    602.0  
6465     1.0        884.0    -200.0       3.0          651.0   -200.0  
2362  -200.0        804.0    -200.0       3.3          668.0     32.0  
 ...     ...          ...       ...       ...            ...      ...  
4783     2.5       1063.0    -200.0      11.7         1040.0    341.0  
5208  -200.0        981.0    -200.0      10.1          983.0   -200.0  
3232     2.4       1250.0    -200.0      15.5         1166.0    186.0  
5704     3.7       1341.0    -200.0      21.0         1323.0    499.0  
9129     1.2       1167.0    -200.0       6.4          829.0    202.0  

      PT08.S3(NOx)  NO2(GT)  PT08.S4(NO2)  PT08.S5(O3)     T    RH  \
 774        1474.0     58.0        1094.0        366.0  13.7  38

In [24]:
train_obj.y

      Unnamed: 15  Unnamed: 16  
 774          nan          nan  
4625          nan          nan  
6214          nan          nan  
6465          nan          nan  
2362          nan          nan  
 ...          ...          ...  
4783          nan          nan  
5208          nan          nan  
3232          nan          nan  
5704          nan          nan  
9129          nan          nan  

CaitsArray with shape (7576, 2)

In [25]:
newDatasetArrayObj = train_obj + test_obj
newDatasetArrayObj

DatasetArray object with 9471 instances.

In [26]:
newDatasetArrayObj.X

                CO(GT)         PT08.S1(CO)             NMHC(GT)            C6H6(GT)      PT08.S2(NMHC)            NOx(GT)  \
   0               0.7               840.0                 31.0                 1.6              556.0               41.0  
   1               2.1              1083.0               -200.0                10.2              984.0              251.0  
   2               3.4              1374.0               -200.0                17.4             1222.0              602.0  
   3               1.0               884.0               -200.0                 3.0              651.0             -200.0  
   4            -200.0               804.0               -200.0                 3.3              668.0               32.0  
 ...               ...                 ...                  ...                 ...                ...                ...  
9466  -34.207523778989  1048.9900609169606  -159.09009297851875  1.8656834455487867  894.5952762637597  168.6169712514695  
9467  -

In [27]:
datasetArrayObj.to_dict()

{'X':                 CO(GT)         PT08.S1(CO)             NMHC(GT)            C6H6(GT)      PT08.S2(NMHC)            NOx(GT)  \
    0               2.6              1360.0                150.0                11.9             1046.0              166.0  
    1               2.0              1292.0                112.0                 9.4              955.0              103.0  
    2               2.2              1402.0                 88.0                 9.0              939.0              131.0  
    3               2.2              1376.0                 80.0                 9.2              948.0              172.0  
    4               1.6              1272.0                 51.0                 6.5              836.0              131.0  
  ...               ...                 ...                  ...                 ...                ...                ...  
 9466  -34.207523778989  1048.9900609169606  -159.09009297851875  1.8656834455487867  894.5952762637597  168.6169712514

In [28]:
datasetArrayObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

array([[ 2.60027232e+00,  1.36000482e+03,  1.50014594e+02, ...,
         1.36002492e+01,  4.88987796e+01,  7.57799735e-01],
       [-1.42059319e+00,  1.34921145e+03,  1.14210394e+02, ...,
         1.29907604e+01,  4.92156870e+01,  7.34216981e-01],
       [ 2.11795883e+00,  1.36625793e+03,  9.01143291e+01, ...,
         1.20434872e+01,  5.34476279e+01,  7.47262977e-01],
       ...,
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00],
       [-3.42075238e+01,  1.04899006e+03, -1.59090093e+02, ...,
         9.77830501e+00,  3.94853799e+01, -6.83760364e+00]])

# DatasetList

## Dataset

In [29]:
from caits.loading import csv_loader

data = csv_loader("data/GestureSet_small")

Loading CSV files: 100%|██████████| 924/924 [00:00<00:00, 2816.79it/s]


In [30]:
X, y, id = data["X"], data["y"], data["id"]
caitsX = [CaitsArray(values=x.values, axis_names={
    "axis_1": {
        col: i for i, col in enumerate(x.columns)
    }
}) for x in X]
type(caitsX[0]), type(y[0]), type(id[0])

(caits.dataset._dataset3.CaitsArray, str, str)

In [31]:
datasetListObj = DatasetList(caitsX, y, id)
datasetListObj

DatasetList object with 924 instances.

In [32]:
len(datasetListObj)

924

## Indexing

In [33]:
datasetListObj[3]

DatasetList object with 1 instances.

In [34]:
datasetListObj[3:15]

DatasetList object with 12 instances.

In [35]:
datasetListObj[[3,8,16,107]]

DatasetList object with 4 instances.

In [36]:
datasetListObj[1, 4]

DatasetList object with 1 instances.

In [37]:
tmp = datasetListObj[1, 2:5]
tmp, tmp.X[0].shape

(DatasetList object with 1 instances., (48, 3))

In [38]:
tmp = datasetListObj[1, [3,4]]
tmp, tmp.X[0].shape

(DatasetList object with 1 instances., (48, 2))

In [39]:
datasetListObj.X[0].axis_names["axis_1"]

{'acc_x_axis_g': 0,
 'acc_y_axis_g': 1,
 'acc_z_axis_g': 2,
 'gyr_x_axis_deg/s': 3,
 'gyr_y_axis_deg/s': 4,
 'gyr_z_axis_deg/s': 5}

In [40]:
tmp = datasetListObj[1, "acc_x_axis_g"]
tmp, tmp.X[0].shape, tmp.X[0], tmp.y, tmp._id

(DatasetList object with 1 instances.,
 (48,),
  0  -0.468
  1  -0.237
  2   -0.49
  3   -0.58
  4  -0.509
  5  -0.611
  6  -0.724
  7  -0.825
  8  -0.861
  9  -0.963
 10  -1.332
 11  -1.555
 12  -1.664
 13  -1.623
 14  -1.489
 15  -1.424
 16  -1.374
 17  -1.281
 18  -1.187
 19  -1.147
 20  -1.101
 21  -1.093
 22    -1.1
 23  -1.099
 24  -1.106
 25  -1.114
 26  -1.128
 27  -1.118
 28  -1.067
 29    -1.0
 30  -0.904
 31  -0.827
 32  -0.771
 33  -0.706
 34  -0.661
 35  -0.657
 36  -0.625
 37  -0.604
 38  -0.608
 39   -0.62
 40  -0.629
 41  -0.619
 42  -0.598
 43  -0.583
 44  -0.574
 45  -0.572
 46  -0.566
 47  -0.539
 
 CaitsArray with shape (48,),
 ['01e'],
 ['01e_0_100_AccGyr_1_0_0_03c_4_5e812ad92ba4a9c752eea16b.csv'])

In [41]:
tmp = datasetListObj[1, ["acc_x_axis_g", "acc_z_axis_g"]]
tmp, tmp.X[0].shape

(DatasetList object with 1 instances., (48, 2))

In [42]:
tmp = datasetListObj[1, "acc_x_axis_g":"gyr_x_axis_deg/s"]
tmp, tmp.X[0].shape, tmp.X[0]

(DatasetList object with 1 instances.,
 (48, 4),
     acc_x_axis_g  acc_y_axis_g  acc_z_axis_g  gyr_x_axis_deg/s  
  0        -0.468         0.098         0.878           115.732  
  1        -0.237         0.212         0.969           139.268  
  2         -0.49         0.392         0.852           138.841  
  3         -0.58         0.499          0.66            116.22  
  4        -0.509         0.619          0.51            90.427  
 ...           ...           ...           ...               ...  
 43        -0.583         0.417         0.299           -44.268  
 44        -0.574         0.407         0.367           -39.329  
 45        -0.572           0.4         0.419           -33.963  
 46        -0.566         0.389         0.448           -27.744  
 47        -0.539         0.375         0.448           -21.159  
 
 CaitsArray with shape (48, 4))

In [43]:
datasetListObj[1:4, 1]

DatasetList object with 3 instances.

In [44]:
datasetListObj[1:4, 3:5]

DatasetList object with 3 instances.

In [45]:
datasetListObj[1:4, [1,5]]

DatasetList object with 3 instances.

In [46]:
datasetListObj[1:4, "acc_x_axis_g"]

DatasetList object with 3 instances.

In [47]:
datasetListObj[1:4, ["acc_z_axis_g", "gyr_z_axis_deg/s"]]

DatasetList object with 3 instances.

In [48]:
tmp = datasetListObj[1:4, "acc_x_axis_g":"gyr_x_axis_deg/s"]
tmp, tmp.X[0].shape, tmp.X[0]

(DatasetList object with 3 instances.,
 (48, 4),
     acc_x_axis_g  acc_y_axis_g  acc_z_axis_g  gyr_x_axis_deg/s  
  0        -0.468         0.098         0.878           115.732  
  1        -0.237         0.212         0.969           139.268  
  2         -0.49         0.392         0.852           138.841  
  3         -0.58         0.499          0.66            116.22  
  4        -0.509         0.619          0.51            90.427  
 ...           ...           ...           ...               ...  
 43        -0.583         0.417         0.299           -44.268  
 44        -0.574         0.407         0.367           -39.329  
 45        -0.572           0.4         0.419           -33.963  
 46        -0.566         0.389         0.448           -27.744  
 47        -0.539         0.375         0.448           -21.159  
 
 CaitsArray with shape (48, 4))

In [49]:
tmp1 = datasetListObj[:100, "acc_x_axis_g":"acc_z_axis_g"]
tmp2 = datasetListObj[:100, "gyr_x_axis_deg/s":"gyr_y_axis_deg/s"]
len(tmp1), len(tmp2), tmp1.X[0].shape, tmp2.X[0].shape, {axis: len(names) for axis, names in tmp1.X[0].axis_names.items()}, {axis: len(names) for axis, names in tmp2.X[0].axis_names.items()}

(100,
 100,
 (48, 3),
 (48, 2),
 {'axis_0': 48, 'axis_1': 3},
 {'axis_0': 48, 'axis_1': 2})

In [50]:
axis_names = {**tmp1.X[0].axis_names["axis_1"], **tmp2.X[0].axis_names["axis_1"]}
axis_names

{'acc_x_axis_g': 0,
 'acc_y_axis_g': 1,
 'acc_z_axis_g': 2,
 'gyr_x_axis_deg/s': 0,
 'gyr_y_axis_deg/s': 1}

In [56]:
tmp = tmp1.unify([tmp2], axis=1)
tmp, tmp.X[0].shape, tmp.X[0]

(DatasetList object with 100 instances.,
 (48, 2),
     acc_x_axis_g  acc_y_axis_g  
  0        -0.015        -0.649  
  1         0.042        -0.685  
  2        -0.002        -0.697  
  3        -0.039          -0.7  
  4         0.006        -0.738  
 ...           ...           ...  
 43        -0.314        -0.775  
 44          -0.3        -0.782  
 45        -0.293        -0.786  
 46         -0.28        -0.794  
 47         -0.26        -0.814  
 
 CaitsArray with shape (48, 2))

In [52]:
tmp1 = datasetListObj[:100, ["acc_x_axis_g"]]
tmp2 = datasetListObj[:100, ["acc_y_axis_g"]]
tmp3 = datasetListObj[:100, ["acc_z_axis_g", "gyr_z_axis_deg/s"]]
tmp1.X[0], tmp2.X[0], tmp3.X[0]

(    acc_x_axis_g  
  0        -0.015  
  1         0.042  
  2        -0.002  
  3        -0.039  
  4         0.006  
 ...           ...  
 43        -0.314  
 44          -0.3  
 45        -0.293  
 46         -0.28  
 47         -0.26  
 
 CaitsArray with shape (48, 1),
     acc_y_axis_g  
  0        -0.649  
  1        -0.685  
  2        -0.697  
  3          -0.7  
  4        -0.738  
 ...           ...  
 43        -0.775  
 44        -0.782  
 45        -0.786  
 46        -0.794  
 47        -0.814  
 
 CaitsArray with shape (48, 1),
     acc_z_axis_g  gyr_z_axis_deg/s  
  0         0.599            15.305  
  1         0.566            29.878  
  2          0.54             39.39  
  3         0.514            45.976  
  4         0.515             50.61  
 ...           ...               ...  
 43         0.444            -6.768  
 44         0.445            -5.976  
 45          0.45            -5.183  
 46         0.452            -4.024  
 47          0.46            -2

In [55]:
tmp = tmp1.unify([tmp2, tmp3], axis=1)
tmp, tmp.X[0].shape, tmp.X[0].axis_names

(DatasetList object with 100 instances.,
 (48, 4),
 {'axis_0': {0: 0,
   1: 1,
   2: 2,
   3: 3,
   4: 4,
   5: 5,
   6: 6,
   7: 7,
   8: 8,
   9: 9,
   10: 10,
   11: 11,
   12: 12,
   13: 13,
   14: 14,
   15: 15,
   16: 16,
   17: 17,
   18: 18,
   19: 19,
   20: 20,
   21: 21,
   22: 22,
   23: 23,
   24: 24,
   25: 25,
   26: 26,
   27: 27,
   28: 28,
   29: 29,
   30: 30,
   31: 31,
   32: 32,
   33: 33,
   34: 34,
   35: 35,
   36: 36,
   37: 37,
   38: 38,
   39: 39,
   40: 40,
   41: 41,
   42: 42,
   43: 43,
   44: 44,
   45: 45,
   46: 46,
   47: 47},
  'axis_1': {'acc_x_axis_g': 0,
   'acc_y_axis_g': 1,
   'acc_z_axis_g': 2,
   'gyr_z_axis_deg/s': 3}})

In [None]:
for i, row in enumerate(datasetListObj):
    print(i)

In [None]:
for i, batch in enumerate(datasetListObj.batch(10)):
    print(batch)

In [None]:
train_obj, test_obj = datasetListObj.train_test_split()

In [None]:
len(train_obj), len(test_obj)

In [None]:
train_obj.X

In [None]:
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
train_obj, test_obj = datasetListObj.train_test_split(random_state=42)
len(train_obj), len(test_obj)

In [None]:
newDatasetListObj = train_obj + test_obj
len(newDatasetListObj)

In [None]:
datasetListObj.apply(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)

# Pipeline steps test

## DatasetArray

In [None]:
from caits.transformers._func_transformer_v2 import FunctionTransformer
from caits.transformers._feature_extractor_v2 import FeatureExtractor
from caits.transformers._func_transformer_2d_v2 import FunctionTransformer2D
from caits.transformers._feature_extractor_2d_v2 import FeatureExtractor2D
from caits.transformers._sliding_window_v2 import SlidingWindow

functionTransformer = FunctionTransformer(filter_butterworth, fs=200, filter_type='lowpass', cutoff_freq=50)
transformedArray = functionTransformer.fit_transform(datasetArrayObj)

In [None]:
datasetArrayObj.X.values

In [None]:
transformedArray.X.values

In [None]:
featureExtractor = FeatureExtractor([
    {
        "func": mean_value,
        "params": {}
    },
    {
        "func": std_value,
        "params": {
            "ddof": 0
        }
    }
])

In [None]:
tmp = featureExtractor.fit_transform(datasetArrayObj)
tmp.keys(), len(tmp["mean_value"]), type(tmp["mean_value"]), tmp["mean_value"].shape

In [None]:
datasetArrayObj.dict_to_dataset(tmp).values

In [None]:
featureExtractor2D = FeatureExtractor2D(melspectrogram, n_fft=100, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetArrayObj)

In [None]:
tmp.X.shape

In [None]:
featureExtractor2D = FeatureExtractor2D(stft, n_fft=100, hop_length=10)
tmp1 = featureExtractor2D.fit_transform(datasetArrayObj)

In [None]:
tmp1.X.shape

In [None]:
functionTransformer = FunctionTransformer2D(istft, n_fft=100, hop_length=10)
tmp2 = functionTransformer.fit_transform(tmp1)

In [None]:
tmp2.X.shape

In [None]:
slidingWindow = SlidingWindow(window_size=10, overlap=5)
tmp = slidingWindow.fit_transform(datasetArrayObj)

In [None]:
len(tmp), tmp.X[0].shape

# DatasetList

In [None]:
functionTransformer = FunctionTransformer(filter_butterworth, fs=200, filter_type='highpass', cutoff_freq=50)
transformedList = functionTransformer.fit_transform(datasetListObj)
transformedList

In [None]:
datasetListObj.X[0].values

In [None]:
transformedList.X[0].values

In [None]:
tmp = featureExtractor.fit_transform(datasetListObj)
tmp.keys(), len(tmp["mean_value"]), type(tmp["mean_value"][0]), tmp["mean_value"][0].shape

In [None]:
tmpToDataset = datasetListObj.dict_to_dataset(tmp)

In [None]:
tmpToDataset[0]

In [None]:
featureExtractor2D = FeatureExtractor2D(melspectrogram, n_fft=10, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetListObj)

In [None]:
tmp

In [None]:
featureExtractor2D = FeatureExtractor2D(stft, n_fft=10, hop_length=10)
tmp = featureExtractor2D.fit_transform(datasetListObj)

In [None]:
tmp.X[900].values.shape

In [None]:
functionTransformer = FunctionTransformer2D(istft, hop_length=10)
tmp = functionTransformer.fit_transform(tmp)

In [None]:
tmp.X[100].shape, datasetListObj.X[100].shape

In [None]:
slidingWindow = SlidingWindow(window_size=10, overlap=5)
tmp = slidingWindow.fit_transform(datasetListObj)

In [None]:
len(tmp.X), len(tmp.y), len(tmp._id)

In [None]:
tmp.to_numpy()