# Extract MiniROCKET features, and write to ts file

The purpose of this notebook is to extract the MiniROCKET feature set and write it to train and test .ts files for later exploration.

In [1]:
# Numerical / tabular libraries plus stdlib helpers used by the cells below.
import numpy as np
import pandas as pd
import os
import sys
import time as time
# Absolute path two directory levels above the current working directory;
# later cells prepend it to the data and feature file locations.
# NOTE(review): the result depends on where the kernel was started — confirm
# the notebook is run from the expected folder.
module_path = os.path.abspath(os.path.join('../..'))
# Loader that reads a .ts file into a feature frame and a label array.
from sktime.utils.data_io import load_from_tsfile_to_dataframe

from sklearn.model_selection import train_test_split
from sktime.transformations.panel.rocket import MiniRocket

UCR archive dataset

In [12]:
import sktime  # local import: only needed here to locate the installed package

name_UCR = 'Heartbeat'

# Locate the dataset inside the installed sktime package instead of hard-coding
# one user's site-packages directory, so the cell runs on any machine/environment.
# NOTE(review): assumes the <name>_TRAIN.ts / <name>_TEST.ts files are present in
# sktime's datasets/data/<name>/ folder — confirm for datasets that are not bundled.
sktime_data_dir = os.path.join(os.path.dirname(sktime.__file__), 'datasets', 'data')
filepath = os.path.join(sktime_data_dir, name_UCR, name_UCR)

# Read the predefined train and test splits ...
X_train, y_train = load_from_tsfile_to_dataframe(filepath + '_TRAIN.ts')
X_test, y_test = load_from_tsfile_to_dataframe(filepath + '_TEST.ts')
# ... and also keep the full dataset (train rows followed by test rows).
X = pd.concat([X_train, X_test])
y = np.concatenate([y_train, y_test])

Lung sound datasets (Kaggle or Tromsø)

In [2]:
# Location of the lung-sound .ts file, relative to the project root.
ts_path = module_path + '/data/ts_files/UiT_compressed_rms.ts'

# Load features and labels, reporting how long the read took (seconds).
# NOTE(review): the message says "without compression" although the file name
# contains "compressed" — confirm which wording is intended.
start = time.time()
X, y = load_from_tsfile_to_dataframe(ts_path)
elapsed = time.time() - start
print('Time to fetch X y without compression: ', elapsed)

# Hold out part of the data for testing; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, random_state=0)
X_train, X_test, y_train, y_test = split

Time to fetch X y without compression:  37.23524594306946


Using MiniROCKET to transform the features

In [3]:
# Fit MiniRocket on the training split only, then transform both splits with the
# fitted transformer. Each step is timed (wall-clock seconds).
# The transformer is seeded so the extracted features are reproducible on a fresh
# kernel, in line with the seeded train/test split.
start = time.time()
minirocket = MiniRocket(random_state=0)  # default size yields roughly 10,000 output features
minirocket.fit(X_train)
print('Time to fit MiniROCKET: ', time.time() - start)
start = time.time()
X_train_transform = minirocket.transform(X_train)
print('Time to transform X_train: ', time.time() - start)
start = time.time()
X_test_transform = minirocket.transform(X_test)
print('Time to transform X_test: ', time.time() - start)

Time to fit MiniROCKET:  1.511939287185669
Time to transform X_train:  28.37428641319275
Time to transform X_test:  8.864571332931519


Writing the transformed data to train and test files

In [9]:
def write_to_ts(filepath, X, y, problem_name='LungSoundsMiniROCKET',
                class_labels=('normal', 'crackle', 'wheeze')):
    """Write a feature table and its class labels to a .ts text file.

    The file starts with a fixed metadata header (``@problemName`` ... ``@data``)
    followed by one line per row of ``X``: the row's values joined by commas,
    a colon, and the row's label.

    Parameters
    ----------
    filepath : str
        Destination file; it is created or overwritten.
    X : pandas.DataFrame
        One row per case; the number of columns is written as ``@seriesLength``.
    y : indexable
        Labels, looked up as ``y[idx]`` for every index label ``idx`` of ``X``.
        With a positional container (e.g. a NumPy array) ``X`` therefore needs
        a 0..n-1 index.
    problem_name : str, optional
        Value written after ``@problemName``. The default keeps the original header.
    class_labels : iterable of str, optional
        Class names written after ``@classLabel true``. The default keeps the
        original header.
    """
    labels_text = ' '.join(str(label) for label in class_labels)
    header_lines = [
        f'@problemName {problem_name} \n',
        '@timeStamps false \n',
        '@missing false \n',
        '@univariate true \n',
        '@equalLength true \n',
        f'@seriesLength {len(X.columns)} \n',
        f'@classLabel true {labels_text}\n',
        '@data \n',
    ]

    # The context manager guarantees the file is flushed and closed, even if a
    # row fails to serialise part-way through.
    with open(filepath, 'w') as w:
        w.writelines(header_lines)
        for (idx, row) in X.iterrows():
            # Same text as str(list(row)) without brackets and spaces.
            values = ','.join(repr(value) for value in row).replace(' ', '')
            w.write(f'{values}:{y[idx]}\n')
        

In [6]:
# Both splits are stored side by side in the extracted-features folder.
features_dir = module_path + '/features/extracted_features_ts_files/'
train_path = features_dir + 'uit_MiniROCKET_TRAIN.ts'
test_path = features_dir + 'uit_MiniROCKET_TEST.ts'

write_to_ts(train_path, X_train_transform, y_train)
write_to_ts(test_path, X_test_transform, y_test)