In [1]:
import sys
sys.path.append('./libraries')

In [2]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Catch22 Features
from pycatch22 import catch22_all

# ROCKET Features
from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariateVariable,
)

from libraries.functions import contains_non_numeric

# Hand-Crafted features
from libraries.hand_crafted_features import return_HC_features

# File paths

In [3]:
# Input: pickled windowed dataset read by the loading cell.
windowed_dataset_path = './dataset/windowed_data_six_labels_dataset.pkl'

# Outputs: one pickle per feature family, written by the save cells further down.
HC_feature_dataset_save_path = (
    './dataset/feature_datasets/HC_feature_data_6_behaviours.pkl'
)
Catch24_feature_dataset_save_path = (
    './dataset/feature_datasets/Catch24_feature_data_6_behaviours.pkl'
)
ROCKET_feature_dataset_save_path = (
    './dataset/feature_datasets/ROCKET_feature_data_6_behaviours.pkl'
)

# Consts

In [4]:
# Behaviour labels considered for this dataset.
# NOTE(review): not referenced by any other cell visible in this notebook.
CONSIDERED_LABELS = [
    'drinking_milk',
    'grooming',
    'lying',
    'running',
    'walking',
    'other',
]

In [5]:
# Signal channels looked up in every window (``window[key]``) when deriving features.
# The order fixes the order of the per-channel feature groups in each feature vector.
keys = [
    'Accx', 'Accy', 'Accz',
    'Amag', 'ODBA', 'VeDBA',
    'pitch', 'roll',
]

# Importing window data

In [6]:
# Load the windowed dataset. Later cells iterate it as calf -> label -> list of
# windows, where each window is indexed by the channel names in `keys`.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open(windowed_dataset_path, 'rb') as dataset_file:
    window_data = pickle.load(dataset_file)

# Hand-Crafted Feature Derivation

In [7]:
%%time

# Hand-crafted features, stored as calf -> label -> list of feature vectors.
HC_feature_data = {}

for calf, label_data in window_data.items():
    calf_features = HC_feature_data[calf] = {}
    for label, windows in label_data.items():
        label_features = calf_features[label] = []
        for window in windows:
            # Concatenate the hand-crafted features of every channel, in `keys` order.
            feature_vector = [
                value
                for key in keys
                for value in return_HC_features(np.array(window[key]))
            ]

            # Drop the whole window when any feature is flagged as non-numeric
            # (presumably a failed feature calculation - confirm in libraries.functions).
            if contains_non_numeric(feature_vector):
                continue
            label_features.append(feature_vector)

CPU times: user 11min 36s, sys: 559 ms, total: 11min 37s
Wall time: 11min 37s


In [8]:
# Save the hand-crafted feature data.
# Create the output folder first so a missing directory cannot discard the
# result of the long-running feature derivation above.
os.makedirs(os.path.dirname(HC_feature_dataset_save_path) or '.', exist_ok=True)
with open(HC_feature_dataset_save_path, 'wb') as f:
    pickle.dump(HC_feature_data, f)

In [9]:
# The hand-crafted features were written to disk in the previous cell; drop the
# in-memory reference before deriving the next feature family.
del HC_feature_data

# Catch24 Feature Derivation

In [10]:
%%time

# Catch24 features, stored as calf -> label -> list of feature vectors.
# The variable keeps its "Catch22" name because later cells refer to it, but
# `catch24=True` is passed, which per the pycatch22 docs adds mean and standard
# deviation to the 22 base features.
Catch22_feature_data = {}

for calf, label_data in window_data.items():
    calf_features = {}
    Catch22_feature_data[calf] = calf_features
    for label, windows in label_data.items():
        label_features = []
        calf_features[label] = label_features
        for window in windows:
            feature_vector = []
            for key in keys:
                channel_values = np.array(window[key])
                catch_result = catch22_all(channel_values, catch24=True)
                # Only the numeric values are kept; the feature names are discarded.
                feature_vector.extend(catch_result['values'])

            # Skip windows for which any feature is flagged as non-numeric.
            if contains_non_numeric(feature_vector):
                continue
            label_features.append(feature_vector)

CPU times: user 3min 33s, sys: 2.05 s, total: 3min 35s
Wall time: 3min 22s


In [11]:
# Save the Catch24 feature data.
# Create the output folder first so a missing directory cannot discard the
# features that were just computed.
os.makedirs(os.path.dirname(Catch24_feature_dataset_save_path) or '.', exist_ok=True)
with open(Catch24_feature_dataset_save_path, 'wb') as f:
    pickle.dump(Catch22_feature_data, f)

In [12]:
# The Catch24 features were written to disk in the previous cell; drop the
# in-memory reference before deriving the ROCKET features.
del Catch22_feature_data

# ROCKET Feature Derivation

In [13]:
def derive_ROCKET_features(data_df, minirocket_multi, scaler, keys, trim=0):
    """Fit a ROCKET-style transformer on ``data_df`` and return its scaled features.

    Both ``minirocket_multi`` and ``scaler`` are (re)fit on ``data_df`` on every
    call, so any state from a previous call is replaced.

    Parameters
    ----------
    data_df : input panel passed unchanged to ``minirocket_multi.fit`` and
        ``minirocket_multi.transform``.
    minirocket_multi : transformer exposing ``fit`` and ``transform``.
    scaler : scaler exposing ``fit_transform``.
    keys : accepted for interface compatibility; not used by this function.
    trim : accepted for interface compatibility; not used by this function.

    Returns
    -------
    Whatever ``scaler.fit_transform`` returns for the transformed features.
    """
    minirocket_multi.fit(data_df)
    rocket_features = minirocket_multi.transform(data_df)
    return scaler.fit_transform(rocket_features)

In [14]:
%%time

# `derive_ROCKET_features` is defined in the previous cell; the identical
# re-definition that used to live here was removed so there is a single source
# of truth for the function.

# ROCKET features, stored as calf -> label -> scaled feature matrix.
ROCKET_feature_data = {}

minirocket_multi_var = MiniRocketMultivariateVariable(num_kernels=10000, random_state=42)
scaler = StandardScaler(with_mean=False)

# NOTE(review): derive_ROCKET_features refits both the transformer and the
# scaler on every call, and it is called once per (calf, label) group here.
# Each group is therefore transformed and scaled with parameters fitted on that
# group alone - confirm this is intended before comparing features across
# calves or labels.
for calf_id, label_data in window_data.items():
    ROCKET_feature_data[calf_id] = {}
    for label, windows in label_data.items():
        # One row per window, one column per channel; each cell holds that
        # channel's series for the window.
        series_df = pd.DataFrame([[window[col] for col in keys] for window in windows])
        ROCKET_feature_data[calf_id][label] = derive_ROCKET_features(
            series_df, minirocket_multi_var, scaler, keys
        )
        
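'''
Note on the warning emitted by this cell:
"UserWarning: X is of equal length, consider using MiniRocketMultivariate for speedup and stability instead"
is raised when every window passed to a single fit call has the same length.
MiniRocketMultivariateVariable is presumably kept because there can be some cases
where the windows in a group differ in length (TODO: confirm the intended reason).
'''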

  self._fit(X=X_inner, y=y_inner)
  self._fit(X=X_inner, y=y_inner)
  self._fit(X=X_inner, y=y_inner)


CPU times: user 2min 42s, sys: 1.77 s, total: 2min 44s
Wall time: 2min 44s


In [15]:
# Save the ROCKET feature data.
# Create the output folder first so a missing directory cannot discard the
# features that were just computed.
os.makedirs(os.path.dirname(ROCKET_feature_dataset_save_path) or '.', exist_ok=True)
with open(ROCKET_feature_dataset_save_path, 'wb') as f:
    pickle.dump(ROCKET_feature_data, f)