# Loading the features
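The following cell loads the features extracted from the windowed time series. The features are stored as a pickled, gzip-compressed file that has been split into numbered parts (`features_20s_part_000`, `features_20s_part_001`, …); the parts are concatenated, decompressed, and unpickled.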

In [6]:
'''Reading the tabular data'''
import pickle
import gzip
import shutil
import os
import yaml
import warnings
import pandas as pd
import numpy as np


def load_pickle_from_parts(parts_dir):
    """Reassemble the split gzip archive in ``parts_dir`` and unpickle its contents.

    The archive is expected as consecutively numbered files named
    ``features_20s_part_000``, ``features_20s_part_001``, ... The parts are
    concatenated in order into a temporary ``features_20s.gz`` inside
    ``parts_dir``; that file is decompressed, unpickled, and always deleted
    again, even when decompression or unpickling fails.

    Parameters
    ----------
    parts_dir : str
        Directory containing the numbered part files.

    Returns
    -------
    object
        The object that was pickled into the archive.

    Raises
    ------
    FileNotFoundError
        If ``features_20s_part_000`` does not exist in ``parts_dir``.
    """
    # Discover the consecutive parts first, so nothing is written to disk
    # when there is nothing to combine.
    part_paths = []
    while True:
        part_path = os.path.join(parts_dir, f'features_20s_part_{len(part_paths):03d}')
        if not os.path.exists(part_path):
            break
        part_paths.append(part_path)
    if not part_paths:
        raise FileNotFoundError(
            f"No 'features_20s_part_000' file found in {parts_dir!r}"
        )

    combined_path = os.path.join(parts_dir, 'features_20s.gz')
    try:
        # Combine the parts into a single compressed file
        with open(combined_path, 'wb') as combined_file:
            for part_path in part_paths:
                with open(part_path, 'rb') as part_file:
                    shutil.copyfileobj(part_file, combined_file)

        # Decompress the combined file and load the pickle data.
        # SECURITY: pickle.load can execute arbitrary code -- only use this
        # on archives from a trusted source.
        with gzip.open(combined_path, 'rb') as f_in:
            return pickle.load(f_in)
    finally:
        # Never leave the temporary combined archive behind in the data directory.
        if os.path.exists(combined_path):
            os.remove(combined_path)

# Load the feature dictionary from the `data` folder located in the parent
# of the current working directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
parts_dir = os.path.join(parent_dir, 'data')
data_dict = load_pickle_from_parts(parts_dir=parts_dir)


In [22]:
# --- Pipeline configuration ---

# Devices whose features are included in the pipeline
devices = [
    'corsano_wrist', 'cosinuss_ear',
    'sensomative_back', 'sensomative_bottom',
    'vivalink_patch', 'zurichmove_wheel',
]
n_devices = len(devices)

# parameters.yaml is read from the parent of the current working directory
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
yaml_file_path = os.path.join(parent_dir, 'parameters.yaml')
with open(yaml_file_path, 'r') as f:
    params = yaml.safe_load(f)

# Expose the individual settings as notebook-level names
seed_number, upsample_freq, activities_label_mapping = (
    params[key]
    for key in ('seed_number', 'upsample_freq', 'activities_label_mapping')
)

# Silence every FutureWarning, plus RuntimeWarnings whose message starts with
# 'overflow encountered in cast'.
warnings.simplefilter('ignore', category=FutureWarning)
warnings.filterwarnings(
    'ignore',
    category=RuntimeWarning,
    message='overflow encountered in cast',
)


# Converting the dict data to a single dataframe.
# `subject_names[i]` is the key of the subject stored as integer `i` in the
# 'subject' column, so the index -> name mapping stays available afterwards.
subject_names = list(data_dict.keys())
dfs = []
for i_subject, subject in enumerate(subject_names):
    subject_data = data_dict[subject]
    # Per-device feature blocks in the order of `devices` (so editing that list
    # really changes what enters the pipeline), followed by the labels.
    frames = [subject_data[device] for device in devices]
    frames.append(subject_data['label'])
    df = pd.concat(frames, axis=1)
    df['subject'] = i_subject
    dfs.append(df)
data_df = pd.concat(dfs)

# Cast every column to float32, then give 'label' and 'subject' an integer dtype again
data_df = data_df.astype('float32').astype({'label': int, 'subject': int})

# Treat +/-inf as missing, then drop every column that contains a missing value
data_df = data_df.replace([np.inf, -np.inf], np.nan)
data_df = data_df.dropna(axis=1, how='any')

# For each device (in the order of `devices`), list the remaining dataframe
# columns whose name contains the device name.
device_columns = [
    [name for name in data_df.columns if device in name]
    for device in devices
]

# Convert the pandas df to cudf
# data_df = cudf.DataFrame.from_pandas(data_df)

# Split the dataframe into arrays: feature matrix, labels and subject indices
X = np.array(data_df.drop(columns=['label', 'subject']))
y = np.array(data_df['label'])
subjects = np.array(data_df['subject'])

# Reporting the data imbalance: unique classes and their counts
classes, counts = np.unique(y, return_counts=True)

# Activity names are paired with the sorted class ids purely by position.
# NOTE(review): this assumes the key order of activities_label_mapping follows
# the label ids -- confirm against parameters.yaml.
activity_names = list(activities_label_mapping.keys())
if len(activity_names) != len(classes):
    # np.vstack would fail here anyway, but with an opaque shape error.
    raise ValueError(
        f"Cannot build the class report: {len(classes)} classes found in the data "
        f"but activities_label_mapping has {len(activity_names)} entries."
    )

# Create a report table (all entries become strings because of the names row)
report_table = np.vstack((classes, activity_names, counts)).T
print("Class | Activity         | Count")
print("--------------------------------")
for row in report_table:
    print(f"{row[0]:<5} | {row[1]:<16} | {row[2]:<5}")


Class | Activity         | Count
--------------------------------
0     | calmness         | 3646 
1     | selfpropulsion   | 1760 
2     | armraises        | 1022 
3     | transfer         | 2018 
4     | usingphone       | 1786 
5     | talking          | 2524 
6     | washhands        | 2246 
7     | eating           | 1618 
8     | assistedprop     | 1534 
9     | usingcomputer    | 2494 
10    | changingclothes  | 1126 
11    | pressurerelief   | 1398 
