# Human Activity Recognition: TSFEL Feature Extraction
This notebook demonstrates the process of extracting time-series features from raw accelerometer data using the TSFEL library. The workflow includes:
- Loading and preprocessing raw sensor data
- Extracting features with TSFEL


In [15]:
import os
from pathlib import Path
import pandas as pd
import tsfel

ACTIVITIES = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']
SLICE = slice(100, 600)  
FS = 50                  # sampling rate

# Build TSFEL config ONCE with all features enabled
cfg = tsfel.get_features_by_domain()
for domain in cfg:
    for feature in cfg[domain]:
        cfg[domain][feature]['use'] = 'yes'  # use all features

def store_tsfel_split(split_name: str):
    
    base_dir = os.path.join(BASE, 'Combined', split_name) # read the raw accelerometer data stored in Combined Folder. 
    output_base_dir = os.path.join(BASE, 'Datasets', 'TSFEL_3axes_allfeatures', split_name)

    for activity in ACTIVITIES:
        activity_dir = os.path.join(base_dir, activity)
        output_activity_dir = os.path.join(output_base_dir, activity)
        Path(output_activity_dir).mkdir(parents=True, exist_ok=True)

        if not os.path.isdir(activity_dir):
            print(f"[WARN] Missing folder: {activity_dir}")
            continue

        subject_files = [f for f in os.listdir(activity_dir) if f.lower().endswith('.csv')]
        if not subject_files:
            print(f"[WARN] No CSV files in: {activity_dir}")
            continue

        print(f"[{split_name}] {activity}: {len(subject_files)} files")
        for file in subject_files:
            try:
                file_path = os.path.join(activity_dir, file)
                df = pd.read_csv(file_path).iloc[SLICE, :]   # same slice 100:600 as you had

                # TSFEL extraction call (all features enabled)
                features = tsfel.time_series_features_extractor(cfg, df, fs=FS, verbose=0)

                subject_id = os.path.splitext(file)[0]
                output_file = os.path.join(output_activity_dir, f'{subject_id}.csv')
                features.to_csv(output_file, index=False)
            except Exception as e:
                print(f"[ERROR] {split_name}/{activity}/{file}: {e}")

# -------- Run for Train and Test using YOUR code path --------
store_tsfel_split('Train')
store_tsfel_split('Test')

print("\n[Done] Stored TSFEL features at:")
print(os.path.join(BASE, 'Combined', 'TSFEL_3axes_allfeatures11'))


[Train] LAYING: 21 files
[Train] SITTING: 21 files
[Train] SITTING: 21 files
[Train] STANDING: 21 files
[Train] STANDING: 21 files
[Train] WALKING: 21 files
[Train] WALKING: 21 files
[Train] WALKING_UPSTAIRS: 21 files
[Train] WALKING_UPSTAIRS: 21 files
[Train] WALKING_DOWNSTAIRS: 21 files
[Train] WALKING_DOWNSTAIRS: 21 files
[Test] LAYING: 9 files
[Test] LAYING: 9 files
[Test] SITTING: 9 files
[Test] SITTING: 9 files
[Test] STANDING: 9 files
[Test] STANDING: 9 files
[Test] WALKING: 9 files
[Test] WALKING: 9 files
[Test] WALKING_UPSTAIRS: 9 files
[Test] WALKING_UPSTAIRS: 9 files
[Test] WALKING_DOWNSTAIRS: 9 files
[Test] WALKING_DOWNSTAIRS: 9 files

[Done] Stored TSFEL features at:
C:\Users\ginis\Downloads\ML\UCI HAR Dataset including combined folder\Combined\TSFEL_3axes_allfeatures11

[Done] Stored TSFEL features at:
C:\Users\ginis\Downloads\ML\UCI HAR Dataset including combined folder\Combined\TSFEL_3axes_allfeatures11


## TSFEL Feature Extraction
This section extracts time-series features from the raw accelerometer data using the TSFEL library for each activity and subject.

In [16]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_tsfel_dataset(base_dir, dataset_type):
    activities = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', 'WALKING_UPSTAIRS']

    dfs = []      # to keep DataFrames (with feature names)
    arrays = []   # to keep NumPy arrays
    labels = []   # activity labels

    for activity in activities:
        folder_path = os.path.join(base_dir, dataset_type, activity)
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.csv'):
                file_path = os.path.join(folder_path, file_name)
                df = pd.read_csv(file_path)

                dfs.append(df)              # keep DataFrame (1 sample, many features)
                arrays.append(df.values)    # keep numpy values
                labels.append(activity)

    X_df = pd.concat(dfs, axis=0, ignore_index=True)
    y_labels = np.array(labels)

    X_array = np.array(arrays)  # shape: (n_samples, 1, n_features) if each CSV is 1 row
    if X_array.ndim == 3 and X_array.shape[1] == 1:
        X_array = X_array[:, 0, :]  # squeeze to (n_samples, n_features)

    # Encode labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_labels)

    return X_df, X_array, y_encoded, label_encoder

base_dir = 'Datasets/TSFEL_3axes_allfeatures'
X_train_df, X_train, y_train, le = load_tsfel_dataset(base_dir, 'Train')
X_test_df,  X_test,  y_test,  _ = load_tsfel_dataset(base_dir, 'Test')

print("Shapes:")
print("  X_train_df:", X_train_df.shape)   
print("  X_train:", X_train.shape)  
print("  X_test:", X_test.shape)  
print("  y_train:", y_train.shape)
print("First 5 feature names:", X_train_df.columns[:5].tolist())
print("Classes:", list(le.classes_))


Shapes:
  X_train_df: (126, 489)
  X_train: (126, 489)
  X_test: (54, 489)
  y_train: (126,)
First 5 feature names: ['accx_Absolute energy', 'accx_Area under the curve', 'accx_Autocorrelation', 'accx_Average power', 'accx_Centroid']
Classes: [np.str_('LAYING'), np.str_('SITTING'), np.str_('STANDING'), np.str_('WALKING'), np.str_('WALKING_DOWNSTAIRS'), np.str_('WALKING_UPSTAIRS')]
