In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#%pip install regex
import regex as re

In [3]:
from Python3Code.Chapter4.FrequencyAbstraction import FourierTransformation
from Python3Code.Chapter4.TemporalAbstraction import NumericalAbstraction

In [4]:
# Load the imputed sensor dataset (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='data/dataset_imputed.csv')

### Display the preview of our imputed data

In [5]:
# Quick look at the loaded table: the first five rows, then every column name
print("Data Preview:")
display(df.head(5))
print("Columns:", list(df.columns))

Data Preview:


Unnamed: 0,seconds_elapsed,z_accelerometer,y_accelerometer,x_accelerometer,z_gyroscope,y_gyroscope,x_gyroscope,user,z_accelerometer_lowpass,y_accelerometer_lowpass,...,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,pca_12,pca_13
0,0.040027,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.087919,-0.502079,...,-0.358698,-0.992872,-1.506684,0.264751,0.385887,-0.647561,-1.242256,0.212099,0.806083,-1.118422
1,0.04046,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.30624,1,-0.09402,-0.49913,...,-0.143218,-1.130201,-1.464593,0.341156,0.633756,-0.420294,-1.484735,0.022319,0.635951,-1.051559
2,0.049252,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.30624,1,-0.100092,-0.496159,...,-0.135123,-1.127834,-1.465137,0.352453,0.62888,-0.427888,-1.466034,0.014356,0.629667,-1.0241
3,0.04996,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.10613,-0.493168,...,-0.334416,-0.985791,-1.508386,0.298638,0.371265,-0.670332,-1.186157,0.188215,0.787236,-1.036056
4,0.059245,1.274834,-0.240963,-0.35897,-0.145028,0.080974,-0.38048,1,-0.112131,-0.490155,...,-0.129642,-1.15496,-1.456594,0.324531,0.656941,-0.430889,-1.43752,-0.029626,0.61783,-0.974045


Columns: ['seconds_elapsed', 'z_accelerometer', 'y_accelerometer', 'x_accelerometer', 'z_gyroscope', 'y_gyroscope', 'x_gyroscope', 'user', 'z_accelerometer_lowpass', 'y_accelerometer_lowpass', 'x_accelerometer_lowpass', 'z_gyroscope_lowpass', 'y_gyroscope_lowpass', 'x_gyroscope_lowpass', 'pca_1', 'pca_2', 'pca_3', 'pca_4', 'pca_5', 'pca_6', 'pca_7', 'pca_8', 'pca_9', 'pca_10', 'pca_11', 'pca_12', 'pca_13']


### Temporal Abstraction

In [6]:
# Signals that receive temporal and frequency features: the six low-pass
# filtered channels first, followed by the six raw channels (z, y, x per sensor).
columns_to_transform = [
    f'{axis}_{sensor}{suffix}'
    for suffix in ('_lowpass', '')
    for sensor in ('accelerometer', 'gyroscope')
    for axis in ('z', 'y', 'x')
]

# Shared settings for both abstraction steps below.
window_size = 40     # window length passed to the temporal and frequency abstractions
sampling_rate = 100  # Hz, passed to the Fourier transformation

##### Add the features per user

In [7]:
na = NumericalAbstraction()
aggregations = ('mean', 'std', 'min', 'max')
processed_users = []

# Rows of different users are interleaved in `df`, so each user's rows are
# isolated before the windowed features are computed on them.
for user in df.user.unique():
    user_df = df.loc[df.user == user].copy()

    # One pass per aggregation; each pass adds its own set of feature columns.
    for agg_name in aggregations:
        user_df = na.abstract_numerical(
            data_table=user_df,
            cols=columns_to_transform,
            window_size=window_size,
            aggregation_function_name=agg_name,
        )

    processed_users.append(user_df)

# Stitch the per-user frames back together in the original row order
df = pd.concat(processed_users).sort_index()
df

Unnamed: 0,seconds_elapsed,z_accelerometer,y_accelerometer,x_accelerometer,z_gyroscope,y_gyroscope,x_gyroscope,user,z_accelerometer_lowpass,y_accelerometer_lowpass,...,x_accelerometer_lowpass_temp_max_ws_40,z_gyroscope_lowpass_temp_max_ws_40,y_gyroscope_lowpass_temp_max_ws_40,x_gyroscope_lowpass_temp_max_ws_40,z_accelerometer_temp_max_ws_40,y_accelerometer_temp_max_ws_40,x_accelerometer_temp_max_ws_40,z_gyroscope_temp_max_ws_40,y_gyroscope_temp_max_ws_40,x_gyroscope_temp_max_ws_40
0,0.040027,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.087919,-0.502079,...,,,,,,,,,,
1,0.040460,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.306240,1,-0.094020,-0.499130,...,,,,,,,,,,
2,0.049252,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.306240,1,-0.100092,-0.496159,...,,,,,,,,,,
3,0.049960,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.106130,-0.493168,...,,,,,,,,,,
4,0.059245,1.274834,-0.240963,-0.358970,-0.145028,0.080974,-0.380480,1,-0.112131,-0.490155,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232641,775.538496,-0.992985,-5.758652,4.859346,1.199442,-0.058727,1.369393,3,-1.337381,0.147406,...,0.333657,-0.345793,0.174361,-0.558866,5.432712,2.351401,4.859346,1.199442,3.424362,1.369393
232642,775.539918,-7.387298,-8.144462,0.807643,-0.370583,-1.712168,-0.019191,2,-1.338083,0.146336,...,0.333903,-0.351329,0.174521,-0.574431,7.008690,8.385260,8.410290,0.494739,1.324911,-0.019191
232643,775.544682,0.172228,-0.416443,0.286711,-0.334846,-0.421050,-0.304001,1,-1.338672,0.145438,...,0.334107,-0.356845,0.174461,-0.589739,2.472252,1.391907,0.286711,-0.135751,0.047113,0.599179
232644,775.548429,-0.732786,-5.806568,5.015984,1.414098,0.109211,1.592959,3,-1.339158,0.144694,...,0.334275,-0.362338,0.174361,-0.604784,4.298582,2.351401,5.015984,1.414098,3.424362,1.592959


### Frequency Abstraction

Set up the FFT (fast Fourier transform).
Each user is processed separately.

In [8]:
# Frequency-domain features, computed on each user's rows in isolation
ft = FourierTransformation()

processed_users = [
    ft.abstract_frequency(
        data_table=df.loc[df.user == user].copy(),
        columns=columns_to_transform,
        window_size=window_size,
        sampling_rate=sampling_rate,
    )
    for user in df.user.unique()
]

# Merge the per-user results and restore the original row order
df = pd.concat(processed_users).sort_index()
df

  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)
  data_table[collist] = pd.DataFrame(f

Unnamed: 0,seconds_elapsed,z_accelerometer,y_accelerometer,x_accelerometer,z_gyroscope,y_gyroscope,x_gyroscope,user,z_accelerometer_lowpass,y_accelerometer_lowpass,...,x_gyroscope_freq_27.5_Hz_ws_40,x_gyroscope_freq_30.0_Hz_ws_40,x_gyroscope_freq_32.5_Hz_ws_40,x_gyroscope_freq_35.0_Hz_ws_40,x_gyroscope_freq_37.5_Hz_ws_40,x_gyroscope_freq_40.0_Hz_ws_40,x_gyroscope_freq_42.5_Hz_ws_40,x_gyroscope_freq_45.0_Hz_ws_40,x_gyroscope_freq_47.5_Hz_ws_40,x_gyroscope_freq_50.0_Hz_ws_40
0,0.040027,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.087919,-0.502079,...,,,,,,,,,,
1,0.040460,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.306240,1,-0.094020,-0.499130,...,,,,,,,,,,
2,0.049252,1.260347,-0.319836,-0.342097,-0.110204,0.161805,-0.306240,1,-0.100092,-0.496159,...,,,,,,,,,,
3,0.049960,-0.273211,-0.279689,-0.342461,0.145889,0.136416,0.005533,3,-0.106130,-0.493168,...,,,,,,,,,,
4,0.059245,1.274834,-0.240963,-0.358970,-0.145028,0.080974,-0.380480,1,-0.112131,-0.490155,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232641,775.538496,-0.992985,-5.758652,4.859346,1.199442,-0.058727,1.369393,3,-1.337381,0.147406,...,-1.756484,-1.835327,-1.676813,-1.806152,-1.818741,-1.880917,-1.797777,-1.850762,-1.890624,-1.985770
232642,775.539918,-7.387298,-8.144462,0.807643,-0.370583,-1.712168,-0.019191,2,-1.338083,0.146336,...,-0.081609,-0.366085,-0.257498,-0.277366,-0.286400,-0.336105,-0.286651,-0.273499,-0.273911,-0.286809
232643,775.544682,0.172228,-0.416443,0.286711,-0.334846,-0.421050,-0.304001,1,-1.338672,0.145438,...,0.200181,0.292764,0.318034,0.220598,0.134340,0.207274,0.237348,0.203494,0.181727,0.187381
232644,775.548429,-0.732786,-5.806568,5.015984,1.414098,0.109211,1.592959,3,-1.339158,0.144694,...,-2.191315,-2.116063,-2.358711,-2.283660,-2.258479,-2.155638,-2.221200,-2.232408,-2.212610,-2.124797


### Let's remove the added missing values

In [16]:
# The new feature columns are NaN for the earliest rows (see the tables above);
# discard every row that has a NaN in any column.
df = df.dropna(axis=0, how='any')
df

Unnamed: 0,seconds_elapsed,z_accelerometer,y_accelerometer,x_accelerometer,z_gyroscope,y_gyroscope,x_gyroscope,user,z_accelerometer_lowpass,y_accelerometer_lowpass,...,x_gyroscope_freq_27.5_Hz_ws_40,x_gyroscope_freq_30.0_Hz_ws_40,x_gyroscope_freq_32.5_Hz_ws_40,x_gyroscope_freq_35.0_Hz_ws_40,x_gyroscope_freq_37.5_Hz_ws_40,x_gyroscope_freq_40.0_Hz_ws_40,x_gyroscope_freq_42.5_Hz_ws_40,x_gyroscope_freq_45.0_Hz_ws_40,x_gyroscope_freq_47.5_Hz_ws_40,x_gyroscope_freq_50.0_Hz_ws_40
114,0.438087,-0.606554,0.896955,-1.459795,0.160653,-0.024878,-0.198946,3,0.009262,0.088612,...,-0.001174,-0.008170,0.053239,0.121995,0.173472,0.188852,0.178741,0.117877,0.042600,0.005056
115,0.439011,-0.651863,0.687298,0.363176,0.236960,-0.257517,0.760620,1,0.015715,0.091767,...,-0.505881,-0.520236,-0.554544,-0.567485,-0.574446,-0.534125,-0.534210,-0.542236,-0.557943,-0.563961
117,0.448021,-0.427877,0.661085,-1.474408,0.104291,-0.052918,-0.176606,3,0.028302,0.097477,...,0.124298,0.069849,-0.007237,-0.018529,-0.009509,0.018068,0.056638,0.098527,0.155658,0.178766
118,0.449005,-0.260615,0.538511,0.417578,0.253509,-0.124384,0.701848,1,0.034418,0.100028,...,-0.428012,-0.437878,-0.460331,-0.452051,-0.445384,-0.475633,-0.475844,-0.465259,-0.446488,-0.443625
120,0.457955,-0.249200,0.425215,-1.489022,0.047929,-0.080958,-0.154265,3,0.046258,0.104514,...,0.173129,0.195635,0.200188,0.167148,0.127638,0.088691,0.043387,0.029706,-0.007994,-0.020071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232641,775.538496,-0.992985,-5.758652,4.859346,1.199442,-0.058727,1.369393,3,-1.337381,0.147406,...,-1.756484,-1.835327,-1.676813,-1.806152,-1.818741,-1.880917,-1.797777,-1.850762,-1.890624,-1.985770
232642,775.539918,-7.387298,-8.144462,0.807643,-0.370583,-1.712168,-0.019191,2,-1.338083,0.146336,...,-0.081609,-0.366085,-0.257498,-0.277366,-0.286400,-0.336105,-0.286651,-0.273499,-0.273911,-0.286809
232643,775.544682,0.172228,-0.416443,0.286711,-0.334846,-0.421050,-0.304001,1,-1.338672,0.145438,...,0.200181,0.292764,0.318034,0.220598,0.134340,0.207274,0.237348,0.203494,0.181727,0.187381
232644,775.548429,-0.732786,-5.806568,5.015984,1.414098,0.109211,1.592959,3,-1.339158,0.144694,...,-2.191315,-2.116063,-2.358711,-2.283660,-2.258479,-2.155638,-2.221200,-2.232408,-2.212610,-2.124797


In [17]:
# Number of columns after feature engineering
df.shape[1]

363

In [18]:
# 5. Visualize Frequency Features -- currently disabled.
#
# Kept as `#` comments instead of a bare triple-quoted string: a string literal
# that is the last expression of a cell is echoed back as the cell's output.
# NOTE(review): the original snippet read from `df_transformed`, which no
# visible cell defines; it now points at `df`. Confirm that is the intended
# table before re-enabling.
#
# freq_cols = [col for col in df.columns if '_freq_' in col]
#
# # Correlation heatmap
# plt.figure(figsize=(12, 8))
# sns.heatmap(df[freq_cols].corr(), cmap="coolwarm", center=0)
# plt.title("Correlation between Fourier-derived Features")
# plt.show()

'\n# 5. Visualize Frequency Features\nfreq_cols = [col for col in df_transformed.columns if \'_freq_\' in col]\n\n# Correlation heatmap\nplt.figure(figsize=(12, 8))\nsns.heatmap(df_transformed[freq_cols].corr(), cmap="coolwarm", center=0)\nplt.title("Correlation between Fourier-derived Features")\nplt.show()\n'

In [19]:
# 6. Save output (optional)
# NOTE(review): `df_transformed` is not defined in any visible cell; the feature
# table in this notebook is named `df`. Update the name before re-enabling.
#df_transformed.to_csv("user1_fourier_transformed_features.csv", index=False)
#print("Fourier-transformed features saved.")