In [12]:
import pandas as pd
from datetime import datetime

## SET CORRECT VALUES FIRST!!
file_csv = "HIMU-2021-12-10_14-57-10.csv"
user = 6 #user
exp = 10
device_is_s8 = True #true = s8, false = s7
## Thanks

df = pd.read_csv(file_csv, parse_dates=True)
df['timestamp'] = pd.to_datetime(df['timestamp'],unit='ms')

s8 = {
    "acc": ["lsm6dsl_acceleration_sensor.x", "lsm6dsl_acceleration_sensor.y", "lsm6dsl_acceleration_sensor.z"],
    "gyr": ["lsm6dsl_gyroscope_sensor.x", "lsm6dsl_gyroscope_sensor.y", "lsm6dsl_gyroscope_sensor.z"]
}

s7 = {
    "acc": ["k6ds3tr_acceleration_sensor.x", "k6ds3tr_acceleration_sensor.y", "k6ds3tr_acceleration_sensor.z"],
    "gyr": ["k6ds3tr_gyroscope_sensor.x", "k6ds3tr_gyroscope_sensor.y", "k6ds3tr_gyroscope_sensor.z"]
}
device = s7
if device_is_s8:
    device = s8

file_name = f"raw data/acc_exp{exp:02d}_user{user:02d}_device{'S8' if device_is_s8 else 'S7'}.txt"
my_file = open(file_name, 'w')
for index, row in df.iterrows():
    my_file.write(f"{row[device['acc'][0]]} {row[device['acc'][1]]} {row[device['acc'][2]]}\n")
my_file.close()

file_name = f"raw data/gyro_exp{exp:02d}_user{user:02d}_device{'S8' if device_is_s8 else 'S7'}.txt"
my_file = open(file_name, 'w')
for index, row in df.iterrows():
    my_file.write(f"{row[device['gyr'][0]]} {row[device['gyr'][1]]} {row[device['gyr'][2]]}\n")
my_file.close()

In [22]:
"""
- 'RawData/labels.txt': include all the activity labels available for the dataset (1 per row).
   Column 1: experiment number ID,
   Column 2: user number ID,
   Column 3: activity number ID
   Column 4: Label start point (in number of signal log samples (recorded at 50Hz))
   Column 5: Label end point (in number of signal log samples)

activity_type:
1 WALKING
2 WALKING_UPSTAIRS
3 WALKING_DOWNSTAIRS
4 SITTING
5 STANDING
"""
file_name_lab = f"labels_{'S8' if device_is_s8 else 'S7'}.txt"

activity_start = datetime.fromisoformat("2021-12-10 14:10:25") # Settes EN time tilbake fra log
activity_end = datetime.fromisoformat("2021-12-10 14:11:25") # "2021-12-09T08:31:15.000Z"
activity_type = 4 #between 1-5

val = df[(df['timestamp'] > activity_start) & (df['timestamp'] < activity_end)]

with open('raw data/labels.txt', "a") as a_file:
    a_file.write(f"{exp} {user} {activity_type} {val.iloc[0].name} {val.iloc[-1].name}\n")

In [1]:
from decimal import ROUND_HALF_UP, Decimal
from logging import getLogger
import math
import traceback
from typing import List, Tuple

import numpy as np
import pandas as pd
from numpy.linalg import norm
from scipy import stats
from scipy.fftpack import fft
from scipy.signal import butter, filtfilt
from statsmodels.distributions.empirical_distribution import ECDF
from statsmodels.regression.linear_model import burg

logger = getLogger(__name__)


class Preprocess:
    def __init__(self, fs: int = 50) -> None:
        """
        Args:
            fs (int, default=50): Sampling frequency of sensor signals
        """
        self.fs = fs

    def apply_filter(
            self, signal: pd.DataFrame, filter: str = "median", window: int = 5
    ) -> pd.DataFrame:
        """A denosing filter is applied to remove noise in signals.
        Args:
            signal (pd.DataFrame): Raw signal
            filter (str, default='median'): Filter name is chosen from 'mean', 'median', or 'butterworth'
            window (int, default=5): Length of filter
        Returns:
            signal (pd.DataFrame): Filtered signal
        See Also:
            'butterworth' applies a 3rd order low-pass Butterworth filter with a corner frequency of 20 Hz.
        """
        if filter == "mean":
            signal = signal.rolling(window=window, center=True, min_periods=1).mean()
        elif filter == "median":
            signal = signal.rolling(window=window, center=True, min_periods=1).median()
        elif filter == "butterworth":
            fc = 20  # cutoff frequency
            w = fc / (self.fs / 2)  # Normalize the frequency
            b, a = butter(3, w, "low")  # 3rd order low-pass Butterworth filter
            signal = pd.DataFrame(filtfilt(b, a, signal, axis=0), columns=signal.columns)
        else:
            try:
                raise ValueError("Not defined filter. See Args.")
            except ValueError:
                logger.error(traceback.format_exc())

        return signal

    def normalize(self, signal: pd.DataFrame) -> pd.DataFrame:
        """Apply normalization
        Args:
            signal (pd.DataFrame): Raw signal
        Returns:
            signal (pd.DataFrame): Normalized signal
        """
        df_mean = signal.mean()
        df_std = signal.std()
        signal = (signal - df_mean) / df_std
        return signal

    def segment_signal(
            self,
            signal: pd.DataFrame,
            window_size: int = 128,
            overlap_rate: int = 0.5,
            res_type: str = "dataframe",
    ) -> List[pd.DataFrame]:
        """Sample sensor signals in fixed-width sliding windows of 2.56 sec and 50% overlap (128 readings/window).
        Args:
            signal (pandas.DataFrame): Raw signal
            window_size (int, default=128): Window size of sliding window to segment raw signals.
            overlap_rate (float, default=0.5): Overlap rate of sliding window to segment raw signals.
            res_type (str, default='dataframe'): Type of return value; 'array' or 'dataframe'
        Returns:
            signal_seg (list of pandas.DataFrame): List of segmented sigmal.
        """
        signal_seg = []

        for start_idx in range(0, len(signal) - window_size, int(window_size * overlap_rate)):
            seg = signal.iloc[start_idx : start_idx + window_size].reset_index(drop=True)
            if res_type == "array":
                seg = seg.values
            signal_seg.append(seg)

        if res_type == "array":
            signal_seg = np.array(signal_seg)

        return signal_seg

    def separate_gravity(self, acc: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Separate acceleration signal into body and gravity acceleration signal.
        Another low pass Butterworth filter with a corner frequency of 0.3 Hz is applied.
        Args:
            acc (pd.DataFrame): Segmented acceleration signal
        Returns:
            acc_body (pd.DataFrame): Body acceleration signal
            acc_grav (pd.DataFrame): Gravity acceleration signal
        """
        fc = 0.3  # cutoff frequency
        w = fc / (self.fs / 2)  # Normalize the frequency
        b, a = butter(3, w, "low")  # 3rd order low pass Butterworth filter
        acc_grav = pd.DataFrame(
            filtfilt(b, a, acc, axis=0), columns=acc.columns
        )  # Apply Butterworth filter

        # Substract gravity acceleration from acceleration sigal.
        acc_body = acc - acc_grav
        return acc_body, acc_grav

    def obtain_jerk_signal(self, signal: pd.DataFrame) -> pd.DataFrame:
        """Derive signal to obtain Jerk signals
        Args:
            signal (pd.DataFrame)
        Returns:
            jerk_signal (pd.DataFrame):
        """
        jerk_signal = signal.diff(periods=1)  # Calculate difference
        jerk_signal.iloc[0] = jerk_signal.iloc[1]  # Fillna
        jerk_signal = jerk_signal / (1 / self.fs)  # Derive in time (1 / sampling frequency)
        return jerk_signal

    def obtain_magnitude(self, signal):
        """Calculate the magnitude of these three-dimensional signals using the Euclidean norm
        Args:
            signal (pandas.DataFrame): Three-dimensional signals
        Returns:
            res (pandas.DataFrame): Magnitude of three-dimensional signals
        """
        return pd.DataFrame(norm(signal, ord=2, axis=1))

    def obtain_spectrum(self, signal):
        """Obtain spectrum using Fast Fourier Transform (FFT).
        Args:
            signal (pandas.DataFrame): Time domain signals
        Returns:
            amp (pandas.DataFrame): Amplitude spectrum
            phase (pandas.DataFrame): Phase spectrum
        """
        N = len(signal)
        columns = signal.columns

        for col in columns:
            signal[col] = signal[col] * np.hamming(N)  # hamming window

        F = fft(signal, axis=0)  # Apply FFT
        F = F[: N // 2, :]  # Remove the overlapping part

        amp = np.abs(F)  # Obtain the amplitude spectrum
        amp = amp / N * 2
        amp[0] = amp[0] / 2
        amp = pd.DataFrame(amp, columns=columns)  # Convert array to DataFrame
        phase = np.angle(F)
        phase = pd.DataFrame(phase, columns=columns)  # Convert array to DataFrame

        return amp, phase

    def obtain_ecdf_percentile(self, signal, n_bins=10):
        """Obtain ECDF (empirical cumulative distribution function) percentile values.
        Args:
            signal (DataFrame): Time domain signals
            n_bins (int, default: 10): How many percentiles to use as a feature
        Returns:
            features (array): ECDF percentile values.
        """
        idx = np.linspace(0, signal.shape[0] - 1, n_bins)  # Take n_bins linspace percentile.
        idx = [int(Decimal(str(ix)).quantize(Decimal("0"), rounding=ROUND_HALF_UP)) for ix in idx]
        features = np.array([])
        for col in signal.columns:
            ecdf = ECDF(signal[col].values)  # fit
            x = ecdf.x[1:]  # Remove -inf
            feat = x[idx]
            features = np.hstack([features, feat])

        return features

    def obtain_mean(self, signal) -> np.ndarray:
        return signal.mean().values

    def obtain_std(self, signal) -> np.ndarray:
        return signal.std().values

    def obtain_mad(self, signal) -> np.ndarray:
        return stats.median_abs_deviation(signal, scale=1/1.4826)
        #return stats.median_absolute_deviation(signal, axis=0)

    def obtain_max(self, signal) -> np.ndarray:
        return signal.max().values

    def obtain_min(self, signal) -> np.ndarray:
        return signal.min().values

    def obtain_sma(self, signal, window_size=128) -> np.ndarray:
        window_second = window_size / self.fs
        return sum(signal.sum().values - self.obtain_min(signal) * len(signal)) / window_second

    def obtain_energy(self, signal) -> np.ndarray:
        return norm(signal, ord=2, axis=0) ** 2 / len(signal)

    def obtain_iqr(self, signal) -> np.ndarray:
        return signal.quantile(0.75).values - signal.quantile(0.25).values

    def obtain_entropy(self, signal) -> np.ndarray:
        signal = signal - signal.min()
        return stats.entropy(signal)

    def obtain_arCoeff(self, signal) -> np.ndarray:
        arCoeff = np.array([])
        for col in signal.columns:
            val, _ = burg(signal[col], order=4)
            arCoeff = np.hstack((arCoeff, val))
        return arCoeff

    def obtain_correlation(self, signal) -> np.ndarray:
        if signal.shape[1] == 1:  # Signal dimension is 1
            correlation = np.array([])
        else:  # Signal dimension is 3
            xy = np.corrcoef(signal["x"], signal["y"])[0][1]
            yz = np.corrcoef(signal["y"], signal["z"])[0][1]
            zx = np.corrcoef(signal["z"], signal["x"])[0][1]
            correlation = np.hstack((xy, yz, zx))
        return correlation

    def obtain_maxInds(self, signal) -> np.ndarray:
        return signal.idxmax().values

    def obtain_meanFreq(self, signal) -> np.ndarray:
        meanFreq = np.array([])
        for col in signal.columns:
            val = np.mean(signal[col] * np.arange(len(signal)))
            meanFreq = np.hstack((meanFreq, val))
        return meanFreq

    def obtain_skewness(self, signal) -> np.ndarray:
        return signal.skew().values

    def obtain_kurtosis(self, signal) -> np.ndarray:
        return signal.kurt().values

    def obtain_bandsEnergy(self, signal) -> np.ndarray:
        bandsEnergy = np.array([])
        bins = [0, 4, 8, 12, 16, 20, 24, 29, 34, 39, 44, 49, 54, 59, 64]
        for i in range(len(bins) - 1):
            df = signal.iloc[bins[i] : bins[i + 1]]
            arr = self.obtain_energy(df)
            bandsEnergy = np.hstack((bandsEnergy, arr))
        return bandsEnergy

    def obtain_angle(self, v1, v2) -> np.ndarray:
        length = lambda v: math.sqrt(np.dot(v, v))
        return math.acos(np.dot(v1, v2) / (length(v1) * length(v2)))


In [3]:
from typing import List

import numpy as np
import pandas as pd

#from src.data_prep.preprocessing import Preprocess  # Load class for obtaining features


def create_features(acc_raw: pd.DataFrame, gyro_raw: pd.DataFrame) -> np.ndarray:
    """Create features from raw acceleration and gyroscope sensor data
    Args:
        acc_raw (pd.DataFrame): Raw 3-axial accelerometer signals with columns denoting axes.
        gyro_raw (pd.DataFrame): Raw 3-axial gyroscope signals with columns denoting axes.
    Returns:
        features (np.ndarray): Created features corresponding args with columns denoting feature names.
    """
    of = Preprocess(fs=50)  # Create an instance.

    # Remove noises by median filter & Butterworth filter
    acc_raw = of.apply_filter(signal=acc_raw, filter="median", window=5)
    acc_raw = of.apply_filter(signal=acc_raw, filter="butterworth")
    gyro_raw = of.apply_filter(signal=gyro_raw, filter="median", window=5)
    gyro_raw = of.apply_filter(signal=gyro_raw, filter="butterworth")

    # Sample signals in fixed-width sliding windows
    tAccXYZ = of.segment_signal(acc_raw, window_size=128, overlap_rate=0.5, res_type="dataframe")
    tBodyGyroXYZ = of.segment_signal(
        gyro_raw, window_size=128, overlap_rate=0.5, res_type="dataframe"
    )

    # Separate acceleration signal into body and gravity acceleration signal
    tBodyAccXYZ, tGravityAccXYZ = [], []
    for acc in tAccXYZ:
        body_acc, grav_acc = of.separate_gravity(acc.copy())
        tBodyAccXYZ.append(body_acc)
        tGravityAccXYZ.append(grav_acc)

    # Obtain Jerk signals of body linear acceleration and angular velocity
    tBodyAccJerkXYZ, tBodyGyroJerkXYZ = [], []
    for body_acc, gyro in zip(tBodyAccXYZ, tBodyGyroXYZ):
        body_acc_jerk = of.obtain_jerk_signal(body_acc.copy())
        gyro_jerk = of.obtain_jerk_signal(gyro.copy())

        tBodyAccJerkXYZ.append(body_acc_jerk)
        tBodyGyroJerkXYZ.append(gyro_jerk)

    # Calculate the magnitude of three-dimensional signals using the Euclidean norm
    tBodyAccMag, tGravityAccMag, tBodyAccJerkMag, tBodyGyroMag, tBodyGyroJerkMag = (
        [],
        [],
        [],
        [],
        [],
    )
    for body_acc, grav_acc, body_acc_jerk, gyro, gyro_jerk in zip(
            tBodyAccXYZ, tGravityAccXYZ, tBodyAccJerkXYZ, tBodyGyroXYZ, tBodyGyroJerkXYZ
    ):
        body_acc_mag = of.obtain_magnitude(body_acc.copy())
        grav_acc_mag = of.obtain_magnitude(grav_acc.copy())
        body_acc_jerk_mag = of.obtain_magnitude(body_acc_jerk.copy())
        gyro_mag = of.obtain_magnitude(gyro.copy())
        gyro_jerk_mag = of.obtain_magnitude(gyro_jerk.copy())

        tBodyAccMag.append(body_acc_mag)
        tGravityAccMag.append(grav_acc_mag)
        tBodyAccJerkMag.append(body_acc_jerk_mag)
        tBodyGyroMag.append(gyro_mag)
        tBodyGyroJerkMag.append(gyro_jerk_mag)

    # Obtain amplitude spectrum using Fast Fourier Transform (FFT).
    (
        fBodyAccXYZAmp,
        fBodyAccJerkXYZAmp,
        fBodyGyroXYZAmp,
        fBodyAccMagAmp,
        fBodyAccJerkMagAmp,
        fBodyGyroMagAmp,
        fBodyGyroJerkMagAmp,
    ) = ([], [], [], [], [], [], [])
    (
        fBodyAccXYZPhs,
        fBodyAccJerkXYZPhs,
        fBodyGyroXYZPhs,
        fBodyAccMagPhs,
        fBodyAccJerkMagPhs,
        fBodyGyroMagPhs,
        fBodyGyroJerkMagPhs,
    ) = ([], [], [], [], [], [], [])
    for (
            body_acc,
            body_acc_jerk,
            gyro,
            body_acc_mag,
            body_acc_jerk_mag,
            gyro_mag,
            gyro_jerk_mag,
    ) in zip(
        tBodyAccXYZ,
        tBodyAccJerkXYZ,
        tBodyGyroXYZ,
        tBodyAccMag,
        tBodyAccJerkMag,
        tBodyGyroMag,
        tBodyGyroJerkMag,
    ):
        body_acc_amp, body_acc_phase = of.obtain_spectrum(body_acc.copy())
        body_acc_jerk_amp, body_acc_jerk_phase = of.obtain_spectrum(body_acc_jerk.copy())
        gyro_amp, gyro_phase = of.obtain_spectrum(gyro.copy())
        body_acc_mag_amp, body_acc_mag_phase = of.obtain_spectrum(body_acc_mag.copy())
        body_acc_jerk_mag_amp, body_acc_jerk_mag_phase = of.obtain_spectrum(
            body_acc_jerk_mag.copy()
        )
        gyro_mag_amp, gyro_mag_phase = of.obtain_spectrum(gyro_mag.copy())
        gyro_jerk_mag_amp, gyro_jerk_mag_phase = of.obtain_spectrum(gyro_jerk_mag.copy())

        fBodyAccXYZAmp.append(body_acc_amp)
        fBodyAccJerkXYZAmp.append(body_acc_jerk_amp)
        fBodyGyroXYZAmp.append(gyro_amp)
        fBodyAccMagAmp.append(body_acc_mag_amp)
        fBodyAccJerkMagAmp.append(body_acc_jerk_mag_amp)
        fBodyGyroMagAmp.append(gyro_mag_amp)
        fBodyGyroJerkMagAmp.append(gyro_jerk_mag_amp)

        fBodyAccXYZPhs.append(body_acc_phase)
        fBodyAccJerkXYZPhs.append(body_acc_jerk_phase)
        fBodyGyroXYZPhs.append(gyro_phase)
        fBodyAccMagPhs.append(body_acc_mag_phase)
        fBodyAccJerkMagPhs.append(body_acc_jerk_mag_phase)
        fBodyGyroMagPhs.append(gyro_mag_phase)
        fBodyGyroJerkMagPhs.append(gyro_jerk_mag_phase)

    #  Following signals are obtained by implementing above functions.
    time_signals = [
        tBodyAccXYZ,
        tGravityAccXYZ,
        tBodyAccJerkXYZ,
        tBodyGyroXYZ,
        tBodyGyroJerkXYZ,
        tBodyAccMag,
        tGravityAccMag,
        tBodyAccJerkMag,
        tBodyGyroMag,
        tBodyGyroJerkMag,
    ]
    freq_signals = [
        fBodyAccXYZAmp,
        fBodyAccJerkXYZAmp,
        fBodyGyroXYZAmp,
        fBodyAccMagAmp,
        fBodyAccJerkMagAmp,
        fBodyGyroMagAmp,
        fBodyGyroJerkMagAmp,
        fBodyAccXYZPhs,
        fBodyAccJerkXYZPhs,
        fBodyGyroXYZPhs,
        fBodyAccMagPhs,
        fBodyAccJerkMagPhs,
        fBodyGyroMagPhs,
        fBodyGyroJerkMagPhs,
    ]

    all_signals = time_signals + freq_signals

    # Calculate feature vectors by using signals
    features = []

    for i in range(len(tBodyAccXYZ)):
        feature_vector = np.array([])

        # mean, std, mad, max, min, sma, energy, iqr, entropy
        for t_signal in all_signals:
            sig = t_signal[i].copy()
            mean = of.obtain_mean(sig)
            std = of.obtain_std(sig)
            mad = of.obtain_mad(sig)
            max_val = of.obtain_max(sig)
            min_val = of.obtain_min(sig)
            sma = of.obtain_sma(sig)
            energy = of.obtain_energy(sig)
            iqr = of.obtain_iqr(sig)
            entropy = of.obtain_entropy(sig)
            feature_vector = np.hstack(
                (feature_vector, mean, std, mad, max_val, min_val, sma, energy, iqr, entropy)
            )

        # arCoeff
        for t_signal in time_signals:
            sig = t_signal[i].copy()
            arCoeff = of.obtain_arCoeff(sig)
            feature_vector = np.hstack((feature_vector, arCoeff))

        # correlation
        for t_signal in [
            tBodyAccXYZ,
            tGravityAccXYZ,
            tBodyAccJerkXYZ,
            tBodyGyroXYZ,
            tBodyGyroJerkXYZ,
        ]:
            sig = t_signal[i].copy()
            correlation = of.obtain_correlation(sig)
            feature_vector = np.hstack((feature_vector, correlation))

        # maxInds, meanFreq, skewness, kurtosis
        for t_signal in freq_signals:
            sig = t_signal[i].copy()
            maxInds = of.obtain_maxInds(sig)
            meanFreq = of.obtain_meanFreq(sig)
            skewness = of.obtain_skewness(sig)
            kurtosis = of.obtain_kurtosis(sig)
            feature_vector = np.hstack((feature_vector, maxInds, meanFreq, skewness, kurtosis))

        # bandsEnergy
        for t_signal in [tBodyAccXYZ, tBodyAccJerkXYZ, tBodyGyroXYZ]:
            sig = t_signal[i].copy()
            bandsEnergy = of.obtain_bandsEnergy(sig)
            feature_vector = np.hstack((feature_vector, bandsEnergy))

        # angle
        gravityMean = tGravityAccXYZ[i].mean()
        tBodyAccMean = tBodyAccXYZ[i].mean()
        tBodyAccJerkMean = tBodyAccJerkXYZ[i].mean()
        tBodyGyroMean = tBodyGyroXYZ[i].mean()
        tBodyGyroJerkMean = tBodyGyroJerkXYZ[i].mean()
        tXAxisAcc = tAccXYZ[i]["x"]
        tXAxisGravity = tGravityAccXYZ[i]["x"]
        tYAxisAcc = tAccXYZ[i]["y"]
        tYAxisGravity = tGravityAccXYZ[i]["y"]
        tZAxisAcc = tAccXYZ[i]["z"]
        tZAxisGravity = tGravityAccXYZ[i]["z"]

        tBodyAccWRTGravity = of.obtain_angle(tBodyAccMean, gravityMean)
        tBodyAccJerkWRTGravity = of.obtain_angle(tBodyAccJerkMean, gravityMean)
        tBodyGyroWRTGravity = of.obtain_angle(tBodyGyroMean, gravityMean)
        tBodyGyroJerkWRTGravity = of.obtain_angle(tBodyGyroJerkMean, gravityMean)
        tXAxisAccWRTGravity = of.obtain_angle(tXAxisAcc, tXAxisGravity)
        tYAxisAccWRTGravity = of.obtain_angle(tYAxisAcc, tYAxisGravity)
        tZAxisAccWRTGravity = of.obtain_angle(tZAxisAcc, tZAxisGravity)

        feature_vector = np.hstack(
            (
                feature_vector,
                tBodyAccWRTGravity,
                tBodyAccJerkWRTGravity,
                tBodyGyroWRTGravity,
                tBodyGyroJerkWRTGravity,
                tXAxisAccWRTGravity,
                tYAxisAccWRTGravity,
                tZAxisAccWRTGravity,
            )
        )

        # ECDF
        for t_signal in [tBodyAccXYZ, tBodyGyroXYZ]:
            sig = t_signal[i].copy()
            ecdf = of.obtain_ecdf_percentile(sig)
            feature_vector = np.hstack((feature_vector, ecdf))

        features.append(feature_vector)

    return np.array(features)


def get_feature_names() -> List[str]:
    """Get feature names
    Returns:
        feature_names (List[str]): Title of features
    """
    time_signal_names = [
        "tBodyAccXYZ",
        "tGravityAccXYZ",
        "tBodyAccJerkXYZ",
        "tBodyGyroXYZ",
        "tBodyGyroJerkXYZ",
        "tBodyAccMag",
        "tGravityAccMag",
        "tBodyAccJerkMag",
        "tBodyGyroMag",
        "tBodyGyroJerkMag",
    ]
    freq_signal_names = [
        "fBodyAccXYZAmp",
        "fBodyAccJerkXYZAmp",
        "fBodyGyroXYZAmp",
        "fBodyAccMagAmp",
        "fBodyAccJerkMagAmp",
        "fBodyGyroMagAmp",
        "fBodyGyroJerkMagAmp",
        "fBodyAccXYZPhs",
        "fBodyAccJerkXYZPhs",
        "fBodyGyroXYZPhs",
        "fBodyAccMagPhs",
        "fBodyAccJerkMagPhs",
        "fBodyGyroMagPhs",
        "fBodyGyroJerkMagPhs",
    ]
    all_signal_names = time_signal_names + freq_signal_names
    feature_names = []

    for name in all_signal_names:
        for s in ["Mean", "Std", "Mad", "Max", "Min", "Sma", "Energy", "Iqr", "Entropy"]:
            if s == "Sma":
                feature_names.append(f"{name}{s}")
                continue
            if "XYZ" in name:
                n = name.replace("XYZ", "")
                feature_names += [f"{n}{s}-{ax}" for ax in ["X", "Y", "Z"]]
            else:
                feature_names.append(f"{name}{s}")

    for name in time_signal_names:
        if "XYZ" in name:
            n = name.replace("XYZ", "")
            feature_names += [f"{n}ArCoeff-{ax}{i}" for ax in ["X", "Y", "Z"] for i in range(4)]
        else:
            feature_names += [f"{name}ArCoeff{i}" for i in range(4)]

    for name in [
        "tBodyAccXYZ",
        "tGravityAccXYZ",
        "tBodyAccJerkXYZ",
        "tBodyGyroXYZ",
        "tBodyGyroJerkXYZ",
    ]:
        n = name.replace("XYZ", "")
        feature_names += [f"{n}Correlation-{ax}" for ax in ["X", "Y", "Z"]]

    for name in freq_signal_names:
        for s in ["MaxInds", "MeanFreq", "Skewness", "Kurtosis"]:
            if "XYZ" in name:
                n = name.replace("XYZ", "")
                feature_names += [f"{n}{s}-{ax}" for ax in ["X", "Y", "Z"]]
            else:
                feature_names.append(f"{name}{s}")

    for name in ["tBodyAccXYZ", "tBodyAccJerkXYZ", "tBodyGyroXYZ"]:
        n = name.replace("XYZ", "")
        feature_names += [f"{n}BandsEnergy-{ax}{i}" for i in range(14) for ax in ["X", "Y", "Z"]]

    feature_names += [
        "tBodyAccWRTGravity",
        "tBodyAccJerkWRTGravity",
        "tBodyGyroWRTGravity",
        "tBodyGyroJerkWRTGravity",
        "tXAxisAccWRTGravity",
        "tYAxisAccWRTGravity",
        "tZAxisAccWRTGravity",
    ]

    feature_names += [
        f"tBody{sensor}ECDF-{axis}{i}"
        for sensor in ["Acc", "Gyro"]
        for axis in ["X", "Y", "Z"]
        for i in range(10)
    ]
    return feature_names


In [7]:
# only used to create train and test!!
from datetime import datetime
import glob
from logging import basicConfig, getLogger, StreamHandler, DEBUG
import os
import sys

import numpy as np
import pandas as pd
from tqdm import tqdm


CUR_DIR = os.getcwd()
DATA_DIR = CUR_DIR
ATC_IDS = [1, 2, 3, 4, 5]  # Target activity ids
TRAIN_SUBJECTS = [1, 2, 3, 4, 5, 7]
TEST_SUBJECTS = [8, 6]

# Logging settings
EXEC_TIME = "generate-features-" + datetime.now().strftime("%Y%m%d-%H%M%S")
LOG_DIR = os.path.join(CUR_DIR, f"logs/{EXEC_TIME}")
os.makedirs(LOG_DIR, exist_ok=True)  # Create log directory

formatter = "%(levelname)s: %(asctime)s: %(filename)s: %(funcName)s: %(message)s"
basicConfig(filename=f"{LOG_DIR}/{EXEC_TIME}.log", level=DEBUG, format=formatter)
# Handle logging to both logging and stdout.
getLogger().addHandler(StreamHandler(sys.stdout))

logger = getLogger(__name__)
logger.setLevel(DEBUG)
logger.debug(f"{LOG_DIR}/{EXEC_TIME}.log")

def main() -> None:
    """Create features from raw acceleration and gyroscope sensor data, and save features/labels to pickle/npy files"""
    logger.debug("Start creating features...")

    # Get file names of raw dataset
    acc_files = sorted(glob.glob(os.path.join(DATA_DIR, "raw data/acc*.txt")))
    gyro_files = sorted(glob.glob(os.path.join(DATA_DIR, "raw data/gyro*.txt")))
    label_info = pd.read_table(
        os.path.join(DATA_DIR, "raw data/labels.txt"),
        sep=" ",
        header=None,
        names=["ExpID", "UserID", "ActID", "ActStart", "ActEnd"],
    )

    X_train, X_test = [], []
    subject_train, subject_test = np.array([]), np.array([])
    y_train, y_test = np.array([]), np.array([])

    # Create features from each row data file
    for i in tqdm(range(len(acc_files))):
        acc_file, gyro_file = acc_files[i], gyro_files[i]
        exp_id = int(acc_file.split("exp")[1][:2])
        user_id = int(acc_file.split("user")[1][:2])

        temp_label_info = label_info[
            (label_info.ExpID == exp_id)
            & (label_info.UserID == user_id)
            & (label_info.ActID.isin(ATC_IDS))
            ]
        acc_raw = pd.read_table(acc_file, sep=" ", header=None, names=["x", "y", "z"])
        gyro_raw = pd.read_table(gyro_file, sep=" ", header=None, names=["x", "y", "z"])

        for _, us_id, act_id, act_start, act_end in temp_label_info.values:
            temp_acc_raw = acc_raw.iloc[act_start : act_end + 1]
            temp_gyro_raw = gyro_raw.iloc[act_start : act_end + 1]
            features = create_features(temp_acc_raw, temp_gyro_raw)  # Create features
            labels = [act_id] * len(features)
            subjects = [us_id] * len(features)

            if user_id in TRAIN_SUBJECTS:
                X_train.append(features)
                y_train = np.hstack((y_train, labels))
                subject_train = np.hstack((subject_train, subjects))
            else:
                X_test.append(features)
                y_test = np.hstack((y_test, labels))
                subject_test = np.hstack((subject_test, subjects))

    columns = get_feature_names()
    X_train = pd.DataFrame(np.vstack(X_train), columns=columns)
    X_test = pd.DataFrame(np.vstack(X_test), columns=columns)

    logger.debug(f"{X_train.shape=}, {X_test.shape=}")
    logger.debug(f"{y_train.shape=}, {y_test.shape=}")
    logger.debug(f"{subject_train.shape=}, {subject_test.shape=}")
    # Save features/labels to pickle/npy files
    X_train.to_pickle(os.path.join(DATA_DIR, "my_dataset/X_train.pickle"))
    X_test.to_pickle(os.path.join(DATA_DIR, "my_dataset/X_test.pickle"))
    np.save(os.path.join(DATA_DIR, "my_dataset/y_train.npy"), y_train)
    np.save(os.path.join(DATA_DIR, "my_dataset/y_test.npy"), y_test)

    DATA_DIR2 = os.getcwd()
    # Replicate the data set of Test and Train in text style
    file_name = "Test/subject_id_test.txt"
    my_file = open(file_name, 'w')
    for row in subject_test:
        my_file.write(f"{int(row)}\n")
    my_file.close()

    file_name = "Train/y_train.txt"
    my_file = open(file_name, 'w')
    for row in y_train:
        my_file.write(f"{int(row)}\n")
    my_file.close()

    file_name = "Test/y_test.txt"
    my_file = open(file_name, 'w')
    for row in y_test:
        my_file.write(f"{int(row)}\n")
    my_file.close()

    with open("Test/X_test.txt", 'a') as f:
        dfAsString = X_test.to_string(header=False, index=False)
        f.write(dfAsString)

    with open("Train/X_train.txt", 'a') as f:
        dfAsString = X_train.to_string(header=False, index=False)
        f.write(dfAsString)

main()

/Users/andreas/Documents/git/dsProject/s8/logs/generate-features-20211212-210940/generate-features-20211212-210940.log
/Users/andreas/Documents/git/dsProject/s8/logs/generate-features-20211212-210940/generate-features-20211212-210940.log
/Users/andreas/Documents/git/dsProject/s8/logs/generate-features-20211212-210940/generate-features-20211212-210940.log
/Users/andreas/Documents/git/dsProject/s8/logs/generate-features-20211212-210940/generate-features-20211212-210940.log
Start creating features...
Start creating features...
Start creating features...
Start creating features...


100%|██████████| 12/12 [15:40<00:00, 78.34s/it] 

X_train.shape=(2459, 784), X_test.shape=(830, 784)
X_train.shape=(2459, 784), X_test.shape=(830, 784)
X_train.shape=(2459, 784), X_test.shape=(830, 784)
X_train.shape=(2459, 784), X_test.shape=(830, 784)
y_train.shape=(2459,), y_test.shape=(830,)
y_train.shape=(2459,), y_test.shape=(830,)
y_train.shape=(2459,), y_test.shape=(830,)
y_train.shape=(2459,), y_test.shape=(830,)
subject_train.shape=(2459,), subject_test.shape=(830,)
subject_train.shape=(2459,), subject_test.shape=(830,)
subject_train.shape=(2459,), subject_test.shape=(830,)
subject_train.shape=(2459,), subject_test.shape=(830,)



