In [39]:
from enum import IntEnum
from pathlib import Path
from functools import partial

import pandas as pd
import numpy as np

## Load data

In [25]:
DATA_PATH = Path('train_set')
WINDOW_SIZE = 100

# Maps each activity directory name to the integer LABEL found in its recordings.
activity_label_map: dict[str, int] = {}
# One frame of windowed mean features per recording file.
feature_df_list: list[pd.DataFrame] = []

for activity_class_path in DATA_PATH.iterdir():
    # Only directories hold recordings; skip any other entry under DATA_PATH.
    if not activity_class_path.is_dir():
        continue

    # Stays None when the directory contains no "*.txt" recording, so that a
    # label read from a previous directory can never be attributed to this one.
    class_label = None
    for sample_file_path in activity_class_path.glob("*.txt"):
        sample_df = pd.read_csv(sample_file_path, sep="\t")
        sample_feature_df = (
            sample_df
            # Non-overlapping windows: one mean per WINDOW_SIZE consecutive rows.
            .rolling(window=WINDOW_SIZE, step=WINDOW_SIZE)
            .mean()
            # Incomplete windows evaluate to all-NaN rows; drop them.
            .dropna(axis="index", how="all")
        )
        feature_df_list.append(sample_feature_df)
        class_label = sample_df["LABEL"].unique()[0]

    if class_label is None:
        continue
    activity_class = activity_class_path.stem
    activity_label_map[activity_class] = int(class_label)

feature_df = pd.concat(feature_df_list).convert_dtypes()
# The windowed LABEL column is the target; remove it from the feature columns.
label_series = feature_df.pop("LABEL")
feature_df

Unnamed: 0,A_X [mg],A_Y [mg],A_Z [mg],G_X [dps],G_Y [dps],G_Z [dps]
100,-528.603052,339.785076,765.702783,7.626247,2.427705,-4.052376
200,-473.420352,540.424089,706.021734,20.270977,20.076961,3.788026
300,-406.772613,600.130302,677.172802,-61.501799,-9.428375,21.992674
400,-489.208149,39.461775,817.214965,0.881396,-13.968121,19.48656
500,-303.244285,371.931881,825.334763,34.184294,33.466694,-0.787482
...,...,...,...,...,...,...
5800,-661.418676,905.089112,-9.773697,22.88068,21.501385,-15.114063
5900,-843.071031,1255.936004,-25.977357,34.726594,-7.940121,-50.933828
6000,-535.951809,1131.596839,-44.396183,26.10528,2.041256,-16.753543
6100,-561.364483,996.505024,58.851915,25.752353,11.070853,0.770632


In [26]:
label_series

100     4
200     4
300     4
400     4
500     4
       ..
5800    3
5900    3
6000    3
6100    3
6200    3
Name: LABEL, Length: 2512, dtype: Int64

In [7]:
activity_label_map

{'cycling': 4, 'walking': 2, 'stationary': 1, 'running': 3}

## Converting model from `emlearn` back to Python

### Simplified tree model generated by `emlearn`:

```c
#pragma once

#include <stdint.h>

/*
 * Single decision tree of the classifier.
 *
 * Compares entries of `features` against fixed thresholds and returns a class
 * index in the range 0..3. Only features[0], features[1], features[2],
 * features[3] and features[5] are read; features[4] is never consulted.
 * Several splits have two leaves returning the same class, so those
 * comparisons do not change the result.
 */
static inline int32_t
activity_classifier_predict_tree_0(const float *features) {
  if (features[2] < 337.0892333984375) {
    if (features[1] < 752.7792358398438) {
      if (features[1] < -619.2154235839844) {
        return 1;
      } else {
        if (features[1] < 558.8316955566406) {
          if (features[0] < -903.6760559082031) {
            return 3;
          } else {
            return 3;
          }
        } else {
          return 3;
        }
      }
    } else {
      if (features[0] < -198.67298889160156) {
        if (features[1] < 964.0451965332031) {
          if (features[0] < -688.8497314453125) {
            return 1;
          } else {
            return 1;
          }
        } else {
          return 1;
        }
      } else {
        if (features[0] < 68.24848175048828) {
          return 2;
        } else {
          if (features[3] < -0.5337255299091339) {
            return 2;
          } else {
            return 2;
          }
        }
      }
    }
  } else {
    if (features[0] < -67.42924499511719) {
      if (features[2] < 437.91233825683594) {
        if (features[0] < -886.8507690429688) {
          return 3;
        } else {
          if (features[5] < -0.637690544128418) {
            return 2;
          } else {
            return 2;
          }
        }
      } else {
        if (features[5] < -23.546794891357422) {
          return 3;
        } else {
          if (features[2] < 697.09912109375) {
            return 0;
          } else {
            return 0;
          }
        }
      }
    } else {
      if (features[3] < -0.9509706199169159) {
        return 2;
      } else {
        if (features[3] < 1.3026073575019836) {
          return 2;
        } else {
          return 2;
        }
      }
    }
  }
}

/*
 * Classifier entry point: forwards `features` to the single tree and returns
 * its class index. The tree's int32_t result is implicitly converted to
 * uint8_t on return.
 */
uint8_t activity_classifier_predict(const float *features) {
  return activity_classifier_predict_tree_0(features);
}

```

### Translation to Python

In [35]:
# # Original
# class ActivityClass(IntEnum):
#     CYCLING = 0
#     RUNNING = 1
#     STATIONARY = 2
#     WALKING = 3

class ActivityClass(IntEnum):
    CYCLING = 4
    RUNNING = 3
    STATIONARY = 1
    WALKING = 2


Features = tuple[float, float, float, float, float, float]
Parameter = float | int

TRAINED_PARAMETERS = [
    337.0892333984375,
    752.7792358398438,
    -619.2154235839844,
    558.8316955566406,
    -903.6760559082031,
    -198.67298889160156,
    964.0451965332031,
    -688.8497314453125,
    68.24848175048828,
    -0.5337255299091339,
    -67.42924499511719,
    437.91233825683594,
    -886.8507690429688,
    -0.637690544128418,
    -23.546794891357422,
    697.09912109375,
    -0.9509706199169159,
    1.3026073575019836,
]


def classify(parameters: list[Parameter], features: Features) -> ActivityClass:
    if features[2] < parameters[0]:
        if features[1] < parameters[1]:
            if features[1] < parameters[2]:
                return ActivityClass.RUNNING
            else:
                if features[1] < parameters[3]:
                    if features[0] < parameters[4]:
                        return ActivityClass.WALKING
                    else:
                        return ActivityClass.WALKING
                else:
                    return ActivityClass.WALKING
        else:
            if features[0] < parameters[5]:
                if features[1] < parameters[6]:
                    if features[0] < parameters[7]:
                        return ActivityClass.RUNNING
                    else:
                        return ActivityClass.RUNNING
                else:
                    return ActivityClass.RUNNING
            else:
                if features[0] < parameters[8]:
                    return ActivityClass.STATIONARY
                else:
                    if features[3] < parameters[9]:
                        return ActivityClass.STATIONARY
                    else:
                        return ActivityClass.STATIONARY
    else:
        if features[0] < parameters[10]:
            if features[2] < parameters[11]:
                if features[0] < parameters[12]:
                    return ActivityClass.WALKING
                else:
                    if features[5] < parameters[13]:
                        return ActivityClass.STATIONARY
                    else:
                        return ActivityClass.STATIONARY
                        # I'm here
            else:
                if features[5] < parameters[14]:
                    return ActivityClass.WALKING
                else:
                    if features[2] < parameters[15]:
                        return ActivityClass.CYCLING
                    else:
                        return ActivityClass.CYCLING
        else:
            if features[3] < parameters[16]:
                return ActivityClass.STATIONARY
            else:
                if features[3] < parameters[17]:
                    return ActivityClass.STATIONARY
                else:
                    return ActivityClass.STATIONARY

In [40]:
# NumPy copy of the tree thresholds, displayed as the cell output.
trained_parameters_np = np.array(TRAINED_PARAMETERS)
trained_parameters_np

array([ 3.37089233e+02,  7.52779236e+02, -6.19215424e+02,  5.58831696e+02,
       -9.03676056e+02, -1.98672989e+02,  9.64045197e+02, -6.88849731e+02,
        6.82484818e+01, -5.33725530e-01, -6.74292450e+01,  4.37912338e+02,
       -8.86850769e+02, -6.37690544e-01, -2.35467949e+01,  6.97099121e+02,
       -9.50970620e-01,  1.30260736e+00])

In [36]:
def classify_series(parameters: list[Parameter], series: pd.Series) -> ActivityClass:
    """Classify one feature row given as a pandas Series.

    The Series values are converted, in order, to a plain list and passed to
    ``classify`` together with ``parameters``.
    """
    feature_values = series.to_list()
    return classify(parameters, feature_values)


# Smoke test on the first feature window.
classify_series(TRAINED_PARAMETERS, feature_df.iloc[0])

<ActivityClass.CYCLING: 4>

### Sanity check

In [37]:
# Run the Python translation of the tree over every feature row.
predicted_label_series = feature_df.apply(
    lambda feature_row: classify_series(TRAINED_PARAMETERS, feature_row),
    axis="columns",
)
predicted_label_series

100     4
200     4
300     4
400     4
500     4
       ..
5800    3
5900    3
6000    3
6100    3
6200    3
Length: 2512, dtype: int64

In [38]:
# Fraction of windows where the translated tree's prediction equals the label.
(predicted_label_series == label_series).mean()

0.9096337579617835

## Experimenting with truncation

### Input

In [None]:
# Sweep the number of decimals kept in the input features and record the
# accuracy of the translated classifier at each precision.
decimals_history: list[int] = []
accuracy_history: list[float] = []
for decimals in range(1, 10 + 1):
    # NOTE: DataFrame.round rounds to the nearest value at the given number of
    # decimals; it does not truncate toward zero.
    truncated_feature_df = feature_df.round(decimals)
    # Classify the reduced-precision features (not the original feature_df),
    # and keep the result separate from the sanity check's predictions.
    truncated_predicted_label_series = truncated_feature_df.apply(
        partial(classify_series, TRAINED_PARAMETERS),
        axis="columns",
    )
    decimals_history.append(decimals)
    accuracy_history.append(
        float((truncated_predicted_label_series == label_series).mean())
    )

# Accuracy per number of decimals, displayed as the cell output.
pd.Series(
    accuracy_history,
    index=pd.Index(decimals_history, name="decimals"),
    name="accuracy",
)

### Output