In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
WINDOW_SIZE = 10
TARGET_THRESHOLD = 0.5

ENCODER = {
  "rest": 0,
  "circ": 1,
  "rev_circ": 2,
  "stab": 3
}

Class = {
  "c": 1, #circular
  "r": 2, #reverse cricular
  "s": 3 #stabbing
}

In [3]:
def create_names():
  names = []
  masks = ['ax','ay','az','gx','gy','gz']
  for j in range(len(masks)):
    for i in range(WINDOW_SIZE):
      names.append(masks[j]+str(i))
  
  return names

def motion_captured(targets):
  count = 0
  for target in targets:
    if target == "rest":
      count+=1
  return (1-count/len(targets)) > TARGET_THRESHOLD


In [4]:
def data_from_csv(dataframe, class_type = 1):
  colnames = create_names()
  X = pd.DataFrame(columns=colnames)
  y = pd.DataFrame(columns=['target'])
  for i in range(len(dataframe)-WINDOW_SIZE):
    ax_tab = dataframe['ax'].iloc[i:WINDOW_SIZE+i].T
    ay_tab = dataframe['ay'].iloc[i:WINDOW_SIZE+i].T
    az_tab = dataframe['az'].iloc[i:WINDOW_SIZE+i].T
    gx_tab = dataframe['gx'].iloc[i:WINDOW_SIZE+i].T
    gy_tab = dataframe['gy'].iloc[i:WINDOW_SIZE+i].T
    gz_tab = dataframe['gz'].iloc[i:WINDOW_SIZE+i].T
    tabularized = np.concatenate([ax_tab, ay_tab, az_tab, gx_tab, gy_tab, gz_tab])
    targets = dataframe['target'].iloc[i:WINDOW_SIZE+i].T
    if motion_captured(targets):
      y.loc[len(y)] = class_type
    else: 
      y.loc[len(y)] = 0
    X.loc[len(X)] = tabularized 
  return X, y

In [5]:
df = pd.read_csv('rev_circle5(5 s).csv')
#df['time']
df

Unnamed: 0,time,ax,ay,az,gx,gy,gz,target
0,5,1664,988,15212,858,-1647,-69,rest
1,17,1656,980,14592,652,-1818,-219,rest
2,30,1756,1032,14496,477,-1428,-298,rest
3,72,1640,1172,14288,485,-943,-100,rest
4,114,1980,1180,15272,-93,-621,-445,rest
...,...,...,...,...,...,...,...,...
175,7533,3084,1924,14528,-20,-221,196,rest
176,7576,2872,2192,15084,252,-143,321,rest
177,7619,3484,1820,14664,-66,-200,-251,rest
178,7662,2552,2368,15400,-44,-151,312,rest


In [6]:
names = glob.glob('*.csv')
names

['circle7(2 s).csv',
 'circle8(4 s).csv',
 'rev_circle5(5 s).csv',
 'stabbing5(5 s).csv',
 'circle4(3 s).csv',
 'rev_circle8(4 s).csv',
 'stabbing1(4 s).csv',
 'circle5(5 s).csv',
 'rev_circle1(4 s).csv',
 'circle2(3 s).csv',
 'rev_circle4(3 s).csv',
 'stabbing8(4 s).csv',
 'stabbing7(2 s).csv',
 'circle1(4 s).csv',
 'circle3(2 s).csv',
 'stabbing2(3 s).csv',
 'rev_circle7(2 s).csv',
 'rev_circle3(2 s).csv',
 'stabbing3(2 s).csv',
 'rev_circle6(3 s).csv',
 'circle6(3 s).csv',
 'stabbing6(3 s).csv',
 'rev_circle2(3 s).csv',
 'stabbing4(3 s).csv']

In [14]:
xes =[]
ys = []
for name in names:
  df = pd.read_csv(name)
  X, y = data_from_csv(df, Class[name[0]])
  xes.append(X)
  ys.append(y)

X = pd.concat(xes,ignore_index=True)
y = pd.concat(ys,ignore_index=True)

In [8]:
from sklearn import metrics
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.base import ClassifierMixin
from sklearn import metrics

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [16]:
def trainer_helper(
        clf: ClassifierMixin,
        X_train: np.ndarray,
        X_test: np.ndarray,
        y_train: np.ndarray,
        y_test: np.ndarray):
    '''
    Function to get metrics of a trained scikit-learn model.
    
    Args:
        clf: Scikit-learn classifier instance.
        X_train: Training dataset.
        X_test: Testing dataset.
        y_train: Training labels.
        y_test: Testing labels.
    Returns:
        Accuracy, F1 score, precisio, recall, scikit-learn model instance.
    '''
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    
    accuracy = metrics.accuracy_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred, average='macro')
    precision = metrics.precision_score(y_test, y_pred, average='macro')
    recall = metrics.recall_score(y_test, y_pred, average='macro')

    return accuracy, f1, precision, recall, clf

In [17]:
def run_training(
        X_train: np.ndarray,
        X_test: np.ndarray,
        y_train: np.ndarray,
        y_test: np.ndarray):
    '''
    Runs training with 4 different scikit-learn classifiers.
    
    Args:
        X_train: Training dataset.
        X_test: Testing dataset.
        y_train: Training labels.
        y_test: Testing labels.
    Returns:
        Metrics collected for the training run.
    '''
    collect_metrics = {}

    dt = DecisionTreeClassifier(random_state=42)
    rf = RandomForestClassifier(random_state=42, n_estimators=4)
    lsvc = LinearSVC(random_state=42, max_iter=3_000)
    lr = LogisticRegression(random_state=42, max_iter=3_000)

    model_setup = [
                    (dt, 'decision_tree'),
                    (rf,'random_forest'),
                    (lsvc, 'svc'),
                    (lr, 'logistic_regression')
                ]

    for item in model_setup:
        clf = item[0]
        name = item[1]

        collect_metrics[name] = {}

        print('\n')
        print(f'{name}')

        accuracy, f1, precision, recall, model = trainer_helper(clf, X_train, X_test, y_train, y_test)

        collect_metrics[name] = {
                'accuracy':accuracy,
                'f1':f1,
                'precision':precision,
                'recall':recall}

    return collect_metrics

In [18]:
run_training(X_train, X_test, y_train, y_test)



decision_tree


random_forest


svc


  clf.fit(X_train, y_train)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)




logistic_regression


{'decision_tree': {'accuracy': 0.9267515923566879,
  'f1': 0.8677815503497913,
  'precision': 0.8844373835068793,
  'recall': 0.8534909613030788},
 'random_forest': {'accuracy': 0.9490445859872612,
  'f1': 0.9026371583224626,
  'precision': 0.9340136954580038,
  'recall': 0.8792264734959043},
 'svc': {'accuracy': 0.6751592356687898,
  'f1': 0.26755248297801487,
  'precision': 0.6682847896440129,
  'recall': 0.28657894736842104},
 'logistic_regression': {'accuracy': 0.6878980891719745,
  'f1': 0.31284121664305997,
  'precision': 0.509009009009009,
  'recall': 0.3140208078335373}}

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
clf = RandomForestClassifier(random_state=42, n_estimators=6)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
    
accuracy = metrics.accuracy_score(y_test, y_pred)
f1 = metrics.f1_score(y_test, y_pred, average='macro')
precision = metrics.precision_score(y_test, y_pred, average='macro')
recall = metrics.recall_score(y_test, y_pred, average='macro')
print(accuracy, f1, precision, recall)

0.9363057324840764 0.8662834228913524 0.9243496672716274 0.83715746996997


  clf.fit(X_train, y_train)


In [None]:
!pip install m2cgen --quiet

In [None]:
import m2cgen as m2c 

In [None]:
model_to_c = m2c.export_to_c(clf)

In [None]:
model_to_c

In [20]:
!pip install micromlgen

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [22]:
from micromlgen import port

In [23]:
c_code = port(clf)
print(c_code)

#pragma once
#include <cstdarg>
namespace Eloquent {
    namespace ML {
        namespace Port {
            class RandomForest {
                public:
                    /**
                    * Predict class for features vector
                    */
                    int predict(float *x) {
                        uint8_t votes[4] = { 0 };
                        // tree #1
                        if (x[57] <= -1257.0) {
                            if (x[28] <= 13604.0) {
                                if (x[59] <= -6305.0) {
                                    if (x[16] <= -12196.0) {
                                        votes[3] += 1;
                                    }

                                    else {
                                        votes[1] += 1;
                                    }
                                }

                                else {
                                    if (x[16] <= -4882.0) {
                                 