In [8]:
# This is the template for the submission. You can develop your algorithm in a regular Python script and copy the code here for submission.

# TEAM NAME ON KAGGLE
# "EXAMPLE_GROUP"

# GROUP NUMBER
# "group_32"

# TEAM MEMBERS (E-MAIL, LEGI, KAGGLE USERNAME):
# dbekatli@student.ethz.ch, 19-946-037, dbekatli
# obasinska@student.ethz.ch, 19-934-199, Oliwia
# chbucher@student.ethz.ch, 19-924-240, Christoph Bucher

In [17]:
import math
import re
import warnings
from os import listdir
from os.path import isdir
from os.path import isfile, join

import joblib
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy import signal
from scipy.fft import fft, fftfreq
from scipy.signal import find_peaks
from scipy.signal import peak_prominences
from scipy.spatial.transform import Rotation
from tqdm import tqdm

# You may change the mhealth_activity module but your algorithm must support the original version
from mhealth_activity import Recording

# For interactive graphs
# %matplotlib widget

In [2]:
# Get the path for all test traces.
# Use the Kaggle input directory when it exists, otherwise fall back to the
# local copy of the dataset, so the same cell runs in both environments
# without editing (previously the Kaggle path was unconditionally overwritten).
KAGGLE_DIR_TRACES = '/kaggle/input/24-exercise2/data/test'
LOCAL_DIR_TRACES = 'data/test'
dir_traces = KAGGLE_DIR_TRACES if isdir(KAGGLE_DIR_TRACES) else LOCAL_DIR_TRACES

# Keep regular files only; sorted so recordings are processed in a stable order.
filenames = sorted(
    path
    for path in (join(dir_traces, entry) for entry in listdir(dir_traces))
    if isfile(path)
)

### Define feature extraction related functions for watch position classification

In [3]:
def _peak_summary(series):
    """Summarise the peaks of a 1-D series.

    Returns a tuple ``(n_peaks, avg_prom, avg_dist)``:

    * ``n_peaks``  - number of peaks found by ``find_peaks(series, distance=5)``.
    * ``avg_prom`` - ``np.mean`` over the *whole* tuple returned by
      ``peak_prominences`` (prominences, left bases, right bases).
      NOTE(review): this mixes prominences with base indices; it is kept
      unchanged because the feature value feeds a model that was presumably
      trained on it - confirm before changing.
    * ``avg_dist`` - mean index distance between consecutive peaks, or 0.0 when
      fewer than two peaks exist (previously one peak raised ZeroDivisionError).
    """
    peaks, _ = find_peaks(series, distance=5)
    avg_prom = np.mean(peak_prominences(series, peaks))
    if len(peaks) > 1:
        avg_dist = np.sum(np.abs(np.diff(peaks))) / (len(peaks) - 1)
    else:
        avg_dist = 0.0
    return len(peaks), avg_prom, avg_dist


def features_extraction_watchpos(df, prefix):
    """Compute time- and frequency-domain statistics of a 1-D signal.

    Parameters
    ----------
    df : array-like
        1-D signal (the callers in this notebook pass a vector norm).
    prefix : str
        Suffix appended to every feature name as ``<feature>_<prefix>``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with 25 columns, in the fixed order listed in
        ``base_names`` below.
    """
    base_names = ['Median', 'Numneg', 'Numpos', 'Numabovmed', 'Mean', 'STD', 'MAD', 'Var',
                  'Min', 'Max', 'SMA', 'Energy', 'IQR', 'Entropy', 'Npeaks', 'avgprom',
                  'avgpeakdist', 'Sum_f', 'Max_f', 'NPeak_f', 'Avgprom_f', 'avgpeakdist_f',
                  'Mean_f', 'Skew_f', 'Kurtosis_f']
    columns = [f"{name}_{prefix}" for name in base_names]

    sig = np.asarray(df)
    median = np.median(sig)

    ## TIME DOMAIN ##
    n_peaks, avg_prom, avg_dist = _peak_summary(sig)

    ## FREQ DOMAIN ##
    spectrum = np.abs(fft(sig))
    n_peaks_f, avg_prom_f, avg_dist_f = _peak_summary(spectrum)

    # The order of this list must match ``base_names``.
    values = [
        median,
        np.sum(sig < 0, axis=0),
        np.sum(sig > 0, axis=0),
        np.sum(sig > median, axis=0),
        np.mean(sig),
        np.std(sig),
        stats.median_abs_deviation(sig, scale=1),   # median absolute deviation
        np.var(sig),
        np.min(sig),
        np.max(sig),
        np.sum(sig),                                # labelled SMA, computed as the plain sum
        np.sum(sig**2) / len(sig),                  # energy measure
        stats.iqr(sig),
        stats.entropy(sig),
        n_peaks,
        avg_prom,
        avg_dist,
        np.sum(spectrum),
        np.max(spectrum),
        n_peaks_f,
        avg_prom_f,
        avg_dist_f,
        np.mean(spectrum),
        stats.skew(spectrum),
        stats.kurtosis(spectrum),
    ]

    # Build a (25 x 1) frame and transpose it so every feature becomes a column;
    # the nested index creates a MultiIndex that is flattened back to strings.
    df_features = pd.DataFrame(index=[columns], data=[[value] for value in values])
    df_features = df_features.transpose()
    df_features.columns = df_features.columns.map(''.join)
    return df_features

def calc_norm(xarr, yarr, zarr, label):
    """Return the per-sample Euclidean norm sqrt(x^2 + y^2 + z^2).

    Parameters
    ----------
    xarr, yarr, zarr : array-like
        Component traces. The output has ``len(xarr)`` samples; ``yarr`` and
        ``zarr`` are truncated to that length if they are longer.
    label : any
        Unused; kept only so existing call sites keep working.

    Returns
    -------
    numpy.ndarray
        1-D array of norms (empty when ``xarr`` is empty).
    """
    x = np.asarray(xarr)
    n_samples = len(x)
    # Vectorised replacement of the former per-sample Python loop.
    y = np.asarray(yarr)[:n_samples]
    z = np.asarray(zarr)[:n_samples]
    return np.sqrt(x**2 + y**2 + z**2)
    

### Define feature extraction related functions for path classification

In [19]:
def features_extraction_altitude_pathid(df):
    """Extract altitude-profile features from a 1-D altitude trace.

    The trace is low-pass filtered (first-order Chebyshev II, zero-phase) and
    summarised as: minimum, maximum, slope of a least-squares line (scaled by
    1000), number of samples, slope times number of samples, and the mean
    altitude of 15 consecutive equal-length segments.

    Returns a single-row DataFrame with columns
    ``min_alt, max_alt, amp_alt, len_alt, gain_alt, alt_seg0 ... alt_seg14``.
    """
    n_segments = 15

    # Zero-phase low-pass filtering of the raw altitude.
    lowpass = signal.cheby2(1, 20, 0.5, 'lowpass', fs=12.5, output='sos')
    alt = signal.sosfiltfilt(lowpass, df)
    n_samples = len(alt)

    # A straight-line fit tells whether the trace climbs or descends overall.
    slope, _intercept = np.polyfit(np.arange(0, n_samples, 1), alt, 1)

    summary_names = [name + '_alt' for name in ('min', 'max', 'amp', 'len', 'gain')]
    summary_rows = [[min(alt)], [max(alt)], [slope*1000], [n_samples], [slope*n_samples]]
    summary = pd.DataFrame(index=[summary_names], data=summary_rows)

    # Mean altitude per segment; samples beyond n_segments*seg_len are ignored.
    seg_len = int(n_samples/n_segments)
    seg_names = [f"alt_seg{k}" for k in range(n_segments)]
    seg_rows = [[np.mean(alt[k*seg_len : (k+1)*seg_len])] for k in range(n_segments)]
    segments = pd.DataFrame(index=[seg_names], data=seg_rows).transpose()
    segments.columns = segments.columns.map(''.join)

    summary = summary.transpose()
    summary.columns = summary.columns.map(''.join)
    return pd.concat([summary, segments], axis=1)

#Madgwick filter, used to generate quaternions from fused accelerometer, gyroscope and magnetometer data
class Madgwick:
    """Madgwick orientation filter.

    Estimates one orientation quaternion per sample from gyroscope and
    accelerometer data (IMU mode) or from gyroscope, accelerometer and
    magnetometer data (MARG mode). When both ``acc`` and ``gyr`` are passed to
    the constructor, the whole sequence is processed immediately and stored in
    ``self.Q`` (one row of 4 values per sample).

    Relies on the module-level helpers ``acc2q``, ``ecompass``, ``q_prod`` and
    ``q_conj``.
    """

    def __init__(self, gyr: np.ndarray = None, acc: np.ndarray = None, mag: np.ndarray = None, **kwargs):
        """Store the inputs, resolve the parameters and optionally run the filter.

        Keyword arguments read here or in ``_set_gain``:
        ``frequency`` (default 100.0), ``Dt`` (default 1/frequency),
        ``q0`` (initial quaternion), ``gain``/``beta``, ``gain_imu``, ``gain_marg``.
        """
        self.gyr: np.ndarray = gyr
        self.acc: np.ndarray = acc
        self.mag: np.ndarray = mag
        self.frequency: float = kwargs.get('frequency', 100.0)
        # Sampling step; falls back to 0.01 if frequency is falsy (e.g. 0 or None).
        self.Dt: float = kwargs.get('Dt', (1.0/self.frequency) if self.frequency else 0.01)
        self.q0: np.ndarray = kwargs.get('q0')
        self._set_gain(**kwargs)
        self._assert_validity_of_inputs()
        # Batch mode: only runs when both accelerometer and gyroscope data are given.
        if self.acc is not None and self.gyr is not None:
            self.Q: np.ndarray = self._compute_all()

    def _set_gain(self, **kwargs) -> None:
        """Resolve the filter gain from the keyword arguments.

        Precedence: ``beta`` if given, otherwise ``gain``, otherwise
        ``gain_imu`` (no magnetometer) or ``gain_marg`` (magnetometer present).
        """
        self.gain_imu: float = kwargs.get('gain_imu', 0.033)
        self.gain_marg: float = kwargs.get('gain_marg', 0.041)
        self.gain: float = kwargs.get('beta')  # Setting gain with `beta` will be removed in the future.
        if self.gain is None:
            self.gain: float = kwargs.get('gain', self.gain_imu if self.mag is None else self.gain_marg)

    def _assert_validity_of_inputs(self):
        """Validate the numeric parameters and the optional initial quaternion.

        Raises TypeError if a parameter is a bool or not an int/float, or if
        ``q0`` is not a list/tuple/ndarray. Raises ValueError if a parameter is
        not strictly positive, or if ``q0`` does not have shape (4,) or unit norm.
        """
        for item in ["frequency", "Dt", "gain", "gain_imu", "gain_marg"]:
            # bool is a subclass of int, so it has to be rejected explicitly first.
            if isinstance(self.__getattribute__(item), bool):
                raise TypeError(f"Parameter '{item}' must be numeric.")
            if not isinstance(self.__getattribute__(item), (int, float)):
                raise TypeError(f"Parameter '{item}' is not a non-zero number.")
            if self.__getattribute__(item) <= 0.0:
                raise ValueError(f"Parameter '{item}' must be a non-zero number.")
        if self.q0 is not None:
            if not isinstance(self.q0, (list, tuple, np.ndarray)):
                raise TypeError(f"Parameter 'q0' must be an array. Got {type(self.q0)}.")
            self.q0 = np.copy(self.q0)
            if self.q0.shape != (4,):
                raise ValueError(f"Parameter 'q0' must be an array of shape (4,). It is {self.q0.shape}.")
            if not np.allclose(np.linalg.norm(self.q0), 1.0):
                raise ValueError(f"Parameter 'q0' must be a versor (norm equal to 1.0). Its norm is equal to {np.linalg.norm(self.q0)}.")


    def _compute_all(self) -> np.ndarray:
        """Run the filter over all stored samples and return an (N, 4) quaternion array.

        Raises ValueError if ``acc`` (or ``mag``, when given) does not have the
        same shape as ``gyr``.
        """

        self.gyr = np.copy(self.gyr)
        self.acc = np.copy(self.acc)
        if self.acc.shape != self.gyr.shape:
            raise ValueError("acc and gyr are not the same size")
        num_samples = len(self.acc)
        Q = np.zeros((num_samples, 4))
        # Compute with IMU architecture
        if self.mag is None:
            # Initial orientation from the first accelerometer sample unless q0 was supplied.
            Q[0] = acc2q(self.acc[0]) if self.q0 is None else self.q0/np.linalg.norm(self.q0)
            for t in range(1, num_samples):
                Q[t] = self.updateIMU(Q[t-1], self.gyr[t], self.acc[t])
            return Q
        # Compute with MARG architecture
        self.mag = np.copy(self.mag)
        if self.mag.shape != self.gyr.shape:
            raise ValueError("mag and gyr are not the same size")
        # Initial orientation from the first accelerometer/magnetometer pair;
        # note that q0 is not consulted in this branch.
        Q[0] = ecompass(self.acc[0], self.mag[0], frame='NED', representation='quaternion')
        for t in range(1, num_samples):
            Q[t] = self.updateMARG(Q[t-1], self.gyr[t], self.acc[t], self.mag[t])
        return Q

    def updateMARG(self, q: np.ndarray, gyr: np.ndarray, acc: np.ndarray, mag: np.ndarray, dt: float = None) -> np.ndarray:
        """Advance quaternion ``q`` by one step using gyro, accelerometer and magnetometer.

        Returns ``q`` unchanged when the gyroscope sample is missing or has zero
        norm, and delegates to ``updateIMU`` when the magnetometer sample is
        missing or has zero norm. ``dt`` defaults to ``self.Dt``.
        The returned quaternion is normalised.
        """
        dt = self.Dt if dt is None else dt
        if gyr is None or not np.linalg.norm(gyr) > 0:
            return q
        if mag is None or not np.linalg.norm(mag) > 0:
            return self.updateIMU(q, gyr, acc)
        qDot = 0.5 * q_prod(q, [0, *gyr])                           # (eq. 12)
        a_norm = np.linalg.norm(acc)
        # The gradient correction is skipped when the accelerometer reads zero.
        if a_norm > 0:
            a = acc/a_norm
            m = mag/np.linalg.norm(mag)
            # Rotate normalized magnetometer measurements
            h = q_prod(q, q_prod([0, *m], q_conj(q)))               # (eq. 45)
            bx = np.linalg.norm([h[1], h[2]])                       # (eq. 46)
            bz = h[3]
            qw, qx, qy, qz = q/np.linalg.norm(q)
            # Objective function (eq. 31)
            f = np.array([2.0*(qx*qz - qw*qy)   - a[0],
                            2.0*(qw*qx + qy*qz)   - a[1],
                            2.0*(0.5-qx**2-qy**2) - a[2],
                            2.0*bx*(0.5 - qy**2 - qz**2) + 2.0*bz*(qx*qz - qw*qy)       - m[0],
                            2.0*bx*(qx*qy - qw*qz)       + 2.0*bz*(qw*qx + qy*qz)       - m[1],
                            2.0*bx*(qw*qy + qx*qz)       + 2.0*bz*(0.5 - qx**2 - qy**2) - m[2]])
            # Jacobian (eq. 32)
            J = np.array([[-2.0*qy,               2.0*qz,              -2.0*qw,               2.0*qx             ],
                            [ 2.0*qx,               2.0*qw,               2.0*qz,               2.0*qy             ],
                            [ 0.0,                 -4.0*qx,              -4.0*qy,               0.0                ],
                            [-2.0*bz*qy,            2.0*bz*qz,           -4.0*bx*qy-2.0*bz*qw, -4.0*bx*qz+2.0*bz*qx],
                            [-2.0*bx*qz+2.0*bz*qx,  2.0*bx*qy+2.0*bz*qw,  2.0*bx*qx+2.0*bz*qz, -2.0*bx*qw+2.0*bz*qy],
                            [ 2.0*bx*qy,            2.0*bx*qz-4.0*bz*qx,  2.0*bx*qw-4.0*bz*qy,  2.0*bx*qx          ]])
            gradient = J.T@f                                        # (eq. 34)
            # NOTE(review): unlike updateIMU there is no guard for a zero-norm
            # objective here, so a zero gradient would divide by zero.
            gradient /= np.linalg.norm(gradient)
            qDot -= self.gain*gradient                              # (eq. 33)
        q_new = q + qDot*dt                                         # (eq. 13)
        q_new /= np.linalg.norm(q_new)
        return q_new

    def updateIMU(self, q: np.ndarray, gyr: np.ndarray, acc: np.ndarray, dt: float = None) -> np.ndarray:
        """Advance quaternion ``q`` by one step using gyro and accelerometer only.

        Returns ``q`` unchanged when the gyroscope sample is missing or has zero
        norm. ``dt`` defaults to ``self.Dt``. The returned quaternion is normalised.
        """
        dt = self.Dt if dt is None else dt
        if gyr is None or not np.linalg.norm(gyr) > 0:
            return q
        qDot = 0.5 * q_prod(q, [0, *gyr])                           # (eq. 12)
        a_norm = np.linalg.norm(acc)
        # The gradient correction is skipped when the accelerometer reads zero.
        if a_norm > 0:
            a = acc/a_norm
            qw, qx, qy, qz = q/np.linalg.norm(q)
            # Objective function (eq. 25)
            f = np.array([2.0*(qx*qz - qw*qy)   - a[0],
                          2.0*(qw*qx + qy*qz)   - a[1],
                          2.0*(0.5-qx**2-qy**2) - a[2]])
            # Only correct when the objective is non-zero, which also avoids
            # normalising a zero gradient.
            if np.linalg.norm(f) > 0:
                # Jacobian (eq. 26)
                J = np.array([[-2.0*qy,  2.0*qz, -2.0*qw, 2.0*qx],
                              [ 2.0*qx,  2.0*qw,  2.0*qz, 2.0*qy],
                              [ 0.0,    -4.0*qx, -4.0*qy, 0.0   ]])
                # Objective Function Gradient
                gradient = J.T@f                                    # (eq. 34)
                gradient /= np.linalg.norm(gradient)
                qDot -= self.gain*gradient                          # (eq. 33)
        q_new = q + qDot*dt                                         # (eq. 13)
        q_new /= np.linalg.norm(q_new)
        return q_new

def ecompass(a: np.ndarray, m: np.ndarray, frame: str = 'ENU', representation: str = 'rotmat') -> np.ndarray:
    """Build an orientation from one accelerometer and one magnetometer sample.

    Parameters
    ----------
    a, m : array-like with 3 elements
        Accelerometer and magnetometer samples (copied, never modified).
    frame : {'ENU', 'NED'}, case-insensitive
        Local tangent plane convention.
    representation : {'rotmat', 'quaternion', 'rpy', 'axisangle'}, case-insensitive
        Output form: 3x3 matrix, quaternion (via ``chiaverini``),
        array ``[roll, pitch, yaw]``, or tuple ``(axis, angle)``.

    Raises
    ------
    ValueError
        For an unknown frame or representation, or inputs whose last
        dimension is not 3.
    """
    frame_key = frame.upper()
    if frame_key not in ('ENU', 'NED'):
        raise ValueError("Wrong local tangent plane coordinate frame. Try 'ENU' or 'NED'")
    rep_key = representation.lower()
    if rep_key not in ('rotmat', 'quaternion', 'rpy', 'axisangle'):
        raise ValueError("Wrong representation type. Try 'rotmat', 'quaternion', 'rpy', or 'axisangle'")

    a = np.copy(a)
    m = np.copy(m)
    if a.shape[-1] != 3 or m.shape[-1] != 3:
        raise ValueError("Input vectors must have exactly 3 elements.")

    m /= np.linalg.norm(m)
    Rz = a/np.linalg.norm(a)
    # The remaining two axes come from cross products with the magnetic vector;
    # the order depends on the chosen frame.
    if frame_key == 'NED':
        Ry = np.cross(Rz, m)
        Rx = np.cross(Ry, Rz)
    else:
        Rx = np.cross(m, Rz)
        Ry = np.cross(Rz, Rx)
    Rx /= np.linalg.norm(Rx)
    Ry /= np.linalg.norm(Ry)
    R = np.c_[Rx, Ry, Rz].T

    if rep_key == 'rotmat':
        return R
    if rep_key == 'quaternion':
        return chiaverini(R)
    if rep_key == 'rpy':
        roll = np.arctan2(R[1, 2], R[2, 2])
        pitch = -np.arcsin(R[0, 2])
        yaw = np.arctan2(R[0, 1], R[0, 0])
        return np.array([roll, pitch, yaw])

    # Only 'axisangle' is left at this point.
    angle = np.arccos((R.trace()-1)/2)
    axis = np.zeros(3)
    if angle != 0:
        skew = np.array([R[2, 1]-R[1, 2], R[0, 2]-R[2, 0], R[1, 0]-R[0, 1]])
        axis = skew/(2*np.sin(angle))
    return (axis, angle)

def chiaverini(dcm: np.ndarray) -> np.ndarray:
    """Convert one 3x3 rotation matrix, or a stack of N of them, to quaternions.

    Returns an array of shape (4,) for a single matrix or (N, 4) for a stack,
    normalised to unit length. For a single matrix whose four components all
    come out as zero, the identity quaternion is returned.

    Raises ValueError if the input is not 2- or 3-dimensional or if its last
    two dimensions are not (3, 3).
    """
    dcm = np.copy(dcm)
    if dcm.ndim not in [2, 3]:
        raise ValueError('dcm must be a 2- or 3-dimensional array.')
    if dcm.shape[-2:] != (3, 3):
        raise ValueError(f"dcm must be an array of shape 3-by-3 or N-by-3-by-3. Got {dcm.shape}")

    def vector_part(sign_source, diag_combo):
        # Magnitude from the diagonal combination, sign from the off-diagonal difference.
        return 0.5*np.sign(sign_source)*np.sqrt(np.clip(diag_combo, -1.0, 1.0) + 1.0)

    if dcm.ndim == 3:
        # Stack of matrices: process all of them at once.
        Q = np.zeros((dcm.shape[0], 4))
        Q[:, 0] = 0.5*np.sqrt(np.clip(dcm.trace(axis1=1, axis2=2), -1.0, 3.0) + 1.0)
        Q[:, 1] = vector_part(dcm[:, 2, 1] - dcm[:, 1, 2], dcm[:, 0, 0]-dcm[:, 1, 1]-dcm[:, 2, 2])
        Q[:, 2] = vector_part(dcm[:, 0, 2] - dcm[:, 2, 0], dcm[:, 1, 1]-dcm[:, 2, 2]-dcm[:, 0, 0])
        Q[:, 3] = vector_part(dcm[:, 1, 0] - dcm[:, 0, 1], dcm[:, 2, 2]-dcm[:, 0, 0]-dcm[:, 1, 1])
        Q /= np.linalg.norm(Q, axis=1)[:, None]
        return Q

    # Single matrix.
    q = np.zeros(4)
    q[0] = 0.5*np.sqrt(np.clip(dcm.trace(), -1.0, 3.0) + 1.0)
    q[1] = vector_part(dcm[2, 1]-dcm[1, 2], dcm[0, 0]-dcm[1, 1]-dcm[2, 2])
    q[2] = vector_part(dcm[0, 2]-dcm[2, 0], dcm[1, 1]-dcm[2, 2]-dcm[0, 0])
    q[3] = vector_part(dcm[1, 0]-dcm[0, 1], dcm[2, 2]-dcm[0, 0]-dcm[1, 1])
    if not any(q):
        # Degenerate case: fall back to the identity rotation.
        q[0] = 1.0
    q /= np.linalg.norm(q)
    return q

def q_prod(p: np.ndarray, q: np.ndarray) -> np.ndarray:
    """Return the quaternion product ``p * q`` as a float array of length 4.

    Both operands are indexed as ``[w, x, y, z]``.
    """
    pw, px, py, pz = p[0], p[1], p[2], p[3]
    qw, qx, qy, qz = q[0], q[1], q[2], q[3]
    product = np.zeros(4)
    product[:] = (
        pw*qw - px*qx - py*qy - pz*qz,
        pw*qx + px*qw + py*qz - pz*qy,
        pw*qy - px*qz + py*qw + pz*qx,
        pw*qz + px*qy - py*qx + pz*qw,
    )
    return product

def q_conj(q: np.ndarray) -> np.ndarray:
    """Return the conjugate of a quaternion (or of each row of an (N, 4) array).

    The scalar part is kept and the three vector components are negated.
    Raises ValueError when the input has more than 2 dimensions or its last
    dimension is not 4.
    """
    quat = np.copy(q)
    if not (quat.ndim <= 2 and quat.shape[-1] == 4):
        raise ValueError(f"Quaternion must be of shape (4,) or (N, 4), but has shape {quat.shape}")
    flip_vector_part = np.array([1., -1., -1., -1.])
    return flip_vector_part*np.array(quat)

def q2euler(q: np.ndarray) -> np.ndarray:
    """Convert a quaternion ``[w, x, y, z]`` to the angles ``[phi, theta, psi]`` in radians.

    Parameters
    ----------
    q : array-like of length 4

    Returns
    -------
    numpy.ndarray or None
        ``[phi, theta, psi]`` (rotation about x, y and z as computed below),
        ``np.zeros(3)`` for the exact identity quaternion, or ``None`` when
        ``q`` does not have exactly 4 elements.
    """
    # Validate the length first; the former order subtracted arrays before
    # checking and could fail with a broadcasting error instead of returning None.
    if len(q) != 4:
        return None
    # Exact identity shortcut. The former test compared the *sum* of the
    # component differences with zero, so any quaternion whose components add
    # up to 1 (e.g. [0, 1, 0, 0]) was wrongly reported as "no rotation".
    if np.array_equal(q, [1.0, 0.0, 0.0, 0.0]):
        return np.zeros(3)
    R_00 = 2.0*q[0]**2 - 1.0 + 2.0*q[1]**2
    R_10 = 2.0*(q[1]*q[2] - q[0]*q[3])
    R_20 = 2.0*(q[1]*q[3] + q[0]*q[2])
    R_21 = 2.0*(q[2]*q[3] - q[0]*q[1])
    R_22 = 2.0*q[0]**2 - 1.0 + 2.0*q[3]**2
    # rotation around x, roll
    phi = np.arctan2(R_21, R_22)
    # rotation around y, pitch
    theta = -np.arctan(R_20/np.sqrt(1.0-R_20**2))
    # rotation around z, yaw
    psi = np.arctan2(R_10, R_00)
    return np.array([phi, theta, psi])

def acc2q(a: np.ndarray, return_euler: bool = False) -> np.ndarray:
    """Estimate an orientation from a single accelerometer sample.

    Roll and pitch are derived from the direction of the measured vector; yaw
    is set to zero.

    Parameters
    ----------
    a : array-like of length 3
        Accelerometer sample.
    return_euler : bool
        When True, return ``[roll, pitch, yaw]`` in degrees instead of a
        quaternion.

    Returns
    -------
    numpy.ndarray
        Unit quaternion ``[w, x, y, z]`` (or the angles in degrees). If ``a``
        has zero norm or is not of length 3, the identity quaternion is
        returned regardless of ``return_euler``.
    """
    q = np.array([1.0, 0.0, 0.0, 0.0])
    a_norm = np.linalg.norm(a)
    if a_norm > 0 and len(a) == 3:
        ax, ay, az = a
        # Normalize accelerometer measurements
        ax /= a_norm
        ay /= a_norm
        az /= a_norm
        # Euler Angles from Gravity vector
        ex = np.arctan2(ay, az)
        ey = np.arctan2(-ax, np.sqrt(ay**2 + az**2))
        ez = 0.0
        if return_euler:
            # np.degrees replaces the constant RAD2DEG, which was never defined
            # in this file and made this branch raise NameError.
            return np.degrees(np.array([ex, ey, ez]))
        # Euler to Quaternion
        cx2 = np.cos(ex/2.0)
        sx2 = np.sin(ex/2.0)
        cy2 = np.cos(ey/2.0)
        sy2 = np.sin(ey/2.0)
        q = np.array([cx2*cy2, sx2*cy2, cx2*sy2, -sx2*sy2])
        q /= np.linalg.norm(q)
    return q

#drop extra samples since phone measurements don't always have the same lengths(length differs by 2 at most)
def equalize_lengths(ax,ay,az,gx,gy,gz,mx,my,mz):
    """Trim nine traces to the length of the shortest one.

    Surplus samples are dropped from the end. A trace that already has the
    minimum length is returned as the same object, untouched.

    Returns a 9-tuple in the input order (ax, ay, az, gx, gy, gz, mx, my, mz).
    """
    traces = (ax, ay, az, gx, gy, gz, mx, my, mz)
    shortest = min(len(trace) for trace in traces)
    return tuple(
        trace[:shortest] if len(trace) > shortest else trace
        for trace in traces
    )

def madgwick_headings(ax,ay,az,gx,gy,gz,mx,my,mz):
    """Accumulate per-sample heading vectors from a Madgwick orientation estimate.

    The nine traces are trimmed to a common length, fused into quaternions by
    ``Madgwick`` and each quaternion is used to rotate the unit vector
    [0, 0, 1]. The rotated vectors are summed sample after sample.

    Returns three numpy arrays ``(x, y, z)`` holding the running sum's
    components, one entry per quaternion.
    """
    ax, ay, az, gx, gy, gz, mx, my, mz = equalize_lengths(ax,ay,az,gx,gy,gz,mx,my,mz)

    # Each sensor becomes an (N, 3) array. Note the columns are stacked in
    # z, y, x order, not x, y, z.
    acc_data  = np.concatenate([np.array(az).reshape(-1,1),np.array(ay).reshape(-1,1),np.array(ax).reshape(-1,1)], axis=1)
    gyro_data = np.concatenate([np.array(gz).reshape(-1,1),np.array(gy).reshape(-1,1),np.array(gx).reshape(-1,1)], axis=1)
    mag_data  = np.concatenate([np.array(mz).reshape(-1,1),np.array(my).reshape(-1,1),np.array(mx).reshape(-1,1)], axis=1)
    #get quaternions
    # NOTE(review): `samplerate` is a global that is not defined anywhere in the
    # visible notebook; without it this line raises NameError - TODO confirm
    # where it is supposed to come from.
    madgwick = Madgwick(gyr=gyro_data, acc=acc_data, mag=mag_data, frequency=samplerate, gain=0.038)

    # Running sum of heading vectors; after the first `+=` with a numpy vector
    # this presumably becomes a numpy array (element-wise addition).
    current = [0, 0, 0]
    x = []
    y = [] 
    z = []

    limit = len(madgwick.Q)
    # limit=100
    for i in (range(limit)):
        #convert quaternions to rotation matrices
        # NOTE(review): the angles are scaled to degrees here, but from_euler is
        # called without degrees=True, so they appear to be interpreted as
        # radians. Changing this would alter the features consumed by the
        # loaded path model - confirm before touching.
        euler = q2euler(madgwick.Q[i]) *180/math.pi
        rot = Rotation.from_euler('xyz', euler)
        #rotate unit vector to get cartesian headings, x=0
        vector = np.array(rot.as_matrix()).dot(np.array([0,0,1]))
        #sum heading vectors to get a kind of heading array
        current += vector
        x.append(current[0])
        y.append(current[1])
        z.append(current[2])

    x = np.array(x)
    y = np.array(y)
    z = np.array(z)

    return x, y, z

def features_extraction_marg_pathid(ax,ay,az,gx,gy,gz,mx,my,mz):
    """Extract heading-based path features from phone accelerometer, gyro and magnetometer traces.

    The traces are passed to ``madgwick_headings``; from the returned
    accumulated heading arrays the following features are computed:

    * ``avgdist2d_phone`` / ``avgdist3d_phone`` - norm of (last + first) sample
      in 2-D (x, y) and 3-D (x, y, z),
    * ``dist1_phone`` ... ``dist5_phone`` - x/y distance between the first and
      last sample of each of five equal-length segments,
    * ``north``, ``east``, ``south``, ``west`` - number of samples whose
      bearing ``atan2(x, z)`` (shifted to 0..360 degrees) falls into each
      90-degree quadrant.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the 11 columns above. If the heading
        computation fails with any ``Exception``, a warning is issued and all
        11 values are zero, so one bad recording does not stop the run.
    """
    feature_names = [name + '_phone' for name in
                     ('avgdist2d', 'avgdist3d', 'dist1', 'dist2', 'dist3', 'dist4', 'dist5')]
    heading_names = ['north', 'east', 'south', 'west']
    n_segments = 5

    try:
        x, y, z = madgwick_headings(ax,ay,az,gx,gy,gz,mx,my,mz)

        avgdist2d = math.sqrt((x[-1]+x[0])**2 + (y[-1]+y[0])**2)
        avgdist3d = math.sqrt((x[-1]+x[0])**2 + (y[-1]+y[0])**2 + (z[-1]+z[0])**2)

        # Distance covered within each of the five consecutive segments.
        seglen = int(len(x)/n_segments)
        segment_dists = [
            math.sqrt((x[seglen*(k+1)-1] - x[seglen*k])**2 + (y[seglen*(k+1)-1] - y[seglen*k])**2)
            for k in range(n_segments)
        ]
    except Exception as err:
        # Best-effort fallback: 7 distance features + 4 heading counters = 11 zeros.
        # `except Exception` (instead of a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, and the warning makes the failure visible.
        warnings.warn(
            f"Phone heading features unavailable ({type(err).__name__}: {err}); using zeros.",
            RuntimeWarning,
        )
        all_names = feature_names + heading_names
        df_features = pd.DataFrame(index=[all_names], data=np.zeros(len(all_names)))
        df_features = pd.DataFrame.transpose(df_features)
        df_features.columns = df_features.columns.map(''.join)
        return df_features

    df_features = pd.DataFrame(index=[feature_names], data=[avgdist2d, avgdist3d, *segment_dists])
    df_features = pd.DataFrame.transpose(df_features)
    df_features.columns = df_features.columns.map(''.join)

    # Count how many samples point into each quadrant.
    north = 0; east = 0; south = 0; west = 0
    for x_i, z_i in zip(x, z):
        bearing = (math.atan2(x_i, z_i) * 180.0 / math.pi) + 180.0

        if bearing <= 90:
            north += 1
        elif bearing <= 180:
            east += 1
        elif bearing <= 270:
            south += 1
        else:
            west += 1

    seg_features = pd.DataFrame(index=[heading_names], data=[north, east, south, west])
    seg_features = pd.DataFrame.transpose(seg_features)
    seg_features.columns = seg_features.columns.map(''.join)
    df_features = pd.concat([df_features, seg_features], axis=1)

    return df_features


In [36]:
# Load the two models used for prediction in the loop below
# (file names are relative to the current working directory).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load model files that come from a trusted source.
watchpos_rf = joblib.load("group32_model_watchpos_acc97.5.pkl")
pathid_rf   = joblib.load("group32_model_path_id_acc0.775.pkl")

In [37]:
# Loop through all filenames to process recordings

# Smartphone traces consumed by the heading-based path features. They are
# optional: a recording may not contain them.
PHONE_TRACE_KEYS = ['phone_ax', 'phone_ay', 'phone_az',
                    'phone_gx', 'phone_gy', 'phone_gz',
                    'phone_mx', 'phone_my', 'phone_mz']

submission = []
for filename in tqdm(filenames):
    recording = Recording(filename)
    
    # Assumes filename format ends with a three-digit ID before ".pkl"
    match = re.search(r'(\d{3})\.pkl$', filename)
    if match:
        id = int(match.group(1))
    else:
        raise ValueError(f'Filename {filename} does not match expected format')

    # Watch position features: statistics of the accelerometer and gyroscope norms.
    watchpos_acc_norm = calc_norm(recording.data['ax'].values, recording.data['ay'].values, recording.data['az'].values, "acc_norm")
    watchpos_acc_features = features_extraction_watchpos(watchpos_acc_norm, "acc")

    watchpos_gyro_norm = calc_norm(recording.data['gx'].values, recording.data['gy'].values, recording.data['gz'].values, "gyro_norm")
    watchpos_gyro_features = features_extraction_watchpos(watchpos_gyro_norm, "gyro")

    watchpos_features = pd.concat([watchpos_acc_features, watchpos_gyro_features], axis=1)

    # Path classification features: altitude profile plus phone heading features.
    pathid_altitude_features = features_extraction_altitude_pathid(recording.data['altitude'].values)

    # The algorithm must not crash when the optional smartphone traces are missing.
    # Assumes a missing trace surfaces as KeyError/AttributeError - TODO confirm
    # against the Recording class. Empty inputs make the heading computation fail
    # inside features_extraction_marg_pathid, which then returns its all-zero
    # fallback features.
    try:
        phone_traces = [recording.data[key].values for key in PHONE_TRACE_KEYS]
    except (KeyError, AttributeError):
        phone_traces = [np.array([]) for _ in PHONE_TRACE_KEYS]
    pathid_bearing_features = features_extraction_marg_pathid(*phone_traces)

    pathid_features = pd.concat([pathid_altitude_features, pathid_bearing_features],axis=1)

    path_idx = int(np.rint(pathid_rf.predict(pathid_features))[0])  # Integer, path in {0, 1, 2, 3, 4}
    watch_loc = int(np.rint(watchpos_rf.predict(watchpos_features))[0])  # Integer, 0: left wrist, 1: belt, 2: right ankle
    # The remaining outputs are not implemented and keep their template defaults.
    standing = False  # Boolean, True if participant was standing still throughout the recording
    walking = False  # Boolean, True if participant was walking throughout the recording
    running = False  # Boolean, True if participant was running throughout the recording
    cycling = False  # Boolean, True if participant was cycling throughout the recording
    step_count = 0  # Integer, number of steps, must be provided for each recording

    predictions = {
        'Id': id, 
        'watch_loc': watch_loc, 
        'path_idx': path_idx,
        'standing': standing,
        'walking': walking,
        'running': running,
        'cycling': cycling,
        'step_count': step_count
        }

    submission.append(predictions)

100%|██████████| 280/280 [03:18<00:00,  1.41it/s]


In [38]:
# Write the predicted values into a .csv file to then upload the .csv file to Kaggle
# When cross-checking the .csv file on your computer, we recommend using a text editor and NOT excel so that the results are displayed correctly
# IMPORTANT: Do NOT change the name of the columns of the .csv file ("Id", "watch_loc", "path_idx", "standing", "walking", "running", "cycling", "step_count")
# One row per recording; the explicit column list fixes the column order of the CSV.
submission_df = pd.DataFrame(submission, columns=['Id', 'watch_loc', 'path_idx', 'standing', 'walking', 'running', 'cycling', 'step_count'])
# Kaggle output location (disabled); the active line below writes to the current working directory instead.
# submission_df.to_csv('/kaggle/working/submission.csv', index=False)
submission_df.to_csv('group32_submission.csv', index=False)