In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
# You may change the mhealth_activity module but your algorithm must support the original version
from mhealth_activity import Recording, Trace, Activity, WatchLocation, Path
from tqdm import tqdm
from multiprocessing import Pool
import pickle
# For interactive plots, uncomment the following line
# %matplotlib widget

In [3]:
create_data_pickle = True
if create_data_pickle:
    files = os.listdir('data/train')
    list_of_dicts = []
    types_to_include = ['ax', 'ay', 'az','gx', 'gy', 'gz', 'phone_ax', 'phone_ay', 'phone_az', 'mx', 'my', 'mz', 'phone_mx', 'phone_my', 'phone_mz', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps', 'temperature']

    for file in tqdm(files):
        Dict = {}
        d = Recording(os.path.join('data/train',file))

        Dict['labels'] = d.labels
        for data_type in types_to_include:
            if data_type in d.data.keys():
                Dict[data_type] = d.data[data_type]
        list_of_dicts.append(Dict)

    data = pd.DataFrame(list_of_dicts)
    data.to_pickle(path='data/pickled_and_sorted_training_data.pkl.zst', compression={'method': 'zstd'})
else:
    data = pd.read_pickle('data/pickled_and_sorted_training_data.pkl.zst')

100%|██████████| 396/396 [01:54<00:00,  3.46it/s]


In [8]:
data['phone_ax'][0].values

array([4.0063972 , 3.82354005, 3.70652345, ..., 1.81136305, 1.98539158,
       2.06260458])

In [3]:
def mag_norm(input):
    return (np.sqrt(input[1]['mx'].values**2 + input[1]['my'].values**2 + input[1]['mz'].values**2))

def calc_magneto_mag(input):

    mag = []
    with Pool(8) as p:
        mag = p.map(mag_norm, data.iterrows())        
    # print(mag)
    return mag

def accel_norm(input):
    return (np.sqrt(input[1]['ax'].values**2 + input[1]['ay'].values**2 + input[1]['az'].values**2))

def calc_accel_mag(input):

    mag = []
    with Pool(8) as p:
        mag = p.map(accel_norm, data.iterrows())        
    print(mag)
    return mag

def gyro_norm(input):
    return (np.sqrt(input[1]['gx'].values**2 + input[1]['gy'].values**2 + input[1]['gz'].values**2))

def calc_gyro_mag(input):

    mag = []
    with Pool(8) as p:
        mag = p.map(accel_norm, data.iterrows())        
    print(mag)
    return mag




calc_watch_vals = False
if calc_watch_vals:

    accel_mag   = calc_accel_mag(data)
    magneto_mag = calc_magneto_mag(data)
    gyro_mag    = calc_gyro_mag(data)

    with open('data/accel_mag_train.pkl', 'wb') as f:
        pickle.dump(accel_mag, f)
        f.close()

    with open('data/magneto_mag_train.pkl', 'wb') as f:
        pickle.dump(magneto_mag, f)
        f.close()

    with open('data/gyro_mag_train.pkl', 'wb') as f:
        pickle.dump(gyro_mag, f)
        f.close()




    

    

[array([1.01920062, 1.02640566, 1.02624003, ..., 1.06292329, 1.08907452,
       1.13337015]), array([1.01515228, 1.04539054, 1.07295749, ..., 1.18719988, 1.16663535,
       1.16840683]), array([1.0076106 , 1.00275902, 1.00581143, ..., 1.1451823 , 1.15308679,
       1.1590346 ]), array([0.99868443, 0.99490372, 0.98949981, ..., 1.61380057, 1.6037108 ,
       1.56463281]), array([1.02577051, 1.02451698, 1.02119522, ..., 1.07269576, 1.05412275,
       1.03627699]), array([1.0148285 , 1.01271724, 1.0228904 , ..., 0.99729727, 1.00479185,
       0.98858125]), array([1.01072181, 1.00757842, 1.00751455, ..., 1.01234072, 1.00810022,
       1.00797368]), array([0.98426607, 0.98848258, 0.98800879, ..., 0.98618562, 0.98513852,
       0.98434865]), array([1.05966894, 1.05799194, 1.04621242, ..., 1.04352832, 1.04335459,
       1.04201302]), array([1.06405473, 1.12402012, 1.18505268, ..., 0.94826837, 0.96433597,
       0.97492921]), array([0.98081137, 0.98544996, 0.98543063, ..., 0.93460661, 0.9234648

In [None]:
#handle phone data too, doesnt really work now
def phone_mag_norm(input):
    return (np.sqrt(input[1]['phone_mx'].values**2 + input[1]['phone_my'].values**2 + input[1]['phone_mz'].values**2))

def calc_phone_magneto_mag(input):

    mag = []
    with Pool(8) as p:
        mag = p.map(phone_mag_norm, data.iterrows())        
    # print(mag)
    return mag

def phone_accel_norm(input):
    if(len(input[1]['phone_ax'].values) != len(input[1]['phone_ay'].values**2) or len (input[1]['phone_ay'].values**2 ) != len(input[1]['phone_az'].values**2) or len (input[1]['phone_ax'].values**2 ) != len(input[1]['phone_az'].values**2)):
        print(f"{len(input[1]['phone_ax'].values)} {len(input[1]['phone_ay'].values**2)} {len(input[1]['phone_az'].values**2)}" )

    return (np.sqrt(input[1]['phone_ax'].values**2 + input[1]['phone_ay'].values**2 + input[1]['phone_az'].values**2))

def calc_phone_accel_mag(input):

    mag = []
    with Pool(8) as p:
        mag = p.map(phone_accel_norm, data.iterrows())        
    print(mag)
    return mag

calc_phone_mags = False
if calc_phone_mags:
    phone_accel_mag = calc_phone_accel_mag(data)
    phone_magneto_mag = calc_phone_magneto_mag(data)
    with open('data/phone_accel_mag_train.pkl', 'wb') as f:
        pickle.dump(phone_accel_mag, f)
        f.close()

    with open('data/phone_magneto_mag_train.pkl', 'wb') as f:
        pickle.dump(phone_magneto_mag, f)
        f.close()

In [5]:
#rinse and repeat for test data
create_data_pickle = False
if create_data_pickle:
    files = os.listdir('data/test')
    list_of_dicts = []
    types_to_include = ['ax', 'ay', 'az', 'phone_ax', 'phone_ay', 'phone_az', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps']

    for file in tqdm(files):
        Dict = {}
        d = Recording(os.path.join('data/test',file))

        Dict['labels'] = d.labels
        for data_type in types_to_include:
            if data_type in d.data.keys():
                Dict[data_type] = d.data[data_type]
        list_of_dicts.append(Dict)

    data_test = pd.DataFrame(list_of_dicts)
    data_test.to_pickle(path='data/pickled_and_sorted_test_data.pkl.zst', compression={'method': 'zstd'})
else:
    data_test = pd.read_pickle('data/pickled_and_sorted_test_data.pkl.zst')

In [6]:
calc_a_mag = True
if calc_a_mag:

    accel_mag = calc_accel_mag(data_test)
    with open('data/accel_mag_test.pkl', 'wb') as f:
        pickle.dump(accel_mag, f)
        f.close()

else:
    file = open('data/accel_mag_test.pkl', 'rb')
    accel_mag =  pickle.load(file)
    file.close()
    # print("aaa")
    # print(accel_mag)

[array([1.00849538, 1.00338404, 1.0075361 , ..., 0.75031102, 0.76019118,
       0.78002019]), array([1.00909688, 1.01767459, 1.03675198, ..., 2.21295618, 2.38624792,
       2.45164587]), array([0.97719184, 0.97660059, 0.97390352, ..., 1.01726321, 1.01872537,
       1.00739797]), array([1.02817484, 1.01810212, 1.03084999, ..., 1.37557349, 1.34933476,
       1.33327974]), array([0.84979997, 0.76732125, 0.708202  , ..., 0.74000526, 0.76636617,
       0.78789975]), array([1.01830847, 1.01294395, 1.00513742, ..., 1.01280372, 1.01409107,
       1.01261488]), array([1.03032258, 1.02972513, 1.03264209, ..., 0.94351994, 0.94446377,
       0.93482963]), array([0.98916107, 0.9905856 , 0.99402913, ..., 0.99523883, 0.99865973,
       0.98941102]), array([0.77031397, 0.77865681, 0.79194018, ..., 1.31044397, 1.38141706,
       1.44725831]), array([0.95027178, 0.95440069, 0.95662313, ..., 0.75296519, 0.77630116,
       0.80147058]), array([0.99171908, 0.98770864, 0.98974492, ..., 2.26332125, 2.3491135

In [6]:
d = Recording('data/train/train_trace_000.pkl')
print(d.data.keys())

dict_keys(['phone_mx', 'gx', 'altitude', 'phone_rotx', 'phone_steps', 'longitude', 'phone_gravy', 'lostPackets', 'phone_gz', 'latitude', 'phone_my', 'timestamp', 'gz', 'ax', 'my', 'phone_pressure', 'phone_gravx', 'phone_gy', 'phone_orientationx', 'temperature', 'phone_az', 'mz', 'az', 'gy', 'phone_mz', 'phone_rotm', 'phone_ax', 'packetNumber', 'phone_orientationz', 'phone_gravz', 'speed', 'bearing', 'phone_gx', 'phone_lax', 'phone_laz', 'phone_rotz', 'ay', 'phone_roty', 'phone_lay', 'mx', 'phone_ay', 'phone_orientationy'])
