In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
# You may change the mhealth_activity module but your algorithm must support the original version
from mhealth_activity import Recording, Trace, Activity, WatchLocation, Path
from tqdm import tqdm
from multiprocessing import Pool
import pickle
# For interactive plots, uncomment the following line
# %matplotlib widget

In [7]:
# Build the training frame from the raw recordings (slow) or load the cached pickle.
# Each row is one recording; each column holds the object stored under that channel
# name in `Recording.data`.
create_data_pickle = False
if create_data_pickle:
    # os.listdir() returns entries in arbitrary order; sort them so the row order of
    # the pickled frame is deterministic and reproducible across machines and runs.
    files = sorted(os.listdir('data/train'))
    list_of_dicts = []
    # types_to_include = ['ax', 'ay', 'az','gx', 'gy', 'gz', 'phone_ax', 'phone_ay', 'phone_az', 'mx', 'my', 'mz', 'phone_mx', 'phone_my', 'phone_mz', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps', 'temperature']
    types_to_include = ['ax', 'ay', 'az','gx', 'gy', 'gz','mx', 'my', 'mz',  'speed', 'altitude', 'phone_steps', 'temperature']

    for file in tqdm(files):
        d = Recording(os.path.join('data/train', file))

        Dict = {'labels': d.labels}
        # Not every recording provides every channel; missing ones are simply
        # absent from the dict and become NaN in the resulting frame.
        for data_type in types_to_include:
            if data_type in d.data:
                Dict[data_type] = d.data[data_type]
        list_of_dicts.append(Dict)

    data = pd.DataFrame(list_of_dicts)
    data.to_pickle(path='data/pickled_and_sorted_training_data.pkl.zst', compression={'method': 'zstd'})
else:
    data = pd.read_pickle('data/pickled_and_sorted_training_data.pkl.zst')
    # Keep only the gyroscope channels (gx, gy, gz) when loading from the cache.
    # NOTE: this happens only in this branch; a freshly built frame keeps all columns.
    data = data.drop(columns=['labels', 'ax', 'ay', 'az', 'mx', 'my', 'mz', 'speed', 'altitude',
       'phone_steps', 'temperature'])

In [8]:
data.columns

Index(['gx', 'gy', 'gz'], dtype='object')

In [10]:
def mag_norm(input):
    """Return the element-wise Euclidean norm of the 'mx', 'my' and 'mz' channels.

    `input` is an ``(index, row)`` pair such as those yielded by
    ``DataFrame.iterrows()``; each of the three row entries must expose a
    ``.values`` array.
    """
    row = input[1]
    sum_of_squares = sum(row[axis].values ** 2 for axis in ('mx', 'my', 'mz'))
    return np.sqrt(sum_of_squares)

def calc_magneto_mag(input):
    """Compute the magnetometer magnitude for every row of `input`.

    Parameters
    ----------
    input : pandas.DataFrame
        Frame whose rows are handed to `mag_norm` (so it needs 'mx', 'my'
        and 'mz' columns).

    Returns
    -------
    list
        One `mag_norm` result per row, in row order.
    """
    # Iterate over the frame that was passed in rather than the notebook-global
    # `data`, so the function also works for other frames (e.g. the test set).
    with Pool(4) as p:
        mag = p.map(mag_norm, input.iterrows())
    return mag

def accel_norm(input):
    """Return the element-wise Euclidean norm of the 'ax', 'ay' and 'az' channels.

    `input` is an ``(index, row)`` pair such as those yielded by
    ``DataFrame.iterrows()``; each of the three row entries must expose a
    ``.values`` array.
    """
    row = input[1]
    sum_of_squares = sum(row[axis].values ** 2 for axis in ('ax', 'ay', 'az'))
    return np.sqrt(sum_of_squares)

def calc_accel_mag(input):
    """Compute the accelerometer magnitude for every row of `input`.

    Parameters
    ----------
    input : pandas.DataFrame
        Frame whose rows are handed to `accel_norm` (so it needs 'ax', 'ay'
        and 'az' columns).

    Returns
    -------
    list
        One `accel_norm` result per row, in row order.
    """
    # Iterate over the frame that was passed in rather than the notebook-global
    # `data`; otherwise a call such as calc_accel_mag(data_test) would silently
    # process the wrong frame.
    with Pool(2) as p:
        mag = p.map(accel_norm, input.iterrows())
    return mag

def gyro_norm(input):
    """Return the element-wise Euclidean norm of the 'gx', 'gy' and 'gz' channels.

    `input` is an ``(index, row)`` pair such as those yielded by
    ``DataFrame.iterrows()``; each of the three row entries must expose a
    ``.values`` array.
    """
    row = input[1]
    sum_of_squares = sum(row[axis].values ** 2 for axis in ('gx', 'gy', 'gz'))
    return np.sqrt(sum_of_squares)

def calc_gyro_mag(input):
    """Compute the gyroscope magnitude for every row of `input`.

    Parameters
    ----------
    input : pandas.DataFrame
        Frame whose rows are handed to `gyro_norm` (so it needs 'gx', 'gy'
        and 'gz' columns).

    Returns
    -------
    list
        One `gyro_norm` result per row, in row order.
    """
    # Iterate over the frame that was passed in rather than the notebook-global
    # `data`, so the function also works for other frames (e.g. the test set).
    with Pool(1) as p:
        mag = p.map(gyro_norm, input.iterrows())
    return mag




# Compute the watch sensor magnitudes for the training frame and cache them on disk.
# The accelerometer and magnetometer variants are disabled; they need the 'a*'/'m*'
# columns of `data`.
calc_watch_vals = True
if calc_watch_vals:

    # accel_mag   = calc_accel_mag(data)

    # with open('data/accel_mag_train.pkl', 'wb') as f:
    #     pickle.dump(accel_mag, f)

    # magneto_mag = calc_magneto_mag(data)
    # print("calced")

    # with open('data/magneto_mag_train.pkl', 'wb') as f:
    #     pickle.dump(magneto_mag, f)

    gyro_mag = calc_gyro_mag(data)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open('data/gyro_mag_train.pkl', 'wb') as f:
        pickle.dump(gyro_mag, f)




    

    

In [None]:
# Handle the phone data too; this doesn't really work yet.
def phone_mag_norm(input):
    """Return the element-wise Euclidean norm of 'phone_mx', 'phone_my', 'phone_mz'.

    `input` is an ``(index, row)`` pair such as those yielded by
    ``DataFrame.iterrows()``; each of the three row entries must expose a
    ``.values`` array.
    """
    row = input[1]
    channels = ('phone_mx', 'phone_my', 'phone_mz')
    sum_of_squares = sum(row[axis].values ** 2 for axis in channels)
    return np.sqrt(sum_of_squares)

def calc_phone_magneto_mag(input):
    """Compute the phone magnetometer magnitude for every row of `input`.

    Parameters
    ----------
    input : pandas.DataFrame
        Frame whose rows are handed to `phone_mag_norm` (so it needs
        'phone_mx', 'phone_my' and 'phone_mz' columns).

    Returns
    -------
    list
        One `phone_mag_norm` result per row, in row order.
    """
    # Iterate over the frame that was passed in rather than the notebook-global
    # `data`, so the function also works for other frames (e.g. the test set).
    with Pool(8) as p:
        mag = p.map(phone_mag_norm, input.iterrows())
    return mag

def phone_accel_norm(input):
    """Return the element-wise Euclidean norm of 'phone_ax', 'phone_ay', 'phone_az'.

    `input` is an ``(index, row)`` pair such as those yielded by
    ``DataFrame.iterrows()``; each of the three row entries must expose a
    ``.values`` array. If the three arrays differ in length, their lengths
    are printed before the sum is attempted.
    """
    row = input[1]
    x, y, z = (row[axis].values for axis in ('phone_ax', 'phone_ay', 'phone_az'))

    n_x, n_y, n_z = len(x), len(y), len(z)
    if not (n_x == n_y == n_z):
        # Diagnostic only: the addition below is still executed afterwards.
        print(f"{n_x} {n_y} {n_z}" )

    return np.sqrt(x**2 + y**2 + z**2)

def calc_phone_accel_mag(input):
    """Compute the phone accelerometer magnitude for every row of `input`.

    Parameters
    ----------
    input : pandas.DataFrame
        Frame whose rows are handed to `phone_accel_norm` (so it needs
        'phone_ax', 'phone_ay' and 'phone_az' columns).

    Returns
    -------
    list
        One `phone_accel_norm` result per row, in row order. The list is also
        printed before it is returned.
    """
    # Iterate over the frame that was passed in rather than the notebook-global
    # `data`, so the function also works for other frames (e.g. the test set).
    with Pool(4) as p:
        mag = p.map(phone_accel_norm, input.iterrows())
    print(mag)
    return mag

# Compute the phone accelerometer / magnetometer magnitudes for the training frame
# and cache them on disk. Disabled by default.
calc_phone_mags = False
if calc_phone_mags:
    phone_accel_mag = calc_phone_accel_mag(data)
    phone_magneto_mag = calc_phone_magneto_mag(data)

    # The `with` blocks close the files on exit, so no explicit close() is needed.
    with open('data/phone_accel_mag_train.pkl', 'wb') as f:
        pickle.dump(phone_accel_mag, f)

    with open('data/phone_magneto_mag_train.pkl', 'wb') as f:
        pickle.dump(phone_magneto_mag, f)

In [5]:
# Same procedure for the test data: build the frame from the raw recordings (slow)
# or load the cached pickle.
create_data_pickle = False
if create_data_pickle:
    # os.listdir() returns entries in arbitrary order; sort them so the row order of
    # the pickled frame is deterministic and reproducible across machines and runs.
    files = sorted(os.listdir('data/test'))
    list_of_dicts = []
    types_to_include = ['ax', 'ay', 'az', 'phone_ax', 'phone_ay', 'phone_az', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps']

    for file in tqdm(files):
        d = Recording(os.path.join('data/test', file))

        Dict = {'labels': d.labels}
        # Not every recording provides every channel; missing ones are simply
        # absent from the dict and become NaN in the resulting frame.
        for data_type in types_to_include:
            if data_type in d.data:
                Dict[data_type] = d.data[data_type]
        list_of_dicts.append(Dict)

    data_test = pd.DataFrame(list_of_dicts)
    data_test.to_pickle(path='data/pickled_and_sorted_test_data.pkl.zst', compression={'method': 'zstd'})
else:
    data_test = pd.read_pickle('data/pickled_and_sorted_test_data.pkl.zst')

In [None]:
# Compute the accelerometer magnitudes for the test frame and cache them, or load
# the previously cached result.
calc_a_mag = True
if calc_a_mag:

    accel_mag = calc_accel_mag(data_test)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open('data/accel_mag_test.pkl', 'wb') as f:
        pickle.dump(accel_mag, f)

else:
    # Use a context manager so the file is closed even if unpickling raises.
    # Only load pickles produced by this notebook: unpickling untrusted data can
    # execute arbitrary code.
    with open('data/accel_mag_test.pkl', 'rb') as file:
        accel_mag = pickle.load(file)

In [6]:
# Load a single training recording and list the channel names available in its
# `data` mapping.
d = Recording('data/train/train_trace_000.pkl')
print(d.data.keys())

dict_keys(['phone_mx', 'gx', 'altitude', 'phone_rotx', 'phone_steps', 'longitude', 'phone_gravy', 'lostPackets', 'phone_gz', 'latitude', 'phone_my', 'timestamp', 'gz', 'ax', 'my', 'phone_pressure', 'phone_gravx', 'phone_gy', 'phone_orientationx', 'temperature', 'phone_az', 'mz', 'az', 'gy', 'phone_mz', 'phone_rotm', 'phone_ax', 'packetNumber', 'phone_orientationz', 'phone_gravz', 'speed', 'bearing', 'phone_gx', 'phone_lax', 'phone_laz', 'phone_rotz', 'ay', 'phone_roty', 'phone_lay', 'mx', 'phone_ay', 'phone_orientationy'])


In [None]:
 list_of_dicts = []
    # types_to_include = ['ax', 'ay', 'az','gx', 'gy', 'gz', 'phone_ax', 'phone_ay', 'phone_az', 'mx', 'my', 'mz', 'phone_mx', 'phone_my', 'phone_mz', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps', 'temperature']
    types_to_include = ['ax', 'gx', 'gy', 'gz','mx', 'my', 'mz',  'speed', 'altitude', 'phone_steps', 'temperature']

    for file in tqdm(files):
        Dict = {}
        d = Recording(os.path.join('data/train',file))

        Dict["accel_time"] = d.data['ax'].timestamps
        Dict["gyro_time"] = d.data['gx'].timestamps
        Dict["magneto_time"] = d.data['mx'].timestamps

        list_of_dicts.append(Dict)
        
    data = pd.DataFrame(list_of_dicts)