# Dependencies

In [1]:
import copy
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path
import itertools
import glob
import time

from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Feature engineering

In [2]:
# Load the input table; the first csv column is used as the DataFrame index.
# NOTE(review): file name suggests a 250 ms granularity - confirm it matches
# `milliseconds_per_instance` used further down.
dataset = pd.read_csv("dataset_gran_250.csv", index_col=0)

#### Frequency domain 

In [3]:
# Duration of one instance (row) in milliseconds; the sampling rate and the
# Fourier window size below are both derived from this value.
milliseconds_per_instance = 250

### Split for computation
To make optimal use of our resources, the trials of each participant are analysed independently.

In [4]:
from Chapter4.FrequencyAbstraction import FourierTransformation


# Sampling rate in Hz: 1000 ms divided by the length of one instance in ms.
fs = 1000.0 / milliseconds_per_instance
FreqAbs = FourierTransformation()

# The first 12 columns of the dataset are the ones that get frequency features.
periodic_predictor_cols = dataset.columns[:12].tolist()
print(periodic_predictor_cols)

average_t_per_100_rows = []
import copy

def do_freq_abstract_for_trial_participant_and_save(trial, participant):
    """
    Run the frequency abstraction for one participant in one trial and save it.

    The rows of the global ``dataset`` that belong to the given trial and
    participant are passed to ``FreqAbs.abstract_frequency`` for the columns
    in ``periodic_predictor_cols``. The resulting table is written to its own
    csv file so the per-worker results can be fused later.

    Args:
        trial: value matched against ``dataset.trial``.
        participant: value matched against ``dataset.id``.

    Returns:
        None. The result is written to
        ``freq_abstraction_csvs/trial_{trial}_participant_{participant}.csv``.
    """
    t_start = time.time()

    print(f'Worker doing trial {trial} and participant {participant}')

    # Work on a copy of the selected rows rather than on `dataset` itself.
    ds = copy.copy(dataset[(dataset.trial == trial) & (dataset.id == participant)])

    # The window spans 10 000 ms, expressed as a number of instances.
    my_set = FreqAbs.abstract_frequency(data_table=ds,
                                        cols=periodic_predictor_cols,
                                        window_size=int(float(10000) / milliseconds_per_instance),
                                        sampling_rate=fs)

    # to_csv does not create missing directories; exist_ok=True keeps this
    # safe when several pool workers reach this line at the same time.
    os.makedirs('freq_abstraction_csvs', exist_ok=True)
    my_set.to_csv(f'freq_abstraction_csvs/trial_{trial}_participant_{participant}.csv')

    t_done = time.time()

    print(f'Took {t_done - t_start} seconds')

['attitude.roll', 'attitude.pitch', 'attitude.yaw', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z', 'gravity.x', 'gravity.y', 'gravity.z', 'rotationRate.x', 'rotationRate.y', 'rotationRate.z']


In [5]:
# Divide and conquer: every team member processes a chunk of five trials.
participants = dataset.id.unique()

# Compute the unique trial ids once and slice the three chunks from it.
all_trials = dataset.trial.unique()
cormac_trials = all_trials[:5]
abel_trials = all_trials[5:10]
natasja_trials = all_trials[10:15]

for trial_chunk in (cormac_trials, abel_trials, natasja_trials):
    print(trial_chunk)

[ 1.  2. 11.  3.  4.]
[12.  7.  8. 15.  9.]
[16.  6. 14.  5. 13.]


# Cormac

In [6]:
# NOTE(review): the parallel run for the `cormac_trials` chunk is disabled here;
# presumably it was executed separately - confirm before re-enabling.
# with Pool(processes=cpu_count()) as p:
#     r = p.starmap(do_freq_abstract_for_trial_participant_and_save, itertools.product(cormac_trials, participants))
#     print(r)

print('All done now')

All done now


# Abel

In [7]:
# NOTE(review): the parallel run for the `abel_trials` chunk is disabled here;
# presumably it was executed separately - confirm before re-enabling.
# with Pool(processes=cpu_count()) as p:
#     r = p.starmap(do_freq_abstract_for_trial_participant_and_save, itertools.product(abel_trials, participants))
#     print(r)
print('All done now')

All done now


# Natasja

In [None]:
print(participants)
print(natasja_trials)

# One job per (trial, participant) pair of this chunk. For debugging, the same
# pairs can be processed serially by calling
# do_freq_abstract_for_trial_participant_and_save(trial, participant) in a
# nested loop over natasja_trials and participants instead of using the pool.
jobs = itertools.product(natasja_trials, participants)

# Fan the jobs out over one worker process per CPU core.
with Pool(processes=cpu_count()) as p:
    r = p.starmap(do_freq_abstract_for_trial_participant_and_save, jobs)
    print(r)

print('All done now')

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23.]
[16.  6. 14.  5. 13.]
Worker doing trial 16.0 and participant 0.0
Worker doing trial 16.0 and participant 4.0
Worker doing trial 16.0 and participant 8.0
Worker doing trial 16.0 and participant 16.0
Worker doing trial 16.0 and participant 12.0
Worker doing trial 16.0 and participant 20.0
Worker doing trial 6.0 and participant 4.0
Worker doing trial 6.0 and participant 0.0
       attitude.roll  attitude.pitch  attitude.yaw  userAcceleration.x  \
0          -1.166779       -1.339164     -0.222591            0.035898   
250        -1.657714       -1.257245     -0.538131            0.057843   
500        -0.611435       -1.030245      0.780125            0.182438   
750        -1.215692       -1.255977     -0.162708           -0.113480   
1000       -1.657476       -1.252926     -0.506015            0.055675   
...              ...             ...           ...                 ...   
20250   

[77 rows x 20 columns]       attitude.roll  attitude.pitch  attitude.yaw  userAcceleration.x  \
0          -2.417223       -1.227271      0.539893            0.016991   
250        -2.635301       -1.189464      0.138776           -0.132556   
500        -0.726766       -1.363728      2.133601            0.154230   
750        -2.699549       -1.225036      0.120848           -0.014952   
1000       -1.574705       -1.288123      1.089850           -0.008314   
...              ...             ...           ...                 ...   
14250      -1.102156       -1.354315     -1.525416           -0.347751   
14500      -1.759311       -1.348202     -2.059614           -0.071071   
14750      -2.761392       -1.217825     -3.015835            0.044655   
15000      -0.840430       -1.370468     -1.200203           -0.075913   
15250      -0.375657       -1.394509     -0.680297            0.030973   

       userAcceleration.y  userAcceleration.z  gravity.x  gravity.y  \
0                0

[131 rows x 20 columns]


        attitude.roll  attitude.pitch  attitude.yaw  userAcceleration.x  \
0            2.494218       -1.392226      1.922713           -0.021803   
250          2.443692       -1.391353      1.885854           -0.039051   
500          2.419357       -1.395868      1.845492            0.009227   
750          2.399355       -1.401584      1.828054           -0.009584   
1000         2.441226       -1.406765      1.876033            0.003075   
...               ...             ...           ...                 ...   
202750      -2.495687       -1.506647      2.592572           -0.024430   
203000      -2.359208       -1.495488     -3.031725            0.019158   
203250      -2.319181       -1.498641     -2.969164            0.042730   
203500      -2.479904       -1.516798     -3.043590           -0.022330   
203750      -2.577819       -1.516871     -3.077340           -0.036801   

        userAcceleration.y  userAcceleration.z  gravity.x  gravity.y  \
0

### Merge back together

In [None]:
# The per-trial/participant results are written to
# freq_abstraction_csvs/trial_<trial>_participant_<participant>.csv, so the
# pattern has to look inside that directory. Sorting makes the row order of
# the merged table reproducible, since glob returns files in arbitrary order.
csv_files = sorted(glob.glob('freq_abstraction_csvs/trial_*_participant_*.csv'))

# Fail with an explicit message instead of pandas' generic
# "No objects to concatenate" when nothing has been produced yet.
if not csv_files:
    raise FileNotFoundError(
        "No frequency abstraction csv files found in 'freq_abstraction_csvs/'"
    )

# index_col=0 restores the index that to_csv stored as the first column,
# rather than importing it as an extra 'Unnamed: 0' feature column.
datasets = [
    pd.read_csv(csv_file_freq_abs, index_col=0)
    for csv_file_freq_abs in csv_files
]

dataset = pd.concat(datasets)

dataset.to_csv("dataset_engineered_features.csv")


#### Time Domain