# Dependencies

In [1]:
import copy
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path
import itertools
import glob
import time

from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Feature engineering

In [2]:
dataset = pd.read_csv("dataset_gran_250.csv", index_col=0)

#### Frequency domain 

In [3]:
milliseconds_per_instance = 250

### Split for computation
To optimally use our resources the trials per individual will be analysed independently

In [None]:
from Chapter4.FrequencyAbstraction import FourierTransformation


FreqAbs = FourierTransformation()
fs = float(1000)/milliseconds_per_instance

periodic_predictor_cols = list(dataset.columns[:12])
print(periodic_predictor_cols)

average_t_per_100_rows = []
import copy

def do_freq_abstract_for_trial_participant_and_save(trial, participant):
    """
    Does freq abstrac on a single participant for a single trial. After inference results
    are saved to a individual csv for later fusing results.
    """
    
    t_start = time.time()
    
    print(f'Worker doing trial {trial} and participant {participant}')
    
    ds = copy.copy(dataset[(dataset.trial == trial) & (dataset.id == participant)])

    my_set = FreqAbs.abstract_frequency(data_table = ds, 
                                        cols = periodic_predictor_cols, 
                                        window_size = int(float(10000)/milliseconds_per_instance), 
                                        sampling_rate = fs)
    my_set.to_csv(f'freq_abstraction_csvs/trial_{trial}_participant_{participant}.csv')
    
    t_done = time.time()
    
    print(f'Trial {trial} and participant {participant} finished, took {t_done - t_start} seconds')

In [5]:
# divide and conquer
participants = dataset.id.unique()

cormac_trials = dataset.trial.unique()[:5]
abel_trials = dataset.trial.unique()[5:10]
natasja_trials = dataset.trial.unique()[10:15]

print(cormac_trials)
print(abel_trials)
print(natasja_trials)

[ 1.  2. 11.  3.  4.]
[12.  7.  8. 15.  9.]
[16.  6. 14.  5. 13.]


# Cormac

In [6]:
# with Pool(processes=cpu_count()) as p:
#     r = p.starmap(do_freq_abstract_for_trial_participant_and_save, itertools.product(cormac_trials, participants))
#     print(r)

print('All done now')

All done now


# Abel

In [7]:
# with Pool(processes=cpu_count()) as p:
#     r = p.starmap(do_freq_abstract_for_trial_participant_and_save, itertools.product(abel_trials, participants))
#     print(r)
print('All done now')

All done now


# Natasja

In [8]:
print(participants)
print(natasja_trials)

# for trial in natasja_trials:
#     for participant in participants:
#         do_freq_abstract_for_trial_participant_and_save(trial, participant)

with Pool(processes=cpu_count()) as p:
    r = p.starmap(do_freq_abstract_for_trial_participant_and_save, itertools.product(natasja_trials, participants))
    print(r)

print('All done now')

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23.]
[16.  6. 14.  5. 13.]
Worker doing trial 16.0 and participant 4.0
Worker doing trial 16.0 and participant 0.0
Worker doing trial 16.0 and participant 8.0
Worker doing trial 16.0 and participant 16.0
Worker doing trial 16.0 and participant 12.0
Worker doing trial 16.0 and participant 20.0
Worker doing trial 6.0 and participant 0.0
Worker doing trial 6.0 and participant 4.0
Took 2.7636218070983887 seconds
Worker doing trial 16.0 and participant 5.0
Took 3.6538562774658203 seconds
Worker doing trial 16.0 and participant 17.0
Took 4.930432081222534 seconds
Worker doing trial 16.0 and participant 13.0
Took 5.492757320404053 seconds
Worker doing trial 16.0 and participant 9.0
Took 3.664729595184326 seconds
Worker doing trial 16.0 and participant 6.0
Took 8.426671743392944 seconds
Worker doing trial 16.0 and participant 1.0
Took 8.574840545654297 seconds
Worker doing trial 16.0 and participant 2

Took 46.579503536224365 seconds
Worker doing trial 13.0 and participant 15.0
Took 77.7903413772583 seconds
Worker doing trial 13.0 and participant 20.0
Took 71.46328020095825 seconds
Took 85.25915837287903 seconds
Took 23.1933810710907 seconds
Worker doing trial 13.0 and participant 17.0
Took 31.975690603256226 seconds
Took 31.715763092041016 seconds
Worker doing trial 13.0 and participant 21.0
Took 63.081684589385986 seconds
Took 60.735942363739014 seconds
Worker doing trial 13.0 and participant 11.0
Took 32.2468478679657 seconds
Worker doing trial 13.0 and participant 18.0
Took 86.32166957855225 seconds
Took 23.96027660369873 seconds
Worker doing trial 13.0 and participant 22.0
Took 17.466312885284424 seconds
Took 15.77765154838562 seconds
Worker doing trial 13.0 and participant 23.0
Took 27.908703804016113 seconds
Worker doing trial 13.0 and participant 19.0
Took 16.656466245651245 seconds
Took 10.989400625228882 seconds
[None, None, None, None, None, None, None, None, None, None, N

### Merge back together

In [9]:
datasets = []

for csv_file_freq_abs in glob.glob('freq_abstraction_trial_*_participant_*.csv'):
    datasets.append(pd.read_csv(csv_file_freq_abs))

dataset = pd.concat(datasets)

dataset.to_csv("dataset_engineered_features.csv")


ValueError: No objects to concatenate

#### Time Domain