# Dependencies

In [9]:
import copy
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path
import itertools
import glob
import time

from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Feature engineering

In [10]:
# Load the input table that every feature-engineering step below works on.
dataset = pd.read_csv(filepath_or_buffer="dataset_gran_250.csv")

#### Frequency domain 

In [11]:
# Number of milliseconds covered by one instance; the sampling frequency and
# the window length passed to the frequency abstraction are derived from it.
# NOTE(review): presumably one instance == one row of `dataset` (the file name
# says "gran_250") -- confirm.
milliseconds_per_instance = 250

### Split for computation
To make optimal use of our resources, the trials of each individual are analysed independently.

In [12]:
from Chapter4.FrequencyAbstraction import FourierTransformation


# Single transformer instance that every worker call goes through.
FreqAbs = FourierTransformation()

# Sampling frequency in Hz: 1000 ms divided by the milliseconds per instance.
fs = 1000.0 / milliseconds_per_instance

# The first 12 columns of the dataset are the ones handed to the abstraction.
periodic_predictor_cols = dataset.columns[:12].tolist()

# Measured run times, normalised to seconds per 100 rows, appended after each
# finished trial/participant combination.
average_t_per_100_rows = []

def do_freq_abstract_for_trial_participant_and_save(trial, participant):
    """
    Run the frequency abstraction for one participant in one trial and save it.

    The rows of the module-level ``dataset`` whose ``trial`` and ``id`` columns
    equal the given values are passed to ``FreqAbs.abstract_frequency``. The
    result is written to
    ``freq_abstraction_csvs/trial_<trial>_participant_<participant>.csv`` so the
    individual results can be merged afterwards.

    Parameters
    ----------
    trial
        Value matched against ``dataset.trial``.
    participant
        Value matched against ``dataset.id``.

    Returns
    -------
    None
        The result is only written to disk; the measured run time per 100 rows
        is appended to ``average_t_per_100_rows``.
    """
    output_dir = 'freq_abstraction_csvs'

    t_start = time.time()

    print(f'Worker doing trial {trial} and participant {participant}')

    # Use one combined mask. Chaining two boolean selections would apply a
    # full-length mask to an already filtered frame, which only works through
    # implicit reindexing and makes pandas emit a UserWarning.
    ds = dataset[dataset.trial.eq(trial) & dataset.id.eq(participant)]

#     print(f'Working on dataset of shape {ds.shape}')

    # Estimate the run time from earlier measurements; nothing is known before
    # the first combination has finished.
    if average_t_per_100_rows:
        mean_t_per_100_rows = sum(average_t_per_100_rows) / len(average_t_per_100_rows)
        expected_run_time = mean_t_per_100_rows * (ds.shape[0] / 100)
    else:
        expected_run_time = 'UNKNOWN'

#     print(f'Expected run time: {expected_run_time} s')

    # The third argument is the number of instances spanning 10 000 ms
    # (presumably the window size of the transformation -- confirm against
    # FourierTransformation.abstract_frequency).
    my_set = FreqAbs.abstract_frequency(ds, periodic_predictor_cols, int(float(10000)/milliseconds_per_instance), fs)

    # Make sure the target directory exists; exist_ok keeps this safe when
    # several workers reach this point at the same time.
    os.makedirs(output_dir, exist_ok=True)
    my_set.to_csv(f'{output_dir}/trial_{trial}_participant_{participant}.csv')

    t_done = time.time()
    total_time = t_done - t_start
    print(f'Took {total_time} seconds')

    # Guard against an empty selection to avoid dividing by zero.
    run_time_per_row = total_time / ds.shape[0] if ds.shape[0] else 0
    average_t_per_100_rows.append(100 * run_time_per_row)

In [13]:
# divide and conquer: the first 15 unique trial ids are cut into three
# consecutive batches of five, one batch per cell below.
participants = dataset.id.unique()

unique_trials = dataset.trial.unique()
cormac_trials = unique_trials[0:5]
abel_trials = unique_trials[5:10]
natasja_trials = unique_trials[10:15]

for trial_batch in (cormac_trials, abel_trials, natasja_trials):
    print(trial_batch)

[ 1.  2. 11.  3.  4.]
[12.  7.  8. 15.  9.]
[16.  6. 14.  5. 13.]


# Cormac

In [None]:
# Every (trial, participant) pair of this batch becomes one worker call.
cormac_jobs = itertools.product(cormac_trials, participants)

with Pool(processes=cpu_count()) as p:
    # starmap unpacks each pair into the two positional arguments of the worker.
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        cormac_jobs,
    )
    print(r)

print('All done now')

Worker doing trial 1.0 and participant 0.0
Worker doing trial 1.0 and participant 4.0
Worker doing trial 1.0 and participant 8.0
Worker doing trial 1.0 and participant 12.0
Worker doing trial 1.0 and participant 16.0
Worker doing trial 1.0 and participant 20.0
Worker doing trial 2.0 and participant 0.0
Worker doing trial 2.0 and participant 4.0




Working on dataset of shape (175, 21)
Working on dataset of shape (141, 21)
Working on dataset of shape (127, 21)
Working on dataset of shape (167, 21)
Working on dataset of shape (167, 21)
Working on dataset of shape (233, 21)




Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Working on dataset of shape (197, 21)
Working on dataset of shape (173, 21)
Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Expected run time: UNKNOWN s
Took 14.865007162094116 seconds
Worker doing trial 1.0 and participant 13.0
Working on dataset of shape (138, 21)
Expected run time: 16.152527467472346 s
Took 17.22282838821411 seconds
Worker doing trial 1.0 and participant 1.0
Working on dataset of shape (172, 21)
Expected run time: 21.00940767923991 s
Took 23.300018548965454 seconds
Worker doing trial 1.0 and participant 17.0
Working on dataset of shape (236, 21)
Expected run time: 32.92697232069369 s
Took 24.712753534317017 seconds
Worker doing trial 1.0 and participant 9.0
Working on dataset of shape (215, 21)
Expected run time: 31.815820418432086 s
Took 25.37162709236145 seconds
Worker doing trial 1.0 and participant 5.0
Working

# Abel

In [None]:
# Every (trial, participant) pair of this batch becomes one worker call.
abel_jobs = itertools.product(abel_trials, participants)

with Pool(processes=cpu_count()) as p:
    # starmap unpacks each pair into the two positional arguments of the worker.
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        abel_jobs,
    )
    print(r)
print('All done now')

# Natasja

In [None]:
# Every (trial, participant) pair of this batch becomes one worker call.
natasja_jobs = itertools.product(natasja_trials, participants)

with Pool(processes=cpu_count()) as p:
    # starmap unpacks each pair into the two positional arguments of the worker.
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        natasja_jobs,
    )
    print(r)

print('All done now')

### Merge back together

In [None]:
# Collect every per-trial/per-participant result file. The pattern has to match
# the location the workers write to
# (freq_abstraction_csvs/trial_<trial>_participant_<participant>.csv); sorting
# keeps the row order of the merged file reproducible between runs.
csv_files_freq_abs = sorted(glob.glob('freq_abstraction_csvs/trial_*_participant_*.csv'))

if not csv_files_freq_abs:
    # pd.concat would fail on an empty list anyway; say what is actually wrong.
    raise ValueError(
        "No frequency abstraction results found in 'freq_abstraction_csvs/'; "
        "run the abstraction cells first."
    )

datasets = [pd.read_csv(csv_file_freq_abs) for csv_file_freq_abs in csv_files_freq_abs]

# Stack the individual results back into one table and persist it.
dataset = pd.concat(datasets)

dataset.to_csv("dataset_engineered_features.csv")


#### Time Domain