# Dependencies

In [None]:
import copy
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path
import itertools
import glob
import time

from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Feature engineering

In [None]:
# Load the input table from the working directory. The cells below filter it
# by its `trial` and `id` columns and use its first 12 columns as predictors.
dataset = pd.read_csv("dataset_gran_250.csv")

#### Frequency domain 

In [None]:
# Duration of one instance in milliseconds: 1000 ms spread over 50 instances.
milliseconds_per_instance = 1000 / 50
print(milliseconds_per_instance)

### Split for computation
To make optimal use of our resources, the trials of each individual are analysed independently.

In [None]:
from Chapter4.FrequencyAbstraction import FourierTransformation


# Running log of measured seconds per 100 rows; the worker function appends to
# it and uses its mean to print a run-time estimate.
average_t_per_100_rows = []

# The first 12 columns of the dataset are the ones handed to the abstraction.
periodic_predictor_cols = list(dataset.columns[:12])

# Instances per second, derived from the duration of a single instance.
fs = 1000.0 / milliseconds_per_instance

# Single transformer object used by every call of the worker function.
FreqAbs = FourierTransformation()

def do_freq_abstract_for_trial_participant_and_save(trial, participant):
    """
    Run the frequency abstraction for one participant in one trial and save it.

    The rows of the module-level ``dataset`` whose ``trial`` and ``id`` columns
    equal the arguments are passed to ``FreqAbs.abstract_frequency``. The result
    is written to its own CSV file inside ``freq_abstraction_csvs/`` so that the
    per-task results can be fused later.

    Args:
        trial: Value compared against the ``trial`` column of ``dataset``.
        participant: Value compared against the ``id`` column of ``dataset``.

    Returns:
        None. The outcome is the CSV file written to disk and a timing entry
        appended to ``average_t_per_100_rows``.
    """
    output_dir = 'freq_abstraction_csvs'

    t_start = time.time()

    print(f'Worker doing trial {trial} and participant {participant}')

    # Combine both conditions into one mask over the full frame. Chaining two
    # boolean selections applies the second (full-length) mask to an already
    # filtered frame, which makes pandas warn about reindexing the key.
    selection = dataset.trial.eq(trial) & dataset.id.eq(participant)
    ds = dataset[selection]

    print(f'Working on dataset of shape {ds.shape}')

    # NOTE: average_t_per_100_rows is a plain module-level list. When this
    # function runs inside multiprocessing workers, every process appends to
    # its own copy, so the estimate only reflects tasks done by that process.
    if average_t_per_100_rows:
        mean_t_per_100_rows = sum(average_t_per_100_rows) / len(average_t_per_100_rows)
        expected_run_time = mean_t_per_100_rows * (ds.shape[0] / 100)
    else:
        # No timing recorded yet, so no estimate can be given.
        expected_run_time = 'UNKNOWN'

    print(f'Expected run time: {expected_run_time} s')

    # Number of instances that span 10000 ms; passed as the third argument.
    # NOTE(review): presumably the window size of the transformation — confirm
    # against FourierTransformation.abstract_frequency.
    instances_per_10000_ms = int(float(10000) / milliseconds_per_instance)
    my_set = FreqAbs.abstract_frequency(ds, periodic_predictor_cols, instances_per_10000_ms, fs)

    # The target directory may not exist yet; exist_ok keeps this safe when
    # several workers reach this line at the same time.
    os.makedirs(output_dir, exist_ok=True)
    my_set.to_csv(f'{output_dir}/freq_abstraction_trial_{trial}_participant_{participant}.csv')

    t_done = time.time()
    total_time = t_done - t_start
    print(f'Took {total_time} seconds')

    # Guard against an empty selection to avoid dividing by zero.
    run_time_per_row = total_time / ds.shape[0] if ds.shape[0] else 0
    average_t_per_100_rows.append(100 * run_time_per_row)
    
    
    

# Cormac

In [None]:
# This share of the work covers the first five distinct trial values.
participants = dataset["id"].unique()
cormac_trials = dataset["trial"].unique()[:5]

# One task per (trial, participant) pair, with one worker process per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(cormac_trials, participants),
    )
    print(r)

print('All done now')

# Abel

In [None]:
# This share of the work covers the sixth through tenth distinct trial values.
participants = dataset["id"].unique()
abel_trials = dataset["trial"].unique()[5:10]

# One task per (trial, participant) pair, with one worker process per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(abel_trials, participants),
    )

print('All done now')

# Natasja

In [None]:
# This share of the work covers the eleventh through fifteenth distinct trial values.
participants = dataset["id"].unique()
natasja_trials = dataset["trial"].unique()[10:15]

# One task per (trial, participant) pair, with one worker process per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(natasja_trials, participants),
    )
    print(r)

print('All done now')

### Merge back together

In [None]:
# Gather the per-trial/per-participant result files. The worker function writes
# them into 'freq_abstraction_csvs/', so the pattern has to look there; sorting
# makes the row order of the merged file independent of the glob order.
csv_files_freq_abs = sorted(
    glob.glob('freq_abstraction_csvs/freq_abstraction_trial_*_participant_*.csv')
)

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not csv_files_freq_abs:
    raise FileNotFoundError(
        "No frequency abstraction CSVs found in 'freq_abstraction_csvs/'; "
        "run the frequency abstraction cells first."
    )

# NOTE(review): the per-task files were saved with their index, so each frame
# read here carries it as an extra 'Unnamed: 0' column — confirm whether the
# merged file should keep it.
datasets = [pd.read_csv(csv_file) for csv_file in csv_files_freq_abs]

dataset = pd.concat(datasets)

dataset.to_csv("dataset_engineered_features.csv")


#### Time Domain