# Dependencies

In [1]:
import copy
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path
import itertools
import glob
import time

from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Feature engineering

In [5]:
# Load the table produced by the imputation step; every later cell reads from `dataset`.
dataset = pd.read_csv(filepath_or_buffer="dataset_imputed_values.csv")

#### Frequency domain 

In [3]:
# Duration of one instance in milliseconds: one fiftieth of a second, scaled to ms.
milliseconds_per_instance = 1000 * (1 / 50)
print(milliseconds_per_instance)

20.0


### Split for computation
To make optimal use of our resources, the trials of each individual are analysed independently.

In [10]:
from Chapter4.FrequencyAbstraction import FourierTransformation

# One shared transformer instance, used by every worker call.
FreqAbs = FourierTransformation()

# Instances per second, derived from the duration of one instance in milliseconds.
fs = 1000.0 / milliseconds_per_instance

# The first twelve columns of the dataset are the ones handed to the transformer.
periodic_predictor_cols = list(dataset.columns[0:12])

# Running record of observed cost (seconds per 100 rows), used for run-time estimates.
average_t_per_100_rows = list()

def do_freq_abstract_for_trial_participant_and_save(trial, participant):
    """
    Run the frequency abstraction for a single participant within a single trial.

    The rows of the global ``dataset`` that belong to the given trial and
    participant are passed to ``FreqAbs.abstract_frequency`` and the result is
    written to its own CSV file
    (``freq_abstraction_trial_<trial>_participant_<participant>.csv``) so the
    individual results can be fused later.

    Progress, the selected shape, an estimated run time and the measured run
    time are printed. The estimate is based on ``average_t_per_100_rows``; when
    this function runs inside a ``multiprocessing.Pool`` every worker process
    has its own copy of that list, so the estimate only reflects the work that
    particular worker has already done.

    Parameters
    ----------
    trial
        Value matched against ``dataset.trial``.
    participant
        Value matched against ``dataset.id``.

    Returns
    -------
    None
        The result is persisted to disk, not returned.
    """

    t_start = time.time()

    print(f'Worker doing trial {trial} and participant {participant}')

    # Combine both conditions into ONE mask. Chaining two boolean selections
    # (dataset[mask_a][mask_b]) applies a full-length mask to an already
    # filtered frame, which pandas has to reindex and warns about.
    selection = dataset.trial.eq(trial) & dataset.id.eq(participant)
    # Explicit copy: the subset is an independent frame, so whatever the
    # transformer does with it cannot refer back to the shared `dataset`.
    ds = dataset[selection].copy()
    n_rows = ds.shape[0]

    print(f'Working on dataset of shape {ds.shape}')

    # No timing history yet (first task handled by this process) -> no estimate.
    if average_t_per_100_rows:
        mean_t_per_100_rows = sum(average_t_per_100_rows)/len(average_t_per_100_rows)
        expected_run_time = mean_t_per_100_rows * (n_rows/100)
    else:
        expected_run_time = 'UNKNOWN'

    print(f'Expected run time: {expected_run_time} s')

    # Number of instances that fit in 10 000 ms.
    # NOTE(review): presumably the transformer's window size - confirm against
    # FourierTransformation.abstract_frequency.
    instances_per_10_s = int(float(10000)/milliseconds_per_instance)
    my_set = FreqAbs.abstract_frequency(ds, periodic_predictor_cols, instances_per_10_s, fs)

    my_set.to_csv(f'freq_abstraction_trial_{trial}_participant_{participant}.csv')
    t_done = time.time()
    total_time = t_done-t_start
    print(f'Took {total_time} seconds')
    # Guard against an empty selection so the bookkeeping never divides by zero.
    run_time_per_row = total_time/n_rows if n_rows else 0
    average_t_per_100_rows.append(100*run_time_per_row)
    
    
    

# Cormac

In [3]:
# Cormac's share of the work: the first five distinct trials, for every participant.
cormac_trials = dataset["trial"].unique()[0:5]
participants = dataset["id"].unique()

# One task per (trial, participant) pair, spread over one worker per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(cormac_trials, participants),
    )
    print(r)

print('All done now')

NameError: name 'dataset' is not defined

# Abel

In [None]:
# Abel's share of the work: distinct trials six to ten, for every participant.
abel_trials = dataset["trial"].unique()[5:10]
participants = dataset["id"].unique()

# One task per (trial, participant) pair, spread over one worker per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(abel_trials, participants),
    )

print('All done now')

Worker doing trial 4.0 and participant 0.0Worker doing trial 4.0 and participant 15.0Worker doing trial 4.0 and participant 7.0Worker doing trial 4.0 and participant 18.0







Working on dataset of shape (240, 34)Working on dataset of shape (235, 34)Working on dataset of shape (283, 34)Working on dataset of shape (312, 34)


Expected run time: UNKNOWN sExpected run time: UNKNOWN s
Expected run time: UNKNOWN s

Expected run time: UNKNOWN s

Took 3.7419629096984863 seconds
Worker doing trial 4.0 and participant 8.0
Working on dataset of shape (230, 34)
Expected run time: 3.5860477884610487 s
Took 3.9570181369781494 seconds
Worker doing trial 4.0 and participant 1.0
Working on dataset of shape (262, 34)
Expected run time: 4.411654263354363 s
Took 4.104453802108765 seconds
Worker doing trial 12.0 and participant 0.0
Working on dataset of shape (82, 34)
Expected run time: 1.1892763666887587 s
Took 4.235467195510864 seconds
Worker doing trial 4.0 and participant 16.0
Working on dataset of shape (232, 34)
Expected run time: 3.149449965892693 s
Took 3.444114923477173 seconds
Worker doing trial 12.0 and participant 1.0
Working on dataset of shape (100, 34)
Expected r

Took 3.2129971981048584 seconds
Worker doing trial 7.0 and participant 18.0
Working on dataset of shape (535, 34)
Expected run time: 15.870021217323409 s
Took 66.26464009284973 seconds
Worker doing trial 7.0 and participant 14.0
Working on dataset of shape (554, 34)
Expected run time: 23.94378269051099 s
Took 105.95264196395874 seconds
Worker doing trial 7.0 and participant 6.0
Working on dataset of shape (550, 34)
Expected run time: 20.132534127784275 s
Took 57.20323705673218 seconds
Worker doing trial 8.0 and participant 0.0
Working on dataset of shape (348, 34)
Expected run time: 11.737977314417769 s
Took 4.3199217319488525 seconds
Worker doing trial 8.0 and participant 1.0
Working on dataset of shape (448, 34)
Expected run time: 14.633475263827178 s
Took 4.687836170196533 seconds
Worker doing trial 8.0 and participant 2.0
Working on dataset of shape (416, 34)
Expected run time: 13.148453873495308 s
Took 4.545964002609253 seconds
Worker doing trial 8.0 and participant 3.0
Working on

Took 4.077646017074585 seconds
Worker doing trial 15.0 and participant 17.0
Working on dataset of shape (191, 34)
Expected run time: 4.040371551020733 s
Took 4.317493915557861 seconds
Worker doing trial 15.0 and participant 21.0
Working on dataset of shape (178, 34)
Expected run time: 5.590276999947925 s
Took 3.8607940673828125 seconds
Worker doing trial 15.0 and participant 19.0
Working on dataset of shape (229, 34)
Expected run time: 4.840300470381062 s
Took 3.6922659873962402 seconds
Worker doing trial 15.0 and participant 22.0
Working on dataset of shape (265, 34)
Expected run time: 8.213922992937555 s
Took 4.160812139511108 seconds
Took 3.7061381340026855 seconds
Worker doing trial 15.0 and participant 23.0
Working on dataset of shape (118, 34)
Expected run time: 3.5831782029552395 s
Took 2.6599390506744385 seconds
Worker doing trial 15.0 and participant 18.0
Working on dataset of shape (259, 34)
Expected run time: 7.792399935650929 s
Took 3.3994197845458984 seconds
Took 111.25680

# Natasja

In [5]:
# Natasja's share of the work: distinct trials eleven to fifteen, for every participant.
natasja_trials = dataset["trial"].unique()[10:15]
participants = dataset["id"].unique()

# One task per (trial, participant) pair, spread over one worker per CPU core.
with Pool(cpu_count()) as p:
    r = p.starmap(
        do_freq_abstract_for_trial_participant_and_save,
        itertools.product(natasja_trials, participants),
    )
    print(r)

print('All done now')

NameError: name 'dataset' is not defined

### Merge back together

In [6]:
# Collect the per-trial / per-participant result files written by the workers.
# glob gives no ordering guarantee, so the paths are sorted to make the row
# order of the merged file reproducible between runs and machines.
freq_abs_csv_files = sorted(glob.glob('freq_abstraction_trial_*_participant_*.csv'))

# pd.concat fails on an empty list with the opaque "No objects to concatenate";
# fail with the same exception type but say what is actually missing.
if not freq_abs_csv_files:
    raise ValueError(
        "No 'freq_abstraction_trial_*_participant_*.csv' files found in "
        f"{os.getcwd()!r}; run the frequency abstraction cells before merging."
    )

# NOTE(review): the worker writes its CSVs including the index, so that index
# comes back here as an extra unnamed column - confirm downstream expects it.
datasets = [pd.read_csv(csv_file_freq_abs) for csv_file_freq_abs in freq_abs_csv_files]

# From here on `dataset` holds the merged, feature-engineered table.
dataset = pd.concat(datasets)

dataset.to_csv("dataset_engineered_features.csv")


ValueError: No objects to concatenate

#### Time Domain