# Feature Extraction

In [1]:
from Funcs.Utility import *
import pandas as pd
import numpy as np
import scipy.stats as st
import cloudpickle
import ray
from datetime import datetime
from contextlib import contextmanager
import warnings
from typing import Dict, Callable, Union, Tuple, List, Optional, Iterable
from datetime import timedelta as td
from scipy import stats
import time
def load(path: str):
    with open(path, mode='rb') as f:
        return cloudpickle.load(f)
def dump(obj, path: str):
    with open(path, mode='wb') as f:
        cloudpickle.dump(obj, f)
def log(msg: any):
    print('[{}] {}'.format(datetime.now().strftime('%y-%m-%d %H:%M:%S'), msg))
@contextmanager
def log_t(msg: any):
    try:
        s = time.time()
        yield None
    finally:
        elasped_time = time.time() - s
        log(f'({elasped_time:.2f}s) {msg}')
def summary(x):
    x = np.asarray(x)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        n = len(x)
        # Here, uppercase np.dtype.kind corresponds to non-numeric data.
        # Also, we view the boolean data as dichotomous categorical data.
        if x.dtype.kind.isupper() or x.dtype.kind == 'b':
            cnt = pd.Series(x).value_counts(dropna=False)
            card = len(cnt)
            cnt = cnt[:20]
            cnt_str = ', '.join([f'{u}:{c}' for u, c in zip(cnt.index, cnt)])
            if card > 30:
                cnt_str = f'{cnt_str}, ...'
            return {
                'n': n,
                'cardinality': card,
                'value_count': cnt_str
            }
        else:
            x_nan = x[np.isnan(x)]
            x_norm = x[~np.isnan(x)]
            m = np.mean(x_norm)
            me = np.median(x_norm)
            s = np.std(x_norm, ddof=1)
            l, u = np.min(x_norm), np.max(x)
            conf_l, conf_u = st.t.interval(0.95, len(x_norm) - 1, loc=m, scale=st.sem(x_norm))
            n_nan = len(x_nan)
            return {
                'n': n,
                'mean': m,
                'SD': s,
                'med': me,
                'range': (l, u),
                'conf.': (conf_l, conf_u),
                'nan_count': n_nan
            }
@contextmanager
def on_ray(*args, **kwargs):
    try:
        if ray.is_initialized():
            ray.shutdown()
        ray.init(*args, **kwargs)
        yield None
    finally:
        ray.shutdown()

In [2]:
def _safe_na_check(_v):
    _is_nan_inf = False
    try:
        _is_nan_inf = np.isnan(_v) or np.isinf(_v)
    except:
        _is_nan_inf = False
    return _is_nan_inf or _v is None

In [3]:
def _extract_numeric_feature(d_key, d_val) -> Dict:
    feature = {}
    v=d_val
    hist, _ = np.histogram(v, bins='doane', density=False)
    std = np.sqrt(np.var(v, ddof=1)) if len(v) > 1 else 0
    v_norm = (v - np.mean(v)) / std if std != 0 else np.zeros(len(v))
    feature[f'{d_key}#AVG'] = np.mean(v) # Sample mean
    feature[f'{d_key}#STD'] = std # Sample standard deviation
    if std !=0:
        feature[f'{d_key}#SKW'] = stats.skew(v, bias=False) # Sample skewness
        feature[f'{d_key}#KUR'] = stats.kurtosis(v, bias=False) # Sample kurtosis
    feature[f'{d_key}#ASC'] = np.sum(np.abs(np.diff(v))) # Abstract sum of changes
    feature[f'{d_key}#BEP'] = stats.entropy(hist) # Binned entropy
    feature[f'{d_key}#MED'] = np.median(v) # Median
    feature[f'{d_key}#TSC'] = np.sqrt(np.sum(np.power(np.diff(v_norm), 2))) # Timeseries complexity
    return feature

In [4]:
def _extract_categorical_feature(cats, d_key, d_val) -> Dict:
    feature = {}
    v = d_val
    cnt = v.value_counts()
    val, sup = cnt.index, cnt.values
    hist = {k: v for k, v in zip(val, sup)}
    if len(cats) == 1:
        c = cats[0]
        feature[f'{d_key}#SUP'] = hist[c] if c in hist else 0
    else:
        # Information Entropy
        feature[f'{d_key}#ETP#'] = stats.entropy(sup)
        # Abs. Sum of Changes
        feature[f'{d_key}#ASC#'] = np.sum(v.values[1:] != v.values[:-1])
        if len(cats) == 2: # Dichotomous categorical data
            c = cats[0]
            feature[f'{d_key}#SUP'] = hist[c] if c in hist else 0
        else:
            for c in cats:
                feature[f'{d_key}#SUP={c}'] = hist[c]  if c in hist else 0
    return feature

In [5]:
def _extract_timeWindow_feature(is_numeric, cats, d_key, d_val) -> Dict:
    feature = {}
    v = d_val
    if d_key in ['CAE_DUR']:
        feature = _extract_numeric_feature(d_key, v)
        feature['CAE#FREQ'] = len(v)
    elif d_key in ['MSG_SNT','MSG_RCV','MSG_ALL']:
        feature[f'{d_key}#FREQ'] = np.sum(v) #As for ratio of inbound and outbound, we need to extract it after extracting for each sensor
    elif d_key in ['LOC_CLS']:
        feature = _extract_categorical_feature(cats, d_key, v)
        feature['LOC#NumOfPlcVist'] = len(set(v))
    else:
        if is_numeric:
            feature = _extract_numeric_feature(d_key, v)
        else:
            feature =_extract_categorical_feature(cats, d_key, v)
    return feature

In [6]:
#This fucntion is based on the  towards circadian computing: "early to bed and early to rise"
#makes some of us unhealthy and sleep derived
theta=30
def calculate_sleep_duration(s_on, s_off, theta):
    # Merge s_on and s_off into a single DataFrame based on timestamp
    df = pd.merge(pd.DataFrame({'timestamp': s_on, 'event': 'screen_on'}),
                  pd.DataFrame({'timestamp': s_off, 'event': 'screen_off'}),
                  how='outer', on='timestamp')
    # fill missing values in event_x with values from event_y, and vice versa
    df['event_x'] = df['event_x'].fillna(df['event_y'])
    df['event_y'] = df['event_y'].fillna(df['event_x'])
    # drop the event_x and event_y columns
    df = df.drop(columns=['event_y']).rename(columns={'event_x': 'event'})
    # Fill in missing timestamps with NaT and sort by timestamp
    df = df.fillna(pd.NaT).sort_values('timestamp')
    df=df.assign(
         timestamp=lambda x: pd.to_datetime(x['timestamp'], unit='ms', utc=True).dt.tz_convert(DEFAULT_TZ)
     )
    # Filter out screen-on events caused by notifications
    mask = (df['event'] == 'screen_off') & ((df['timestamp'].diff().fillna(pd.NaT)  / pd.Timedelta(seconds=1)) > theta)
    filtered_df = df[mask].reset_index(drop=True)
    # Discard non-usage patterns that do not start between 9PM to 7AM (next day)
    sleep_duration = pd.Series(dtype=float)
    sleep_onset = pd.Series(dtype="datetime64[ns]")
    for i in range(len(filtered_df)-1):
        if filtered_df.loc[i, 'timestamp'].hour >= 21 or filtered_df.loc[i, 'timestamp'].hour < 7:
            non_usage_duration = filtered_df.loc[i+1, 'timestamp'] - filtered_df.loc[i, 'timestamp']
            if non_usage_duration.total_seconds() > 0:
                sleep_duration = pd.concat([sleep_duration, pd.Series(non_usage_duration.total_seconds())])
                sleep_onset = pd.concat([sleep_onset , pd.Series(filtered_df.loc[i, 'timestamp'])])
    # Calculate sleep midpoint and apply individual corrective term
    if len(sleep_duration) > 0:
        sleep_duration = sleep_duration.reset_index(drop=True)
        sleep_onset  =sleep_onset.reset_index(drop=True)
        sleep_midpoint = sleep_onset + pd.to_timedelta(sleep_duration/2, unit="s")
        return sleep_duration.max(), sleep_onset.iloc[sleep_duration.idxmax()], sleep_midpoint.iloc[sleep_duration.idxmax()]
    else:
        return None, None, None

In [7]:
epoch_names = {
    0: 'Dawn',
    1: 'Morning',
    2: 'Afternoon',
    3: 'LateAfternoon',
    4: 'Evening',
    5: 'Night'
}
def _extract(
        pid: str,
        data: Dict[str, pd.Series],
        label: pd.Series,
        label_values: List[str],
#        window_data: Dict[str, Union[int, Callable[[pd.Timestamp], int]]],
#        window_label: Dict[str, Union[int, Callable[[pd.Timestamp], int]]],
        categories: Dict[str, Optional[List[any]]] = None,
        constant_features: Dict[str, any] = None,
        resample_s: Dict[str, float] = None
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    _s = time.time()
    log(f"Begin feature extraction on {pid}'s data.")
    categories = categories or dict()
    constant_features = constant_features or dict()
    resample_s = resample_s or dict()
    X, y, date_times = [], [], []
#    count = 0
    for timestamp in label.index:
        row = dict()
        #Find the start of today and yesterday for extracting today epoch features and yesterday epoch features
        start_of_today = datetime(timestamp.year, timestamp.month, timestamp.day, tzinfo=timestamp.tzinfo)
        start_of_today = pd.Timestamp(start_of_today.date(), tz=DEFAULT_TZ)
        start_of_yesterday = timestamp - pd.Timedelta(days=1)
        start_of_yesterday = pd.Timestamp(start_of_yesterday.date(), tz=DEFAULT_TZ)
        label_cur = label.at[timestamp]
        t = timestamp - td(milliseconds=1)
        # Features relevant to participants' info
        for d_key, d_val in constant_features.items():
            row[d_key] = d_val
        # Features from sensor data
        for d_key, d_val in data.items():
            is_numeric = d_key not in categories
            cats = categories.get(d_key) or list()
            d_val = d_val.sort_index()
            # Features relevant to latest value of a given data
            # These features are extracted only for bounded categorical data and numerical data.
            if is_numeric or cats:
                try:
                    v = d_val.loc[:t].iloc[-1]
                except (KeyError, IndexError):
                    v = 0
                if is_numeric:
                    row[f'{d_key}#VAL'] = v
                else:
                    for c in cats:
                        row[f'{d_key}#VAL={c}'] = v == c
            # Features relevant to duration since the latest state change.
            # These features are only for categorical data.
            # In addition, duration since a given state is set recently is considered,
            # that are available only at bounded categorical data.
#             if not is_numeric:
#                 try:
#                     v = d_val.loc[:t]
#                     row[f'{d_key}#DSC'] = (t - v.index[-1]).total_seconds() if len(v) else -1.0
#                     for c in cats:
#                         v_sub = v.loc[lambda x: x == c].index
#                         row[f'{d_key}#DSC={c}'] = (t - v_sub[-1]).total_seconds() if len(v_sub) else -1.0
#                 except (KeyError, IndexError):
#                     row[f'{d_key}#DSC'] = -1.0
#                     for c in cats:
#                         row[f'{d_key}#DSC={c}'] = -1.0
            # Features extracted from time-windows
            # These features requires resampling and imputation on each data.
            sample_rate = RESAMPLE_S.get(d_key) or 1
            d_val_res = d_val.resample(f'{sample_rate}S', origin='start')
            if d_val.dtypes != object:
                d_val_res = d_val_res.mean().interpolate(method='linear').dropna()
            else:
                d_val_res = d_val_res.ffill().dropna()
            #No resampling
#             d_val_res =d_val
           # Features extracted from immediate past time-windows
            w_val = 15 * 60
            v = d_val_res.loc[t - td(seconds=w_val):t]
            if len(v) == 0:
                continue
            else:
                new_row = {f'{k}_ImmediatePast': v for k, v in _extract_timeWindow_feature(is_numeric, cats, d_key, v).items()}
                row.update(new_row)
            #Features extracted from yesterday epoch time windows
            yesterday_time_windows = [
                (start_of_yesterday + pd.Timedelta(hours=6), start_of_yesterday + pd.Timedelta(hours=9)),
                (start_of_yesterday + pd.Timedelta(hours=9), start_of_yesterday + pd.Timedelta(hours=12)),
                (start_of_yesterday + pd.Timedelta(hours=12), start_of_yesterday + pd.Timedelta(hours=15)),
                (start_of_yesterday + pd.Timedelta(hours=15), start_of_yesterday + pd.Timedelta(hours=18)),
                (start_of_yesterday + pd.Timedelta(hours=18), start_of_yesterday + pd.Timedelta(hours=21)),
                (start_of_yesterday + pd.Timedelta(hours=21), start_of_yesterday + pd.Timedelta(hours=24))
            ]
            for count, (start, end) in enumerate(yesterday_time_windows):
                # Get data for the current yesterday epoch time window
                v = d_val_res.loc[start:end]
                epoch_name = epoch_names.get(count)
                if len(v) > 0:
                    new_row = {f'{k}Yesterday{epoch_name}': v for k, v in _extract_timeWindow_feature(is_numeric, cats, d_key, v).items()}
                    row.update(new_row)
                    
            #Features extracted from today epoch time windows until current time
            today_time_windows = []
            for i in range(6):
                start = start_of_today + pd.Timedelta(hours=i*3)
                end = start_of_today + pd.Timedelta(hours=(i+1)*3)
                if start <= timestamp:
                    today_time_windows.append((start, min(end, timestamp)))
                else:
                    break
            for count, (start, end) in enumerate(today_time_windows):
                # Get data for the current time window
                v = d_val.loc[start:end]
                epoch_name = epoch_names.get(count)
                if len(v) > 0:
                    new_row = {f'{k}Today{epoch_name}': v for k, v in _extract_timeWindow_feature(is_numeric, cats, d_key, v).items()}
                    row.update(new_row)
        #Sleep feature extracted from last night's data
        onset_min = start_of_yesterday + pd.Timedelta(hours=21)
        onset_max = start_of_today + pd.Timedelta(hours=14)
        duration, onset, midpoint =calculate_sleep_duration(data['SCR_SON'].loc[onset_min:onset_max].reset_index()['timestamp'], data['SCR_DUR'].loc[onset_min:onset_max].reset_index()['timestamp'], theta)
        if duration:
            row['Sleep#Duration'] = duration
            onset_hour = onset.hour
            if onset_hour >=21:
                row['Sleep#Onset'] = onset_hour - 21
            else:
                row['Sleep#Onset'] = onset_hour + 3

        # Features relevant to time
        day_of_week = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'][t.isoweekday() - 1]
        is_weekend = 'Y' if t.isoweekday() > 5 else 'N'
        hour = t.hour
        if 6 <= hour < 9:
            hour_name = 'DAWN'
        elif 9 <= hour < 12:
            hour_name = 'MORNING'
        elif 12 <= hour < 15:
            hour_name = 'AFTERNOON'
        elif 15 <= hour < 18:
            hour_name = 'LATE_AFTERNOON'
        elif 18 <= hour < 21:
            hour_name = 'EVENING'
        elif 21 <= hour < 24:
            hour_name = 'NIGHT'
        else:
            hour_name = 'MIDNIGHT'
        for d in ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']:
            row[f'Time#DOW={d}'] = d == day_of_week
        for d in ['Y', 'N']:
            row[f'Time#WKD={d}'] = d == is_weekend
        for d in ['DAWN', 'MORNING', 'AFTERNOON', 'LATE_AFTERNOON', 'EVENING', 'NIGHT', 'MIDNIGHT']:
            row[f'Time#HRN={d}'] = d == hour_name
        try:
            last_label = label.loc[label[:t].index.max()]['stress_dyn']
        except (KeyError, IndexError):
            last_label = -1
        row[f'ESM#LastLabel'] = last_label
        # Features extracted from previous respones behavior
#         for w_key, w_val in window_label.items():
#             w_val = w_val(t) if isinstance(w_val, Callable) else w_val
#             try:
#                 v = label.loc[t - td(seconds=w_val):t]
#                 if len(label_values) <= 2: # Binary classification
#                     row[f'ESM#LIK#{w_key}'] = np.sum(v == label_values[0]) / len(v) if len(v) > 0 else 0
#                 else:
#                     for l in label_values:
#                         row[f'ESM#LIK={l}#{w_key}'] = np.sum(v == l) / len(v) if len(v) > 0 else 0
#             except (KeyError, IndexError):
#                 if len(label_values) <= 2:
#                     row[f'ESM#LIK#{w_key}'] = 0
#                 else:
#                     for l in label_values:
#                         row[f'ESM#LIK={l}#{w_key}'] = 0
        row = {
            k: 0.0 if _safe_na_check(v) else v
            for k, v in row.items()
        }

        X.append(row)
        y.append(label_cur)
        date_times.append(timestamp)
#         count = count +1
#         if count>5:
#             break

    
    log(f"Complete feature extraction on {pid}'s data ({time.time() - _s:.2f} s).")
    X = pd.DataFrame(X)
    y = np.asarray(y)
    group = np.repeat(pid, len(y))
    date_times =  np.asarray(date_times)
    return X, y, group, date_times
def extract(
        pids: Iterable[str],
        data: Dict[str, pd.Series],
        label: pd.Series,
        label_values: List[str],
#        window_data: Dict[str, Union[int, Callable[[pd.Timestamp], int]]],
#        window_label: Dict[str, Union[int, Callable[[pd.Timestamp], int]]],
        categories: Dict[str, Optional[List[any]]] = None,
        constat_features: Dict[str, Dict[str, any]] = None,
        resample_s: Dict[str, float] = None,
        with_ray: bool=False
):
    if with_ray and not ray.is_initialized():
        raise EnvironmentError('Ray should be initialized if "with_ray" is set as True.')
    func = ray.remote(_extract).remote if with_ray else _extract
    jobs = []
    for pid in pids:
        d = dict()
        for k, v in data.items():
            try:
                d[k] = v.loc[(pid, )]
                if k.startswith('LOC_'):
                    d[k].index= pd.to_datetime( d[k].index, unit='ms', utc=True).tz_convert(DEFAULT_TZ)
                d['SPEED'] = d.pop('LOC_SPEED')
            except (KeyError, IndexError):
                pass
        job = func(
            pid=pid, data=d, label=label.loc[(pid, )],
            label_values=label_values,
#            window_data=window_data,
#            window_label=window_label,
            categories=categories,
            constant_features=constat_features[pid],
            resample_s=resample_s
        )
        jobs.append(job)
    jobs = ray.get(jobs) if with_ray else jobs
    print([x.shape for _, x, _, _ in jobs])
    X = pd.concat([x for x, _, _, _ in jobs], axis=0, ignore_index=True)
    y = np.concatenate([x for _, x, _, _ in jobs], axis=0)
    group = np.concatenate([x for _, _, x, _ in jobs], axis=0)
    date_times = np.concatenate([x for _, _, _, x in jobs], axis=0)
    t_s = date_times.min().normalize().timestamp()
    t_norm = np.asarray(list(map(lambda x: x.timestamp() - t_s, date_times)))
    C, DTYPE = X.columns, X.dtypes
    X = X.fillna({
        **{c: False for c in C[(DTYPE == object) | (DTYPE == bool)]},
        **{c: 0.0 for c in C[(DTYPE != object) & (DTYPE != bool)]},
    }).astype({
        **{c: 'bool' for c in C[(DTYPE == object) | (DTYPE == bool)]},
        **{c: 'float32' for c in C[(DTYPE != object) & (DTYPE != bool)]},
    })
    return X, y, group, t_norm, date_times

In [8]:
import os
import cloudpickle
LABEL_VALUES = [1, 0]
RESAMPLE_S = {
    'ACC_AXX': 0.25,
    'ACC_AXY': 0.25,
    'ACC_AXZ': 0.25,
    'ACC_MAG': 0.25,
    'EDA': 0.5
}
CATEGORIES = {
#    'DST_MOT': ['IDLE', 'WALKING', 'JOGGING', 'RUNNING'],
#    'ULV_INT': ['NONE', 'LOW', 'MEDIUM', 'HIGH'],
    'ACT_type': ['WALKING', 'STILL', 'IN_VEHICLE', 'ON_BICYCLE', 'RUNNING'],
#    'APP_PAC': None,
    'APP_CAT': ['SOCIAL','HEALTH','ENTER','WORK',"INFO"],
#    'BAT_STA': ['CHARGING', 'DISCHARGING', 'FULL', 'NOT_CHARGING'],
#    'CAE': ['CALL', 'IDLE'],
#    'CON': ['DISCONNECTED', 'WIFI', 'MOBILE'],
    'LOC_CLS': None,
    'LOC_LABEL': ['eating','home','work','social','others'] ,
    'SCR_SON': ['SON'],
    'SCR_ULK': ['ULK']
#     'RNG': ['VIBRATE', 'SILENT', 'NORMAL'],
#     'CHG': ['DISCONNECTED', 'CONNECTED'],
#     'PWS': ['ACTIVATE', 'DEACTIVATE'],
#     'ONF': ['ON', 'OFF']
}
PARTICIPANTS = pd.read_csv(os.path.join(PATH_INTERMEDIATE, 'proc', 'PARTICIPANT_INFO.csv'),index_col = 'pcode')
PINFO = PARTICIPANTS.assign(
    age=lambda x: x['age'],
    gender=lambda x: x['gender'],
    openness=lambda x: x['openness'],
    conscientiousness=lambda x: x['conscientiousness'],
    neuroticism=lambda x: x['neuroticism'],
    extraversion=lambda x: x['extraversion'],
    agreeableness=lambda x: x['agreeableness'],
    PSS10=lambda x: x['PSS10'],
    GHQ12=lambda x: x['GHQ12'],
    CESD_R=lambda x: x['CESD-R'],
    self_efficacy=lambda x: x['self-efficacy'],
    optimism=lambda x: x['optimism'],
    hope=lambda x: x['hope'],
    resiliency=lambda x: x['resiliency'],
)[[
    "age", "gender", "openness", "conscientiousness", "neuroticism", "extraversion", "agreeableness", 
    "GHQ12", "PSS10", "CESD-R", "self-efficacy", "optimism", "hope", "resiliency"
]]

PINFO = pd.get_dummies(PINFO, prefix_sep='=', dtype=bool).to_dict('index')
PINFO = {k: {f'PIF#{x}': y for x, y in v.items()} for k, v in PINFO.items()}
DATA = load(os.path.join(PATH_INTERMEDIATE, 'proc', 'data_proc.pkl'))
LABELS_PROC = pd.read_csv(os.path.join(PATH_INTERMEDIATE, 'proc', 'LABELS_PROC.csv'), index_col=['pcode','timestamp'],parse_dates=True)

In [9]:
import warnings
from pandas.errors import PerformanceWarning

warnings.simplefilter(action='ignore', category=PerformanceWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)


with on_ray(num_cpus=6):
    #for l in ['valence', 'arousal', 'stress', 'disturbance']:
    for l in ['stress']:
        #In preprocessing, dynamic threshold shows better data balance
        labels = LABELS_PROC[f'{l}_dyn']
        pids = labels.index.get_level_values('pcode').unique()
        feat = extract(
            pids=pids,
            data=DATA,
            label=labels,
            label_values=LABEL_VALUES,
#            window_data=WINDOW_DATA,
#            window_label=WINDOW_LABEL,
            categories=CATEGORIES,
            constat_features=PINFO,
            resample_s=RESAMPLE_S,
            with_ray=True
        )
        dump(feat, os.path.join(PATH_INTERMEDIATE, 'feat', f'{l}.pkl'))

2023-05-06 19:42:01,145	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(_extract pid=116375)[0m [23-05-06 19:42:02] Begin feature extraction on P001's data.
[2m[36m(_extract pid=116379)[0m [23-05-06 19:42:02] Begin feature extraction on P002's data.
[2m[36m(_extract pid=116378)[0m [23-05-06 19:42:02] Begin feature extraction on P003's data.
[2m[36m(_extract pid=116374)[0m [23-05-06 19:42:02] Begin feature extraction on P007's data.
[2m[36m(_extract pid=116376)[0m [23-05-06 19:42:03] Begin feature extraction on P008's data.
[2m[36m(_extract pid=116377)[0m [23-05-06 19:42:03] Begin feature extraction on P009's data.








[2m[36m(_extract pid=116375)[0m [23-05-06 20:16:13] Complete feature extraction on P001's data (2050.94 s).
[2m[36m(_extract pid=116375)[0m [23-05-06 20:16:13] Begin feature extraction on P010's data.
[2m[36m(_extract pid=116374)[0m [23-05-06 20:19:11] Complete feature extraction on P007's data (2228.81 s).
[2m[36m(_extract pid=116374)[0m [23-05-06 20:19:11] Begin feature extraction on P011's data.
[2m[36m(_extract pid=116378)[0m [23-05-06 20:25:47] Complete feature extraction on P003's data (2625.14 s).
[2m[36m(_extract pid=116378)[0m [23-05-06 20:25:48] Begin feature extraction on P013's data.
[2m[36m(_extract pid=116379)[0m [23-05-06 20:27:04] Complete feature extraction on P002's data (2701.80 s).
[2m[36m(_extract pid=116379)[0m [23-05-06 20:27:04] Begin feature extraction on P014's data.
[2m[36m(_extract pid=116377)[0m [23-05-06 20:35:01] Complete feature extraction on P009's data (3178.53 s).
[2m[36m(_extract pid=116377)[0m [23-05-06 20:35:02] Begin 



[2m[36m(_extract pid=116376)[0m [23-05-06 20:40:37] Complete feature extraction on P008's data (3513.96 s).
[2m[36m(_extract pid=116376)[0m [23-05-06 20:40:37] Begin feature extraction on P016's data.




[2m[36m(_extract pid=116375)[0m [23-05-06 20:51:50] Complete feature extraction on P010's data (2137.19 s).
[2m[36m(_extract pid=116375)[0m [23-05-06 20:51:51] Begin feature extraction on P018's data.




[2m[36m(_extract pid=116379)[0m [23-05-06 21:00:35] Complete feature extraction on P014's data (2011.20 s).


RayTaskError(ValueError): [36mray::_extract()[39m (pid=116379, ip=192.168.1.27)
  File "/tmp/ipykernel_116038/3290932600.py", line 194, in _extract
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (182,) + inhomogeneous part.