In [None]:
import os
import sys
from pathlib import Path

In [None]:
# HOME directory setup (change this to match your environment)
if "google.colab" not in sys.modules:
    # Anywhere other than Google Colaboratory: use the parent of the
    # current working directory.
    HOME = Path("..")
else:
    # Google Colaboratory: the project directory lives on Google Drive.
    HOME = Path("/content/drive/MyDrive/signate/NEDOG")

    # Mount Google Drive unless the mount point already exists.
    if not os.path.exists("/content/drive"):
        from google.colab import drive
        drive.mount("/content/drive")

# INPUT / WORKING directory setup
INPUT = HOME.joinpath("input")
WORKING = HOME.joinpath("working")

In [None]:
import warnings
warnings.simplefilter('ignore', FutureWarning)

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# EMG column names: every label appears once with an 'R' suffix and once
# with an 'L' suffix, in that order.
emg_cols = [
    f'{label} {side}'
    for label in ('TA', 'LG', 'RF', 'VL', 'ST', 'GMAX', 'EMI', 'DEL')
    for side in ('R', 'L')
]

In [None]:
# Velocity component column names, one per axis (x, y, z).
vel_cols = [f'vel_{axis}' for axis in 'xyz']

# 読み取り

In [None]:
# Load the EMG frames from WORKING: train, test, reference-train and
# reference-test (presumably written by an earlier "prep1" step - confirm).
# NOTE: pickle files execute code on load; only read files you produced yourself.
tr_emg_df = pd.read_pickle(WORKING.joinpath('prep1_tr_emg.pickle'))
ts_emg_df = pd.read_pickle(WORKING.joinpath('prep1_ts_emg.pickle'))
rtr_emg_df = pd.read_pickle(WORKING.joinpath('prep1_rtr_emg.pickle'))
rts_emg_df = pd.read_pickle(WORKING.joinpath('prep1_rts_emg.pickle'))

In [None]:
# Load the velocity frames from WORKING: train, reference-train and
# reference-test (no velocity file is read for the test set).
# NOTE: pickle files execute code on load; only read files you produced yourself.
tr_vel_df = pd.read_pickle(WORKING.joinpath('prep1_tr_vel.pickle'))
rtr_vel_df = pd.read_pickle(WORKING.joinpath('prep1_rtr_vel.pickle'))
rts_vel_df = pd.read_pickle(WORKING.joinpath('prep1_rts_vel.pickle'))

# EMGデータ加工

## スケール合わせ・絶対値

In [None]:
def modify_data(tr_df, ts_df):
    """Rectify and rescale the EMG columns, subject by subject.

    For every subject present in ``tr_df`` and every column in ``emg_cols``:

    1. clip both frames to the training min/max of that subject and column
       (a no-op for the training rows, a range limit for ``ts_df`` rows),
    2. take the absolute value,
    3. divide by the 90th percentile of the rectified training values.

    All statistics are computed from ``tr_df`` only; ``ts_df`` rows are
    transformed with the statistics of the matching subject.

    Parameters
    ----------
    tr_df : pandas.DataFrame
        Frame the statistics are taken from. Needs a ``subject`` column and
        every column listed in ``emg_cols``.
    ts_df : pandas.DataFrame
        Frame transformed with the ``tr_df`` statistics; same columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(tr_result_df, ts_result_df)``: transformed copies. The inputs are
        not modified.

    Notes
    -----
    * Only subjects that actually occur in ``tr_df`` are processed, so subject
      ids need not be contiguous integers. ``ts_df`` rows whose subject is
      absent from ``tr_df`` are returned unchanged.
    * When the 90th-percentile scale is zero or not finite, the column is
      rectified but left unscaled instead of being turned into inf/NaN.
    """
    tr_result_df = tr_df.copy()
    ts_result_df = ts_df.copy()

    # Walk the subjects that exist in the training frame rather than every
    # integer between min and max: a gap would otherwise feed an empty slice
    # to np.percentile.
    for subject in tr_result_df['subject'].dropna().unique():
        ftr = tr_result_df['subject'] == subject
        fts = ts_result_df['subject'] == subject
        for col in emg_cols:
            # Percentiles 0 and 100 are the training min and max.
            lo, hi = np.percentile(tr_result_df.loc[ftr, col], [0, 100])
            tr_result_df.loc[ftr, col] = tr_result_df.loc[ftr, col].clip(lo, hi).abs()
            ts_result_df.loc[fts, col] = ts_result_df.loc[fts, col].clip(lo, hi).abs()

            # Scale is taken from the rectified training values.
            scale = np.percentile(tr_result_df.loc[ftr, col], 90)
            if not (np.isfinite(scale) and scale > 0):
                # Dividing would only produce inf/NaN; keep rectified values.
                continue
            tr_result_df.loc[ftr, col] /= scale
            ts_result_df.loc[fts, col] /= scale

    return tr_result_df, ts_result_df

In [None]:
# Rescale the main train/test pair with the train statistics, then the
# reference pair with the reference-train statistics.
tr_emg_df, ts_emg_df = modify_data(tr_df=tr_emg_df, ts_df=ts_emg_df)
rtr_emg_df, rts_emg_df = modify_data(tr_df=rtr_emg_df, ts_df=rts_emg_df)

In [None]:
# Merge the reference data into the training set (reference train and
# reference test are treated as different subjects).
# NOTE: re-running this cell shifts the subject id again and appends the
# reference rows a second time - run it once per kernel session.
rts_emg_df['subject'] = rts_emg_df['subject'] + 1
tr_emg_df = pd.concat(
    [tr_emg_df, rtr_emg_df, rts_emg_df], axis=0, ignore_index=True
)

# downsampling

In [None]:
def agg_feature(df, group_cols, target_cols, funcs=['mean','std']):
    """Aggregate ``target_cols`` within each group, once per function.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    group_cols : list
        Columns to group by; they come back as ordinary columns.
    target_cols : list
        Columns to aggregate.
    funcs : list, default ['mean', 'std']
        Aggregations passed to ``GroupBy.agg`` one at a time. The default
        list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        One row per group. Each aggregated column is named
        ``'<func> <original column>'``, with all columns for the first
        function followed by all columns for the next.
    """
    def aggregate(func):
        # One aggregation pass; prefix the column names with the function.
        stats = df.groupby(group_cols)[target_cols].agg(func)
        return stats.rename(columns=lambda name: f'{func} {name}')

    per_func = [aggregate(func) for func in funcs]
    return pd.concat(per_func, axis=1).reset_index()

In [None]:
def downsample_emg(df,):
    """Bin rows along ``time`` and aggregate the EMG columns per bin.

    ``time`` is replaced by ``time * 60 // 2000`` (presumably mapping a
    2000 Hz sample index onto 60 Hz frames - confirm against the data), then
    every column in ``emg_cols`` is aggregated per (subject, trial, time) with
    ``agg_feature`` and its default functions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``subject``, ``trial``, ``time`` and the ``emg_cols``.

    Returns
    -------
    pandas.DataFrame
        One row per (subject, trial, binned time). The input is not modified.
    """
    # assign() works on a copy, so the caller's frame keeps its raw time.
    binned_df = df.assign(time=df['time'] * 60 // 2000)
    return agg_feature(binned_df, ['subject','trial','time'], emg_cols)

In [None]:
# Downsample the train and test EMG frames in the same way.
tr_emg_df, ts_emg_df = map(downsample_emg, (tr_emg_df, ts_emg_df))

## ラグ特徴量追加

In [None]:
def lag_feature(df, lags):
    """Append shifted copies of the feature columns for each lag.

    For every ``lag`` the frame (minus ``time``) is shifted by ``lag`` rows
    within each (subject, trial) group, so values never cross a trial
    boundary; rows without a source value become NaN. Shifted columns are
    named ``'lag<lag> <original column>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``subject``, ``trial``, ``time`` and feature columns whose
        names are strings. Rows are shifted in their existing order.
    lags : iterable of int
        Row offsets handed to ``GroupBy.shift``; negative values pull values
        from later rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by one block of shifted columns per lag.
    """
    def shifted(lag):
        # Shift inside each trial and tag the columns with the lag.
        prefix = f'lag{lag} '
        per_trial = df.drop(columns='time').groupby(['subject','trial'])
        return per_trial.shift(lag).rename(columns=lambda name: prefix + name)

    return pd.concat([df] + [shifted(lag) for lag in lags], axis=1)

In [None]:
# Add the same lag/lead features (shifts of -6, -2, 2 and 6 rows) to both
# the train and the test EMG frames.
tr_emg_df, ts_emg_df = (
    lag_feature(frame, [-6,-2,2,6]) for frame in (tr_emg_df, ts_emg_df)
)

# 速度データ加工

In [None]:
# Merge the reference data into the training set (reference train and
# reference test are treated as different subjects).
# NOTE: re-running this cell shifts the subject id again and appends the
# reference rows a second time - run it once per kernel session.
rts_vel_df['subject'] = rts_vel_df['subject'] + 1
tr_vel_df = pd.concat(
    [tr_vel_df, rtr_vel_df, rts_vel_df], axis=0, ignore_index=True
)

## 極座標

In [None]:
# Convert to cylindrical coordinates (the azimuth is returned as sin/cos)
def conv_cylindrical(df):
    """Express the velocity in cylindrical form.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``vel_x``, ``vel_y`` and ``vel_z`` columns.

    Returns
    -------
    tuple
        ``(r, s, c, z)`` where ``r = sqrt(vel_x**2 + vel_y**2)``,
        ``s = vel_y / r`` (sine of the azimuth), ``c = vel_x / r`` (cosine of
        the azimuth) and ``z`` is ``vel_z`` unchanged.

    Notes
    -----
    Where ``r`` is 0 the azimuth is undefined; for float Series the 0/0
    division leaves NaN in ``s`` and ``c``.
    """
    vx, vy = df['vel_x'], df['vel_y']
    radius = np.sqrt(vx * vx + vy * vy)
    return radius, vy / radius, vx / radius, df['vel_z']

In [None]:
# Convert to spherical coordinates (the angle along the z axis is returned
# as an elevation angle)
def conv_spherical(df):
    """Express the velocity in spherical form.

    Builds on ``conv_cylindrical``: the horizontal radius and the vertical
    component are combined into a magnitude and an elevation angle.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by ``conv_cylindrical``.

    Returns
    -------
    tuple
        ``(r, s, c, e)`` where ``r`` is ``sqrt(horizontal**2 + z**2)``,
        ``s``/``c`` are the azimuth sine/cosine passed through from
        ``conv_cylindrical`` and ``e = arctan2(z, horizontal)`` in radians.
    """
    horizontal, sin_az, cos_az, vertical = conv_cylindrical(df)
    elevation = np.arctan2(vertical, horizontal)
    magnitude = np.sqrt(np.square(horizontal) + np.square(vertical))
    return magnitude, sin_az, cos_az, elevation

In [None]:
# Add the spherical representation of the velocity: magnitude (vel_r),
# azimuth sine/cosine (vel_s, vel_c) and elevation angle (vel_e).
tr_vel_df = tr_vel_df.assign(
    **dict(zip(('vel_r', 'vel_s', 'vel_c', 'vel_e'), conv_spherical(tr_vel_df)))
)

## 加速度

In [None]:
def calc_acc(df, cols):
    """Forward-difference the given columns within each (subject, trial).

    Each value becomes ``next_row - this_row`` inside its group. The last row
    of a group has no successor, so it repeats the previous difference
    (forward fill); a single-row group stays NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``subject``, ``trial`` and the columns in ``cols``.
    cols : list of str
        Columns to difference.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``; column names have ``'vel'`` replaced by
        ``'acc'``.
    """
    def forward_difference(values):
        step = values.shift(-1) - values
        return step.ffill()

    per_trial = df.groupby(['subject','trial'])[cols]
    deltas = per_trial.transform(forward_difference)
    return deltas.rename(columns=lambda name: name.replace('vel','acc'))

In [None]:
# Difference the Cartesian and spherical velocity columns, then attach the
# resulting acc_* columns next to the velocities.
tr_acc_df = calc_acc(tr_vel_df, [*vel_cols, 'vel_r', 'vel_s', 'vel_c', 'vel_e'])
tr_vel_df = pd.concat((tr_vel_df, tr_acc_df), axis=1)

# 鏡像データ結合

In [None]:
def create_mirror_emg(df):
    """Return a copy of ``df`` with left and right channels swapped.

    Columns whose name ends in ``L`` receive the values of the columns ending
    in ``R`` and vice versa. Pairing is positional: the n-th ``L`` column is
    swapped with the n-th ``R`` column, so both sets must have the same size
    and matching order. All other columns are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.

    Returns
    -------
    pandas.DataFrame
        Mirrored copy; ``df`` itself is not modified.
    """
    left_mask = df.columns.str.contains('L$')
    right_mask = df.columns.str.contains('R$')

    # Take both sides from the untouched input so the swap is symmetric.
    left_values = df.loc[:, left_mask].values
    right_values = df.loc[:, right_mask].values

    mirrored_df = df.copy()
    mirrored_df.loc[:, left_mask] = right_values
    mirrored_df.loc[:, right_mask] = left_values
    return mirrored_df

In [None]:
def create_mirror_vel(df):
    """Return a copy of ``df`` with the y-direction quantities negated.

    Every column whose name ends in ``_y`` or ``_s`` is multiplied by -1;
    all other columns are copied unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.

    Returns
    -------
    pandas.DataFrame
        Mirrored copy; ``df`` itself is not modified.
    """
    mirrored_df = df.copy()
    for pattern in ('_y$', '_s$'):
        selected = df.columns.str.contains(pattern)
        mirrored_df.loc[:, selected] = mirrored_df.loc[:, selected] * -1
    return mirrored_df

In [None]:
# Append the mirrored copy below the original rows of each frame, doubling
# the row count, and renumber the index from 0.
tr_emg_df = pd.concat(
    [tr_emg_df, create_mirror_emg(tr_emg_df)], axis=0, ignore_index=True
)
ts_emg_df = pd.concat(
    [ts_emg_df, create_mirror_emg(ts_emg_df)], axis=0, ignore_index=True
)
tr_vel_df = pd.concat(
    [tr_vel_df, create_mirror_vel(tr_vel_df)], axis=0, ignore_index=True
)

# 保存

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the training EMG frame before saving.
tr_emg_df.info()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the test EMG frame before saving.
ts_emg_df.info()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the training velocity frame before saving.
tr_vel_df.info()

In [None]:
# Write the processed frames to WORKING under the "prep2" prefix.
tr_emg_df.to_pickle(WORKING.joinpath('prep2_tr_emg.pickle'))
tr_vel_df.to_pickle(WORKING.joinpath('prep2_tr_vel.pickle'))
ts_emg_df.to_pickle(WORKING.joinpath('prep2_ts_emg.pickle'))