In [1]:
# ========================
# library
# ========================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import pickle
import glob
from tqdm import tqdm
%matplotlib inline

In [2]:
# ========================
# constant
# ========================
# Both input locations hang off the same data directory, which is addressed
# relative to the notebook's working directory.
_DATA_ROOT = "../data"

# Metadata CSV location (not read by the cells visible in this notebook).
DEFOG_META_PATH = f"{_DATA_ROOT}/defog_metadata.csv"

# Glob pattern handed to glob.glob() to enumerate the "notype" recordings.
DEFOG_FOLDER = f"{_DATA_ROOT}/train/notype/*.csv"

In [3]:
# ========================
# settings
# ========================
fe = "078"  # feature-set id: names the output directory and the saved .npy files
ex = "175"  # experiment id: selects which prediction parquet files are read

# Create the output directory together with its "save" subdirectory.
# makedirs builds every missing parent, and exist_ok=True makes the call
# idempotent, so "save" is still created when the parent directory already
# exists (the previous exists()-guard skipped it in that case).
os.makedirs(f"../output/fe/fe{fe}/save", exist_ok=True)

In [4]:
# Every file path matching the DEFOG_FOLDER glob pattern; later cells test
# candidate paths for membership in this list.
data_list = glob.glob(pathname=DEFOG_FOLDER)

In [5]:
# Recording-level metadata table; the main loop walks over its rows.
# NOTE(review): the path points at the fe039 output folder, so this presumably
# depends on that feature step having been run first - confirm.
meta = pd.read_parquet(path="../output/fe/fe039/fe039_defog_meta.parquet")

In [6]:
# Base signal columns.
cols = ["AccV", "AccML", "AccAP"]

# Base columns followed by a lag/lead difference column for each of them.
num_cols = cols + [
    f"{axis}_{direction}_diff"
    for axis in cols
    for direction in ("lag", "lead")
]

# Columns involved in building the targets.
target_use_cols = ["Event"]
target_cols = ["StartHesitation", "Turn", "Walking"]

# Windowing parameters, in rows: window length, stride between window starts,
# and an offset that the cells visible here do not use.
seq_len, shift, offset = 5000, 2500, 1250

In [7]:
def _event_type_targets(merged, target_cols):
    """Convert per-row class scores into hard one-hot labels gated by ``Event``.

    Parameters
    ----------
    merged : pandas.DataFrame
        One recording after the left merge with the predictions. Must contain
        the ``target_cols`` score columns and an ``Event`` column.
    target_cols : list of str
        Score columns; their order defines the class order of the output.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(merged), len(target_cols))``. Each row has
        a single 1 at the arg-max score column, or is all zeros where
        ``Event == 0``.
    """
    scores = merged[target_cols].values
    winner = np.argmax(scores, axis=1)
    # NOTE(review): a row without a matching prediction is all-NaN after the
    # left merge and np.argmax resolves it to class 0, so such a row is labelled
    # with the first target column whenever Event != 0. This mirrors the
    # previous behaviour - confirm that predictions cover every row.
    one_hot = np.eye(len(target_cols))[winner]
    one_hot[merged["Event"].values == 0] = 0
    return one_hot


def _window_targets(target, seq_len, shift):
    """Cut a ``(rows, classes)`` array into zero-padded, overlapping windows.

    Window ``b`` starts at row ``b * shift`` and holds up to ``seq_len`` rows;
    a window that runs past the end of the recording is padded with zeros.
    The window count is ``(rows - 1) // shift``, so a recording with at most
    ``shift`` rows (including an empty one) yields no windows.

    With ``seq_len == 2 * shift`` the tail of the final window always fits
    inside ``seq_len``, so one slicing rule covers first, middle and last
    windows alike.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_windows, seq_len, classes)``.
    """
    # max(..., 0) keeps an empty recording from producing a negative dimension.
    n_windows = max((len(target) - 1) // shift, 0)
    windows = np.zeros([n_windows, seq_len, target.shape[1]])
    for b in range(n_windows):
        start = b * shift
        chunk = target[start:start + seq_len]
        windows[b, :len(chunk)] = chunk
    return windows


# Set membership is O(1) per lookup; the glob result itself is left untouched.
available_paths = set(data_list)

# The recordings do not change between folds, so each CSV is parsed once and
# reused. Only the merge key and the event flag are needed for the targets.
event_frames = {}

for fold in range(5):
    print(fold)
    pred = pd.read_parquet(f"../output/exp/ex{ex}/ex{ex}_notype_{fold}_pred_15000.parquet")
    fold_windows = []
    # Iterating the array directly lets tqdm pick up the total for a real bar.
    for i in tqdm(meta["Id"].values):
        path = f"../data/train/notype/{i}.csv"
        if path not in available_paths:
            # Metadata rows without a matching "notype" file are skipped.
            continue
        if i not in event_frames:
            event_frames[i] = pd.read_csv(path, usecols=["Time", "Event"])

        # Attach this fold's predictions for the recording; merge returns a new
        # frame, so the cached CSV frame is never modified.
        df_ = pred[pred["Id"] == i].reset_index(drop=True)
        df = event_frames[i].merge(df_, how="left", on="Time")

        target = _event_type_targets(df, target_cols)
        fold_windows.append(_window_targets(target, seq_len, shift))

    if not fold_windows:
        # np.concatenate on an empty list fails with an opaque message.
        raise ValueError(
            f"fold {fold}: no recording listed in meta was found under ../data/train/notype/"
        )
    target_array = np.concatenate(fold_windows, axis=0)
    np.save(f"../output/fe/fe{fe}/fe{fe}_target_array_{fold}.npy", target_array)

0


137it [00:41,  3.27it/s]


1


137it [00:45,  2.98it/s]


2


137it [00:45,  3.00it/s]


3


137it [00:46,  2.96it/s]


4


137it [00:41,  3.29it/s]
