In [1]:
import numpy as np
import os
import pandas as pd
import copy 
import datetime 
# Root folder of the dataset: raw CSVs are read from <data_path>/raw_data and
# every artefact of this notebook is written to <data_path>/<process_file>.
data_path = '../../../code_data/arises/'
process_file = 'coldstart_fl'

# Participant ids 1001 .. 1012.
pid_list = list(range(1001, 1013))


In [2]:
# Load one CSV per participant into pid2regular_df[pid]: a float64 frame
# indexed by timestamp in which missing values (and exact zeros) are NaN.
pid2regular_df = {}
# Labels applied, in file order, to the CSV columns that are read.
names = ['ts', 'glucose_level', 'isRealBGL', 'basal', 'meal',
         'bolus', 'correction_bolus', 'finger_stick', 'hypoStart',
         'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD', 'SDNN',
         'medianNNI', 'CVNNI', 'CVSD', 'pNNX',
         'meanHR', 'minHR', 'maxHR', 'VLF', 'LF', 'HF', 'LHR']
# Read exactly as many leading columns as there are labels (28).
usecols = list(range(len(names)))
for pid in pid_list:

    load_path = os.path.join(data_path, 'raw_data', f'dc_{pid}_data.csv')
    # NOTE(review): header=1 combined with names= means file rows 0 and 1 are
    # not returned as data -- confirm the CSVs really start with two non-data rows.
    df = pd.read_csv(load_path, names=names, usecols=usecols, header=1)

    # Unparseable timestamps become NaT (errors='coerce').
    df['ts'] = pd.to_datetime(df['ts'], dayfirst=True, errors='coerce')
    df = df.set_index('ts')
    df = df.sort_index()

    # Keep a glucose value only on rows flagged isRealBGL == True; every other
    # row gets NaN.  Vectorised replacement for a row-wise DataFrame.apply.
    df['glucose_level'] = df['glucose_level'].where(df['isRealBGL'].eq(True))
    df = df.drop(['hypoStart', 'isRealBGL'], axis=1)
    df = df.astype(np.float64)
    # Exact zeros are treated as "no value" in every column.
    df = df.replace(0.0, np.nan)
    pid2regular_df[pid] = df
    print(f'{pid}, total_points:{len(df)}, start:{df.index[0]}, end:{df.index[-1]}')


1001, total_points:15831, start:2019-02-28 12:43:28, end:2019-04-24 11:51:50
1002, total_points:13215, start:2019-03-04 14:18:20, end:2019-04-19 11:25:42
1003, total_points:14896, start:2019-03-04 19:17:53, end:2019-04-25 12:31:25
1004, total_points:11517, start:2019-03-05 13:47:53, end:2019-04-14 13:25:57
1005, total_points:14064, start:2019-03-07 13:48:01, end:2019-04-25 09:41:10
1006, total_points:13464, start:2019-03-08 14:15:27, end:2019-04-24 08:08:52
1007, total_points:13066, start:2019-04-29 13:35:19, end:2019-06-13 22:18:07
1008, total_points:14307, start:2019-04-29 16:00:45, end:2019-06-18 08:08:44
1009, total_points:14201, start:2019-04-30 12:41:20, end:2019-06-18 19:59:44
1010, total_points:12911, start:2019-04-30 16:12:50, end:2019-06-14 12:01:44
1011, total_points:13018, start:2019-05-07 12:41:15, end:2019-06-21 17:25:02
1012, total_points:15797, start:2019-05-07 16:05:46, end:2019-07-01 12:23:44


In [3]:
# Sanity check: per participant, count the gaps between consecutive
# timestamps that are longer than 5 minutes 10 seconds.
for pid in pid_list:
    stamps = pid2regular_df[pid].index
    delta_ts = stamps[1:] - stamps[:-1]
    print(pid, (delta_ts > datetime.timedelta(minutes=5, seconds=10)).sum())

1001 0
1002 0
1003 0
1004 0
1005 0
1006 0
1007 0
1008 0
1009 0
1010 0
1011 0
1012 0


In [4]:
# add time features

def add_time_attributes(pid2data):
    """Add calendar and time-of-day feature columns to every frame, in place.

    Each frame must be indexed by a ``DatetimeIndex``.  The following float64
    columns are added (or overwritten):

    * ``day_of_week`` -- ``dayofweek`` of the index (Monday is 0)
    * ``hour``, ``minute`` -- clock components of the index
    * ``timestamp`` -- seconds elapsed since midnight
    * ``sin_time``, ``cos_time`` -- ``timestamp`` mapped onto a 24-hour circle

    Parameters
    ----------
    pid2data : dict
        Maps an id to a ``pandas.DataFrame``; the frames are modified in place.

    Returns
    -------
    dict
        The same ``pid2data`` object that was passed in.
    """
    seconds_in_day = 24 * 60 * 60

    for pid in pid2data:
        data = pid2data[pid]
        # Read the components straight from the index so the function does
        # not depend on the index being named 'ts'.
        stamps = data.index

        # Plain arrays are assigned positionally, so no index alignment is
        # involved even when timestamps repeat.
        data['day_of_week'] = np.asarray(stamps.dayofweek, dtype=np.float64)
        data['hour'] = np.asarray(stamps.hour, dtype=np.float64)
        data['minute'] = np.asarray(stamps.minute, dtype=np.float64)
        data['timestamp'] = np.asarray(
            stamps.hour * 3600 + stamps.minute * 60 + stamps.second,
            dtype=np.float64,
        )

        # Cyclic encoding: 00:00 and 24:00 map to the same point.
        data['sin_time'] = np.sin(2 * np.pi * data['timestamp'] / seconds_in_day).astype(np.float64)
        data['cos_time'] = np.cos(2 * np.pi * data['timestamp'] / seconds_in_day).astype(np.float64)

    return pid2data

In [5]:
# Add the time-derived feature columns to every participant frame (in place).
pid2regular_df = add_time_attributes(pid2regular_df)


In [6]:
# Spot check: summary statistics of the sine time feature for participant 1001.
pid2regular_df[1001].sin_time.describe()

count    15831.000000
mean         0.000045
std          0.707334
min         -0.999977
25%         -0.712001
50%          0.007272
75%          0.712128
max          0.999976
Name: sin_time, dtype: float64

In [7]:
# add discrete y

def discrete(y):
    """Map a glucose value to a class label.

    Returns 0 for values below 70, 1 for values in [70, 180], 2 for values
    above 180, and NaN when none of the comparisons holds (e.g. NaN input).
    """
    if y < 70:
        return 0
    if y > 180:
        return 2
    # Only in-range values and NaN reach this point.
    return 1 if 70 <= y <= 180 else np.nan


def add_discrete_y(pid2data):
    """Add a ``discrete_y`` column to every frame, in place.

    ``discrete_y`` is ``discrete`` applied element-wise to ``glucose_level``.
    The same ``pid2data`` dict is returned.
    """
    for frame in pid2data.values():
        frame['discrete_y'] = frame['glucose_level'].apply(discrete)
    return pid2data

In [8]:
# Attach the class label derived from glucose_level to every participant frame.
pid2regular_df = add_discrete_y(pid2regular_df)

In [9]:
# Column inventory of participant 1001 after the time features and discrete_y were added.
pid2regular_df[1001].columns

Index(['glucose_level', 'basal', 'meal', 'bolus', 'correction_bolus',
       'finger_stick', 'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD',
       'SDNN', 'medianNNI', 'CVNNI', 'CVSD', 'pNNX', 'meanHR', 'minHR',
       'maxHR', 'VLF', 'LF', 'HF', 'LHR', 'day_of_week', 'hour', 'minute',
       'timestamp', 'sin_time', 'cos_time', 'discrete_y'],
      dtype='object')

In [10]:
def add_indicate_for_nan(pid2data, ignore_indicate_set):
    """Add a missing-value indicator column for every attribute, in place.

    For each column ``c`` that is not listed in ``ignore_indicate_set`` an
    int64 column ``c + '_indicate'`` is written: 1 where ``c`` is NaN, 0
    otherwise.

    The call is idempotent: a column that is itself the indicator of another
    existing column is skipped, so calling the function again does not create
    ``*_indicate_indicate`` columns.

    Parameters
    ----------
    pid2data : dict
        Maps an id to a ``pandas.DataFrame``; the frames are modified in place.
    ignore_indicate_set : container of str
        Column names that must not receive an indicator.

    Returns
    -------
    dict
        The same ``pid2data`` object that was passed in.
    """
    suffix = '_indicate'
    for pid in pid2data:
        data = pid2data[pid]
        # Snapshot the labels first: columns are added inside the loop.
        for attri in list(data.columns):
            if attri in ignore_indicate_set:
                continue
            # Already an indicator of an existing column (earlier call).
            if attri.endswith(suffix) and attri[:-len(suffix)] in data.columns:
                continue
            data[attri + suffix] = data[attri].isna().astype('int64')
    return pid2data

In [11]:
# Columns that get no missing-value indicator (the time-derived features).
# Despite its name this is a list; it is only used for membership tests.
ignore_indicate_set = ['day_of_week', 'hour', 'minute',
       'timestamp', 'sin_time', 'cos_time',]
pid2regular_df = add_indicate_for_nan(pid2regular_df, ignore_indicate_set)

In [12]:
# Chronological 60 / 20 / 20 split per participant.  The cut points are chosen
# on the rows that carry a glucose reading, then applied to the full frame by
# timestamp label.
pid2train = {}
pid2valid = {}
pid2test = {}

for pid in pid_list:
    regular_df = pid2regular_df[pid]

    # Timestamps of the rows that have a glucose value, in index order.
    idxs = regular_df.index[regular_df['glucose_level'].notna()]

    total_len = len(idxs)
    train_end = int(total_len * 0.6)
    valid_end = int(total_len * 0.8)

    # Index the DatetimeIndex directly (positional) instead of going through
    # to_frame().iloc[k][0], whose integer-position Series lookup is deprecated.
    # Label slices are inclusive at both ends.  Rows lying strictly between
    # two boundary timestamps (which have no glucose value) end up in no split.
    pid2train[pid] = regular_df.loc[:idxs[train_end]]

    pid2valid[pid] = regular_df.loc[idxs[train_end + 1]:idxs[valid_end]]

    pid2test[pid] = regular_df.loc[idxs[valid_end + 1]:]

    print(pid, total_len, len(pid2train[pid]), len(pid2valid[pid]), len(pid2test[pid]))

1001 15142 9636 3122 3073
1002 12882 7871 2635 2709
1003 14223 8952 2903 3041
1004 11165 6782 2354 2381
1005 13888 8437 2807 2820
1006 13062 8105 2692 2667
1007 11905 8202 2421 2443
1008 14200 8598 2870 2839
1009 13671 8614 2790 2797
1010 12736 7741 2587 2583
1011 12690 7805 2612 2601
1012 14334 9033 3650 3114


In [13]:
# Pearson correlation of each raw signal with glucose_level, computed on the
# training rows of all participants pooled together (sorted ascending).
df = pd.concat([pid2train[key] for key in pid2train])
pearson = df.loc[:, [
    'glucose_level', 'basal', 'meal', 'bolus', 'correction_bolus',
    'finger_stick', 'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD', 'SDNN',
    'medianNNI', 'CVNNI', 'CVSD', 'pNNX', 'meanHR', 'minHR', 'maxHR', 'VLF',
    'LF', 'HF', 'LHR', 'timestamp',
]].corr(method='pearson')
pearson['glucose_level'].sort_values()

SCL                -0.035496
medianNNI          -0.034121
SDNN               -0.025722
basal              -0.025605
VLF                -0.021262
LF                 -0.016292
LHR                -0.015930
EDA                -0.012553
timestamp          -0.011617
RMSSD              -0.011113
CVNNI              -0.010650
ACC                -0.009611
pNNX               -0.005775
HF                 -0.002610
CVSD               -0.000801
SCR                 0.003909
TEMP                0.006357
HR                  0.007584
maxHR               0.015800
meanHR              0.033335
minHR               0.033883
meal                0.134624
bolus               0.278311
correction_bolus    0.484299
finger_stick        0.934956
glucose_level       1.000000
Name: glucose_level, dtype: float64

In [14]:
# Column inventory of a training frame.  The participant is named explicitly
# (the last one in pid_list) rather than through a loop variable left over
# from an earlier cell.
pid2train[pid_list[-1]].columns

Index(['glucose_level', 'basal', 'meal', 'bolus', 'correction_bolus',
       'finger_stick', 'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD',
       'SDNN', 'medianNNI', 'CVNNI', 'CVSD', 'pNNX', 'meanHR', 'minHR',
       'maxHR', 'VLF', 'LF', 'HF', 'LHR', 'day_of_week', 'hour', 'minute',
       'timestamp', 'sin_time', 'cos_time', 'discrete_y',
       'glucose_level_indicate', 'basal_indicate', 'meal_indicate',
       'bolus_indicate', 'correction_bolus_indicate', 'finger_stick_indicate',
       'EDA_indicate', 'SCL_indicate', 'SCR_indicate', 'HR_indicate',
       'TEMP_indicate', 'ACC_indicate', 'RMSSD_indicate', 'SDNN_indicate',
       'medianNNI_indicate', 'CVNNI_indicate', 'CVSD_indicate',
       'pNNX_indicate', 'meanHR_indicate', 'minHR_indicate', 'maxHR_indicate',
       'VLF_indicate', 'LF_indicate', 'HF_indicate', 'LHR_indicate',
       'discrete_y_indicate'],
      dtype='object')

In [15]:
# Per-participant normalisation statistics, computed on the training split
# only.  Keys are (pid, attribute) tuples, values are (mean, std) tuples.
pid_attri2mean_std = {}
attris_should_norm = ['glucose_level', 'basal', 'meal', 'bolus', 'correction_bolus',
       'finger_stick', 'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD', 'SDNN',
       'medianNNI', 'CVNNI', 'CVSD', 'pNNX', 'meanHR', 'minHR', 'maxHR', 'VLF',
       'LF', 'HF', 'LHR', 'timestamp']
for pid in pid_list:
    for attri in attris_should_norm:
        mean = pid2train[pid][attri].mean()
        std = pid2train[pid][attri].std()
        # A missing or zero std is replaced by 1e-6.  pd.isna is required
        # here: an identity test (`std is np.nan`) never matches a computed
        # NaN, and the `np.NaN` alias does not exist in NumPy >= 2.0.
        if pd.isna(std) or std == 0:
            std = 1e-6
        pid_attri2mean_std[(pid, attri)] = (mean, std)

os.makedirs(os.path.join(data_path, process_file), exist_ok=True)
# The dict is stored as a pickled object array; np.load needs
# allow_pickle=True to read it back.
np.save(os.path.join(data_path, process_file, 'pid_attri2mean_std.npy'), pid_attri2mean_std)

In [16]:
def norm_data(pid2data, pid_attri2mean_std, attris_should_norm):
    """Return normalised deep copies of the frames in ``pid2data``.

    For every column in ``attris_should_norm`` the (mean, std) pair stored
    under ``(pid, column)`` is applied: ``(x - mean) / std`` when std exceeds
    1e-6, otherwise ``x / mean``.  The input frames are left untouched and
    all other columns are copied unchanged.
    """
    normalized = {}
    for pid, frame in pid2data.items():
        scaled = frame.copy(deep=True)
        for attri in attris_should_norm:
            mean, std = pid_attri2mean_std[(pid, attri)]
            column = scaled[attri]
            if std > 1e-6:
                scaled[attri] = (column - mean) / std
            else:
                # Degenerate spread: rescale by the mean only.
                scaled[attri] = column / mean
        normalized[pid] = scaled
    return normalized

In [17]:
# All three splits are scaled with the same statistics, which were computed
# from the training split (pid_attri2mean_std is built from pid2train).
pid2train_norm = norm_data(pid2train, pid_attri2mean_std, attris_should_norm)
pid2valid_norm = norm_data(pid2valid, pid_attri2mean_std, attris_should_norm)
pid2test_norm = norm_data(pid2test, pid_attri2mean_std, attris_should_norm)

In [18]:
# Ordered list of the model input attributes (everything except glucose_level
# itself and discrete_y); the position in this list is the feature index.
attri_list_wo_glucose = [
    'basal', 'meal', 'bolus', 'correction_bolus',
    'finger_stick', 'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD', 'SDNN',
    'medianNNI', 'CVNNI', 'CVSD', 'pNNX', 'meanHR', 'minHR', 'maxHR', 'VLF',
    'LF', 'HF', 'LHR', 'day_of_week', 'hour', 'minute', 'timestamp',
    'sin_time', 'cos_time', 'glucose_level_indicate',
    'basal_indicate', 'meal_indicate', 'bolus_indicate',
    'correction_bolus_indicate', 'finger_stick_indicate', 'EDA_indicate',
    'SCL_indicate', 'SCR_indicate', 'HR_indicate', 'TEMP_indicate',
    'ACC_indicate', 'RMSSD_indicate', 'SDNN_indicate', 'medianNNI_indicate',
    'CVNNI_indicate', 'CVSD_indicate', 'pNNX_indicate', 'meanHR_indicate',
    'minHR_indicate', 'maxHR_indicate', 'VLF_indicate', 'LF_indicate',
    'HF_indicate', 'LHR_indicate'
]

# One record per attribute, then two lookup tables: name -> idx and idx -> name.
temp = []
for idx, attri in enumerate(attri_list_wo_glucose):
    temp.append(dict(attri=attri, idx=idx))
lookup = pd.DataFrame(temp)
attri2idx = lookup.set_index('attri')
idx2attri = lookup.set_index('idx')
print(attri2idx)
print(idx2attri)

# Persist both tables next to the other processed artefacts.
save_path = os.path.join(data_path, process_file)
for table, filename in ((attri2idx, 'attri2idx.pkl'), (idx2attri, 'idx2attri.pkl')):
    table.to_pickle(os.path.join(save_path, filename))


                           idx
attri                         
basal                        0
meal                         1
bolus                        2
correction_bolus             3
finger_stick                 4
EDA                          5
SCL                          6
SCR                          7
HR                           8
TEMP                         9
ACC                         10
RMSSD                       11
SDNN                        12
medianNNI                   13
CVNNI                       14
CVSD                        15
pNNX                        16
meanHR                      17
minHR                       18
maxHR                       19
VLF                         20
LF                          21
HF                          22
LHR                         23
day_of_week                 24
hour                        25
minute                      26
timestamp                   27
sin_time                    28
cos_time                    29
glucose_

In [19]:
# Replace every remaining NaN with 0.0 in all normalised splits.  For columns
# scaled as (x - mean) / std this is the training mean.  The fill applies to
# every column, so a missing discrete_y also becomes 0; the *_indicate columns
# still record which values were originally missing.
for pid in pid_list:
    pid2train_norm[pid] = pid2train_norm[pid].fillna(0.0)
    pid2valid_norm[pid] = pid2valid_norm[pid].fillna(0.0)
    pid2test_norm[pid] = pid2test_norm[pid].fillna(0.0)

In [20]:
# generate training data


def gen_final(pid2data, attri2idx,  n_prev, pred_window, name, valid_points = 24): 
    """Cut every participant frame into fixed-length samples and save them.

    For each start row ``idx`` a candidate sample is made of the ``n_prev``
    history rows ``idx .. idx + n_prev - 1`` and a target row located
    ``pred_window`` rows after the last history row.  A candidate is

    * skipped when the target row has ``glucose_level_indicate`` equal to 1;
    * skipped, and the current run of samples closed, when the sum of
      ``glucose_level_indicate`` over the last ``valid_points`` history rows
      equals ``valid_points`` (no usable glucose value among them).

    Relies on the notebook-level names ``pid_list``, ``pid_attri2mean_std``,
    ``data_path`` and ``process_file``.

    Parameters
    ----------
    pid2data : dict
        Maps participant id to a frame containing ``glucose_level``,
        ``glucose_level_indicate``, ``discrete_y``, ``cos_time``,
        ``sin_time``, ``timestamp`` and every attribute in ``attri2idx``.
    attri2idx : pandas.DataFrame
        Its index lists, in order, the attribute columns copied to ``attri_X``.
    n_prev : int
        Number of history rows per sample.
    pred_window : int
        Distance, in rows, from the last history row to the target row.
    name : str
        Prefix of the output file ``{name}_{n_prev}_{pred_window}.npy``.
    valid_points : int, default 24
        Size of the trailing part of the history checked for glucose values.

    Returns
    -------
    None
        One dict per participant is written with ``np.save`` (pickled object
        array).  Keys: ``glucose_level_X``, ``attri_X``, ``y``, ``y_dis``,
        ``target_cos_time``, ``target_sin_time``, ``target_timestamp``,
        ``mean``, ``std`` and ``seq_st_ed_list``.  The latter holds
        ``[first, last]`` sample positions of runs of kept samples, for runs
        with ``last - first >= 2``.  A sample skipped only because its target
        is missing does not end a run.
    """
    attri_columns = list(attri2idx.index)
    pid2data_npy = {}
    for pid in pid_list:
        data = pid2data[pid]

        y_list = []
        y_dis_list = []
        target_cos_time_list = []
        target_sin_time_list = []
        target_total_seconds_list = []
        attri_X_list = []
        glucose_level_X_list = []
        seq_st_ed_list = []

        seq_st, seq_ed = None, None

        for idx in range(len(data) - n_prev - pred_window + 1):
            history_end = idx + n_prev
            # Fetch the target row once instead of once per field.
            target_row = data.iloc[history_end + pred_window - 1]

            if abs(target_row['glucose_level_indicate'] - 1) < 1e-6:
                continue

            # Reasonable: require at least one glucose value near the end of the history.
            recent_missing = data.iloc[history_end - valid_points: history_end]['glucose_level_indicate'].sum()
            num_valids = int(valid_points - recent_missing)
            if num_valids == 0:
                # seq_ed is still None while a run holds a single sample;
                # subtracting from it would raise TypeError.
                if seq_st is not None and seq_ed is not None and seq_ed - seq_st >= 2:
                    seq_st_ed_list.append([seq_st, seq_ed])
                seq_st, seq_ed = None, None
                continue
            # end

            history = data.iloc[idx:history_end]

            y_list.append(target_row['glucose_level'])
            y_dis_list.append(target_row['discrete_y'])
            target_cos_time_list.append(target_row['cos_time'])
            target_sin_time_list.append(target_row['sin_time'])
            target_total_seconds_list.append(target_row['timestamp'])

            glucose_level_X = np.array(history['glucose_level'].to_numpy(), dtype=np.float32)
            attri_X = np.array(history[attri_columns].to_numpy(), dtype=np.float32)

            glucose_level_X_list.append(np.expand_dims(glucose_level_X, axis=0))
            attri_X_list.append(np.expand_dims(attri_X, axis=0))

            # Track the current run as positions into y_list.
            if seq_st is None:
                seq_st = len(y_list) - 1
            else:
                seq_ed = len(y_list) - 1

        # Close the run that is still open when the frame ends.
        if seq_st is not None and seq_ed is not None and seq_ed - seq_st >= 2:
            seq_st_ed_list.append([seq_st, seq_ed])

        pid2data_npy[pid] = {}
        pid2data_npy[pid]['glucose_level_X'] = np.concatenate(glucose_level_X_list, axis=0)

        pid2data_npy[pid]['attri_X'] = np.concatenate(attri_X_list, axis=0)
        pid2data_npy[pid]['y'] = np.array(y_list, dtype=np.float32)
        pid2data_npy[pid]['y_dis'] = np.array(y_dis_list, dtype=np.int32)
        pid2data_npy[pid]['target_cos_time'] = np.array(target_cos_time_list, dtype=np.float32)
        pid2data_npy[pid]['target_sin_time'] = np.array(target_sin_time_list, dtype=np.float32)
        pid2data_npy[pid]['target_timestamp'] = np.array(target_total_seconds_list, dtype=np.float32)
        # Glucose statistics, stored next to the samples.
        pid2data_npy[pid]['mean'] = pid_attri2mean_std[(pid, 'glucose_level')][0]
        pid2data_npy[pid]['std'] = pid_attri2mean_std[(pid, 'glucose_level')][1]

        pid2data_npy[pid]['seq_st_ed_list'] = np.array(seq_st_ed_list, dtype=np.int32)

        print(pid, pid2data_npy[pid]['glucose_level_X'].shape, pid2data_npy[pid]['attri_X'].shape)
        print('    ',len(y_list), seq_st_ed_list)
    save_path = os.path.join(data_path, process_file)
    np.save(os.path.join(save_path, f'{name}_{n_prev}_{pred_window}.npy'), pid2data_npy)




In [21]:
# Training samples: 24 history rows, target 6 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as train_pid2data_npy_24_6.npy.
gen_final(pid2train_norm, attri2idx,  n_prev=24, pred_window=6, name='train_pid2data_npy', valid_points = 12)

1001 (9002, 24) (9002, 24, 55)
     9002 [[0, 528], [529, 946], [947, 2680], [2681, 2830], [2831, 4654], [4655, 4793], [4794, 5204], [5205, 5472], [5473, 7058], [7059, 7220], [7221, 7768], [7769, 9001]]
1002 (7691, 24) (7691, 24, 55)
     7691 [[0, 1661], [1662, 3986], [3987, 6733], [6734, 6832], [6833, 7690]]
1003 (8442, 24) (8442, 24, 55)
     8442 [[0, 567], [568, 2609], [2610, 2758], [2759, 3418], [3419, 4756], [4757, 5261], [5262, 5464], [5465, 5948], [5949, 7657], [7658, 7894], [7895, 8211], [8212, 8441]]
1004 (6658, 24) (6658, 24, 55)
     6658 [[0, 2825], [2826, 5653], [5654, 5659], [5660, 6657]]
1005 (8286, 24) (8286, 24, 55)
     8286 [[0, 2535], [2536, 5235], [5236, 7881], [7882, 8285]]
1006 (7785, 24) (7785, 24, 55)
     7785 [[0, 1167], [1168, 2608], [2609, 3352], [3353, 5286], [5287, 7784]]
1007 (7034, 24) (7034, 24, 55)
     7034 [[0, 78], [79, 1051], [1052, 1678], [1679, 2559], [2560, 2775], [2776, 3261], [3262, 3760], [3761, 3817], [3818, 3870], [3871, 4051], [4052, 41

In [22]:
# Training samples: 24 history rows, target 12 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as train_pid2data_npy_24_12.npy.
gen_final(pid2train_norm, attri2idx,  n_prev=24, pred_window=12, name='train_pid2data_npy', valid_points = 12)

1001 (8956, 24) (8956, 24, 55)
     8956 [[0, 528], [529, 940], [941, 2668], [2669, 2818], [2819, 4638], [4639, 4771], [4772, 5176], [5177, 5438], [5439, 7024], [7025, 7186], [7187, 7728], [7729, 8955]]
1002 (7669, 24) (7669, 24, 55)
     7669 [[0, 1655], [1656, 3974], [3975, 6723], [6724, 6816], [6817, 7668]]
1003 (8391, 24) (8391, 24, 55)
     8391 [[0, 561], [562, 2597], [2598, 2740], [2741, 3394], [3395, 4732], [4733, 5231], [5232, 5428], [5429, 5916], [5917, 7619], [7620, 7855], [7856, 8166], [8167, 8390]]
1004 (6640, 24) (6640, 24, 55)
     6640 [[0, 2819], [2820, 5647], [5648, 6639]]
1005 (8262, 24) (8262, 24, 55)
     8262 [[0, 2529], [2530, 5223], [5224, 7863], [7864, 8261]]
1006 (7755, 24) (7755, 24, 55)
     7755 [[0, 1161], [1162, 2596], [2597, 3334], [3335, 5262], [5263, 7754]]
1007 (6962, 24) (6962, 24, 55)
     6962 [[0, 72], [73, 1040], [1041, 1661], [1662, 2536], [2537, 2746], [2747, 3226], [3227, 3719], [3720, 3776], [3777, 3829], [3830, 4010], [4011, 4108], [4109, 43

In [23]:
# Validation samples: 24 history rows, target 6 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as valid_pid2data_npy_24_6.npy.
gen_final(pid2valid_norm, attri2idx,  n_prev=24, pred_window=6, name='valid_pid2data_npy', valid_points = 12)

1001 (2994, 24) (2994, 24, 55)
     2994 [[0, 1498], [1499, 1966], [1967, 2993]]
1002 (2541, 24) (2541, 24, 55)
     2541 [[0, 1859], [1860, 2540]]
1003 (2810, 24) (2810, 24, 55)
     2810 [[0, 1483], [1484, 2809]]
1004 (2192, 24) (2192, 24, 55)
     2192 [[0, 1349], [1350, 1723], [1724, 2191]]
1005 (2743, 24) (2743, 24, 55)
     2743 [[0, 2289], [2290, 2742]]
1006 (2565, 24) (2565, 24, 55)
     2565 [[0, 261], [262, 265], [266, 269], [270, 2376], [2377, 2564]]
1007 (2350, 24) (2350, 24, 55)
     2350 [[0, 118], [119, 2349]]
1008 (2805, 24) (2805, 24, 55)
     2805 [[0, 2794], [2795, 2804]]
1009 (2705, 24) (2705, 24, 55)
     2705 [[0, 2704]]
1010 (2512, 24) (2512, 24, 55)
     2512 [[0, 834], [835, 2511]]
1011 (2498, 24) (2498, 24, 55)
     2498 [[0, 28], [29, 869], [870, 2497]]
1012 (2831, 24) (2831, 24, 55)
     2831 [[0, 1911], [1912, 2633], [2634, 2830]]


In [24]:
# Validation samples: 24 history rows, target 12 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as valid_pid2data_npy_24_12.npy.
gen_final(pid2valid_norm, attri2idx,  n_prev=24, pred_window=12, name='valid_pid2data_npy', valid_points = 12)

1001 (2979, 24) (2979, 24, 55)
     2979 [[0, 2], [3, 1495], [1496, 1957], [1958, 2978]]
1002 (2529, 24) (2529, 24, 55)
     2529 [[0, 1853], [1854, 2528]]
1003 (2798, 24) (2798, 24, 55)
     2798 [[0, 1477], [1478, 2797]]
1004 (2174, 24) (2174, 24, 55)
     2174 [[0, 1343], [1344, 1711], [1712, 2173]]
1005 (2731, 24) (2731, 24, 55)
     2731 [[0, 2283], [2284, 2730]]
1006 (2560, 24) (2560, 24, 55)
     2560 [[0, 255], [256, 271], [272, 2377], [2378, 2559]]
1007 (2344, 24) (2344, 24, 55)
     2344 [[0, 118], [119, 2343]]
1008 (2793, 24) (2793, 24, 55)
     2793 [[0, 2788], [2789, 2792]]
1009 (2699, 24) (2699, 24, 55)
     2699 [[0, 2698]]
1010 (2503, 24) (2503, 24, 55)
     2503 [[0, 828], [829, 2502]]
1011 (2482, 24) (2482, 24, 55)
     2482 [[0, 24], [25, 859], [860, 2481]]
1012 (2819, 24) (2819, 24, 55)
     2819 [[0, 1911], [1912, 2627], [2628, 2818]]


In [25]:
# Test samples: 24 history rows, target 6 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as test_pid2data_npy_24_6.npy.
gen_final(pid2test_norm, attri2idx,  n_prev=24, pred_window=6, name='test_pid2data_npy', valid_points = 12)

1001 (2992, 24) (2992, 24, 55)
     2992 [[0, 1149], [1150, 1354], [1355, 2991]]
1002 (2540, 24) (2540, 24, 55)
     2540 [[0, 409], [410, 634], [635, 2539]]
1003 (2797, 24) (2797, 24, 55)
     2797 [[0, 422], [423, 1447], [1448, 1955], [1956, 2796]]
1004 (2191, 24) (2191, 24, 55)
     2191 [[0, 1884], [1885, 2083], [2084, 2190]]
1005 (2739, 24) (2739, 24, 55)
     2739 [[0, 1880], [1881, 2334], [2335, 2738]]
1006 (2577, 24) (2577, 24, 55)
     2577 [[0, 456], [457, 2576]]
1007 (2342, 24) (2342, 24, 55)
     2342 [[0, 82], [83, 205], [206, 2341]]
1008 (2810, 24) (2810, 24, 55)
     2810 [[0, 2809]]
1009 (2699, 24) (2699, 24, 55)
     2699 [[0, 18], [19, 2698]]
1010 (2512, 24) (2512, 24, 55)
     2512 [[0, 896], [897, 2511]]
1011 (2496, 24) (2496, 24, 55)
     2496 [[0, 1188], [1189, 2197], [2198, 2495]]
1012 (2825, 24) (2825, 24, 55)
     2825 [[0, 2396], [2397, 2573], [2574, 2824]]


In [26]:
# Test samples: 24 history rows, target 12 rows after the history; requires
# a glucose value among the last 12 history rows.  Saved as test_pid2data_npy_24_12.npy.
gen_final(pid2test_norm, attri2idx,  n_prev=24, pred_window=12, name='test_pid2data_npy', valid_points = 12)

1001 (2980, 24) (2980, 24, 55)
     2980 [[0, 1143], [1144, 1348], [1349, 2979]]
1002 (2528, 24) (2528, 24, 55)
     2528 [[0, 403], [404, 628], [629, 2527]]
1003 (2773, 24) (2773, 24, 55)
     2773 [[0, 416], [417, 1435], [1436, 1937], [1938, 2772]]
1004 (2173, 24) (2173, 24, 55)
     2173 [[0, 1878], [1879, 2071], [2072, 2172]]
1005 (2727, 24) (2727, 24, 55)
     2727 [[0, 1880], [1881, 2328], [2329, 2726]]
1006 (2565, 24) (2565, 24, 55)
     2565 [[0, 450], [451, 2564]]
1007 (2330, 24) (2330, 24, 55)
     2330 [[0, 76], [77, 199], [200, 2329]]
1008 (2804, 24) (2804, 24, 55)
     2804 [[0, 2803]]
1009 (2687, 24) (2687, 24, 55)
     2687 [[0, 12], [13, 2686]]
1010 (2500, 24) (2500, 24, 55)
     2500 [[0, 890], [891, 2499]]
1011 (2480, 24) (2480, 24, 55)
     2480 [[0, 1182], [1183, 2187], [2188, 2479]]
1012 (2807, 24) (2807, 24, 55)
     2807 [[0, 2390], [2391, 2561], [2562, 2806]]
