In [1]:
import os

import h5py
import numpy as np

In [2]:
def add_window_horizon(data, window=12, horizon=12, single=False):
    '''
    Cut a sequence into paired (input window, target) samples by sliding
    one step at a time along the first axis.

    :param data: shape [B, ...]; sliced along its first axis
    :param window: number of consecutive steps in each input sample
    :param horizon: number of steps after the window that the target covers
    :param single: if True, keep only the last step of the horizon as the
        target (length 1); otherwise keep all `horizon` steps
    :return: X is [S, W, ...], Y is [S, H, ...] (H is 1 when single), with
        S = len(data) - window - horizon + 1; both are empty arrays when S <= 0
    '''
    n_samples = len(data) - horizon - window + 1
    # Where the target slice starts, measured from the end of the input window.
    target_offset = horizon - 1 if single else 0

    inputs, targets = [], []
    for start in range(n_samples):
        window_end = start + window
        inputs.append(data[start:window_end])
        targets.append(data[window_end + target_offset:window_end + horizon])
    return np.array(inputs), np.array(targets)

def split_data_by_ratio(data, val_ratio, test_ratio):
    '''
    Split data along its first axis into contiguous train / val / test parts,
    in that order, sized as fractions of the total length.

    :param data: array-like exposing `.shape[0]` and slicing on the first axis
    :param val_ratio: fraction of rows assigned to the validation part
    :param test_ratio: fraction of rows assigned to the test part (the tail)
    :return: (train_data, val_data, test_data)
    '''
    data_len = data.shape[0]
    test_len = int(data_len * test_ratio)
    holdout_len = int(data_len * (test_ratio + val_ratio))

    # Slice with explicit non-negative boundaries. Negative-index slicing
    # breaks when a count is 0: data[-0:] is the whole array and data[:-0]
    # is empty, which would swap the roles of the train and test parts.
    test_start = max(data_len - test_len, 0)
    val_start = max(data_len - holdout_len, 0)

    train_data = data[:val_start]
    val_data = data[val_start:test_start]
    test_data = data[test_start:]
    return train_data, val_data, test_data

def split_data_by_day(data, val_day, test_day, interval_per_day):
    '''
    Split data along its first axis into contiguous train / val / test parts,
    in that order, holding out a whole number of days at the tail.

    :param data: sequence supporting `len()` and slicing on the first axis
    :param val_day: number of days assigned to the validation part
    :param test_day: number of days assigned to the test part (the tail)
    :param interval_per_day: number of rows that make up one day
    :return: (train_data, val_data, test_data)
    '''
    total = len(data)
    test_len = interval_per_day * test_day
    holdout_len = interval_per_day * (test_day + val_day)

    # Slice with explicit non-negative boundaries. Negative-index slicing
    # breaks when a count is 0: data[-0:] is the whole sequence and data[:-0]
    # is empty, which would swap the roles of the train and test parts.
    test_start = max(total - test_len, 0)
    val_start = max(total - holdout_len, 0)

    train_data = data[:val_start]
    val_data = data[val_start:test_start]
    test_data = data[test_start:]
    return train_data, val_data, test_data

def split_train_val_test(data_name, type, val_day=14, test_day=14,
                         interval_per_day=48, window=12, horizon=12):
    '''
    Build windowed train / val / test archives for one dataset.

    Reads the `<type>_pick` and `<type>_drop` datasets from
    `../data/h5data/<data_name>.h5`, stacks them on a new last axis, turns the
    result into (window, horizon) samples and writes `train.npz`, `val.npz`
    and `test.npz` (keys `x` and `y`) under `../data/<data_name>/`.

    :param data_name: dataset name; selects both the input file and output dir
    :param type: prefix of the `_pick` / `_drop` dataset keys in the HDF5 file
        (the name shadows the builtin but is kept for caller compatibility)
    :param val_day: number of days held out for validation
    :param test_day: number of days held out for testing
    :param interval_per_day: number of rows that make up one day
    :param window: input length passed to add_window_horizon
    :param horizon: target length passed to add_window_horizon
    :return: None; the results are written to disk
    '''
    print('='*30 + f' {data_name} ' + '='*30)
    # Open read-only and release the file handle as soon as both arrays have
    # been copied into memory ([:] materialises each dataset as an ndarray).
    with h5py.File(f'../data/h5data/{data_name}.h5', 'r') as data:
        pick = np.expand_dims(data[type + '_pick'][:], axis=-1)
        drop = np.expand_dims(data[type + '_drop'][:], axis=-1)
    new_data = np.concatenate([pick, drop], axis=-1)
    print(f'Data shape: {new_data.shape}')

    x, y = add_window_horizon(new_data, window, horizon)
    print(x.shape, y.shape)
    # x and y are split with identical boundaries so samples stay paired.
    x_train, x_val, x_test = split_data_by_day(x, val_day, test_day, interval_per_day)
    y_train, y_val, y_test = split_data_by_day(y, val_day, test_day, interval_per_day)
    print(f'Shape of x_train, x_val, x_test: {x_train.shape}, {x_val.shape}, {x_test.shape}')

    # Create the output directory up front so a missing folder does not
    # discard the work done above.
    out_dir = f'../data/{data_name}'
    os.makedirs(out_dir, exist_ok=True)
    np.savez_compressed(f'{out_dir}/train.npz', x=x_train, y=y_train)
    np.savez_compressed(f'{out_dir}/val.npz', x=x_val, y=y_val)
    np.savez_compressed(f'{out_dir}/test.npz', x=x_test, y=y_test)

In [3]:
# Generate the train/val/test archives for each bike dataset in turn.
for dataset_name in ('bos-bike', 'bay-bike', 'dc-bike'):
    split_train_val_test(dataset_name, 'bike')

Data shape: (4416, 201, 2)
(4393, 12, 201, 2) (4393, 12, 201, 2)
Shape of x_train, x_val, x_test: (3049, 12, 201, 2), (672, 12, 201, 2), (672, 12, 201, 2)
Data shape: (4416, 133, 2)
(4393, 12, 133, 2) (4393, 12, 133, 2)
Shape of x_train, x_val, x_test: (3049, 12, 133, 2), (672, 12, 133, 2), (672, 12, 133, 2)
Data shape: (4416, 117, 2)
(4393, 12, 117, 2) (4393, 12, 117, 2)
Shape of x_train, x_val, x_test: (3049, 12, 117, 2), (672, 12, 117, 2), (672, 12, 117, 2)
