## Loading the bike-sharing data and converting it into a tensor

In [76]:
import os
import numpy as np
import torch
from pathlib import Path

In [77]:
# Script-style project-root setup, left disabled. It relies on __file__, which
# is presumably unavailable when this code runs as a notebook cell — TODO confirm.
# BASE_PATH = str(Path(__file__).resolve().parent.parent.parent) # BASE_PATH: /Users/yhhan/git/link_dl
# import sys
# sys.path.append(BASE_PATH)
# Display the current working directory: the relative CSV path built in the
# loading cell (os.path.pardir twice, then "_00_data/...") is resolved against it.
os.getcwd()

'c:\\git_deeplearning\\DeepLearning\\_03_your_code\\_03_real_world_data_to_tensors'

In [86]:
# Keep printed tensors compact.
torch.set_printoptions(edgeitems=2, threshold=50, linewidth=75)

# The CSV lives two directory levels above the current working directory.
bikes_path = os.path.join(
  os.path.pardir, os.path.pardir,
  "_00_data", "e_time-series-bike-sharing-dataset", "hour-fixed.csv"
)


def _day_of_month(date_text):
  """Return characters 8..9 of a date field as a float (2011-01-07 --> 07 --> 7)."""
  return float(date_text[8:10])


# Column 1 holds a date string, so it is reduced to its day-of-month before the
# row is stored in the float32 array. The header row is skipped.
bikes_numpy = np.loadtxt(
  bikes_path, dtype=np.float32, delimiter=",", skiprows=1,
  converters={1: _day_of_month},
)

bikes_all = torch.from_numpy(bikes_numpy).to(torch.float)
print(bikes_all.shape)    # >>> torch.Size([17520, 17]) : 17520 hourly rows

# The last column ('cnt') is the regression target; all preceding columns are features.
bikes_target = bikes_all[:, -1:]   # >>> torch.Size([17520, 1])
bikes_data = bikes_all[:, :-1]     # >>> torch.Size([17520, 16])
print(bikes_target.shape)

torch.Size([17520, 17])
torch.Size([17520, 1])


## For all data, convert the nominal data with one-hot encoding

In [79]:
# One-hot encode the weather-situation column (index 9, categories 1..4) for all
# rows with a single indexing operation instead of a Python loop over every row.
# NOTE: this cell overwrites bikes_data, so re-run the loading cell before
# re-running this one (otherwise column 9 is no longer the weather column).
eye_matrix = torch.eye(4)

weather_index = bikes_data[:, 9].long() - 1  # categories 1..4 -> row indices 0..3
# An index of -1 would silently wrap around to the last category and an index
# above 3 would raise inside the lookup, so validate the range explicitly.
assert weather_index.numel() == 0 or (
  int(weather_index.min()) >= 0 and int(weather_index.max()) <= 3
), "weather category in column 9 must be in the range 1..4"

weather_onehot = eye_matrix[weather_index]  # weather_onehot.shape: [num_rows, 4]

# Drop 'instant' (column 0) and 'weathersit' (column 9), then append the four
# one-hot columns: 8 + 6 + 4 = 18 feature columns.
# NOTE(review): if the CSV follows the standard hour.csv column layout, the kept
# columns include 'casual' and 'registered', whose sum is the 'cnt' target —
# confirm whether keeping them as input features is intended.
bikes_data = torch.cat(
  [bikes_data[:, 1:9], bikes_data[:, 10:], weather_onehot], dim=-1
)

print(bikes_data.shape)   # >>> torch.Size([17520, 18])
print(bikes_data[0])

torch.Size([17520, 18])
tensor([ 1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
         0.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         1.0000,  0.0000,  0.0000,  0.0000])


## Data size: train size, validation size, test size

In [80]:
# Hyper-parameters of the split.
y_normalizer = 100     # regression targets are divided by this value
test_size = 24
validation_size = 96
sequence_size = 24     # 24 hours: one day forms a single sequence

# Number of windows of length sequence_size that fit into the data.
data_size = len(bikes_data) + 1 - sequence_size
print("data_size: {0}".format(data_size))

# Every window not reserved for validation or test is used for training.
train_size = data_size - validation_size - test_size
print("train_size: {0}, validation_size: {1}, test_size: {2}".format(train_size, validation_size, test_size))

data_size: 17497
train_size: 17377, validation_size: 96, test_size: 24


## Train Data Preparation

In [81]:
# Build the training windows: window idx covers rows [idx, idx + sequence_size)
# and its target is the 'cnt' value of the last row of that window.
X_train = torch.stack(
  [bikes_data[idx: idx + sequence_size] for idx in range(train_size)], dim=0
).to(torch.float)
print(X_train.shape)

# The targets form one contiguous run of bikes_target, so slice it directly
# instead of rebuilding a tensor from a Python list of one-element tensors.
first_target = sequence_size - 1
y_train_regression = (
  bikes_target[first_target: first_target + train_size, 0].to(torch.float32) / y_normalizer
)
print(len(y_train_regression))

# Normalization statistics are computed from the training windows only; the
# validation and test cells reuse m and s.
m = X_train.mean(dim=0, keepdim=True)
s = X_train.std(dim=0, keepdim=True)
X_train = (X_train - m) / s

# Index of the first window that has not been used yet (read by later cells).
row_cursor = train_size

print(X_train.shape, y_train_regression.shape)
# >>> torch.Size([17377, 24, 18]) torch.Size([17377])

torch.Size([17377, 24, 18])
17377
torch.Size([17377, 24, 18]) torch.Size([17377])


In [82]:
# (start, end) window indices of the validation split; uses validation_size
# rather than a hard-coded 96 so it stays correct if the split size changes.
row_cursor, row_cursor + validation_size

(17377, 17473)

## Validation Data Preparation

In [83]:
# Validation windows directly follow the training windows. The start index is
# derived from train_size rather than from the mutable row_cursor, so re-running
# this cell always produces the same split.
validation_start = train_size
validation_end = validation_start + validation_size

X_validation = torch.stack(
  [bikes_data[idx: idx + sequence_size] for idx in range(validation_start, validation_end)],
  dim=0
).to(torch.float)

# Target of each window is the 'cnt' value of the window's last row.
first_target = validation_start + sequence_size - 1
y_validation_regression = (
  bikes_target[first_target: first_target + validation_size, 0].to(torch.float32) / y_normalizer
)

# Normalize with the statistics (m, s) computed on the training windows.
X_validation = (X_validation - m) / s

# Index of the first window that has not been used yet (read by later cells).
row_cursor = validation_end

print(X_validation.shape, y_validation_regression.shape)
# >>> torch.Size([96, 24, 18]) torch.Size([96])

torch.Size([96, 24, 18]) torch.Size([96])


In [84]:
# (start, end) window indices of the test split.
(row_cursor, row_cursor + test_size)

(17473, 17497)

## Test Data Preparation

In [85]:
# Test windows follow the training and validation windows. The start index is
# derived from the split sizes rather than from the mutable row_cursor, so
# re-running this cell always produces the same split.
test_start = train_size + validation_size
test_end = test_start + test_size

# Each window is sequence_size rows long (the number of windows is test_size;
# the two only happen to be equal here).
X_test = torch.stack(
  [bikes_data[idx: idx + sequence_size] for idx in range(test_start, test_end)],
  dim=0
).to(torch.float)

# Target of each window is the 'cnt' value of the window's last row.
first_target = test_start + sequence_size - 1
y_test_regression = (
  bikes_target[first_target: first_target + test_size, 0].to(torch.float32) / y_normalizer
)

# Normalize with the training statistics (m, s), exactly as for the train and
# validation splits. This must be a plain assignment: `X_test -= ...` would
# store X_test minus its normalized value instead of the normalized value.
X_test = (X_test - m) / s

# Index one past the last window used.
row_cursor = test_end

print(X_test.shape, y_test_regression.shape)
# >>> torch.Size([24, 24, 18]) torch.Size([24])

torch.Size([24, 24, 18]) torch.Size([24])
