In [7]:
import numpy as np
import torch

# Hourly bike-sharing statistics: each CSV row describes one hour of service.
# Column 1 holds date strings, which cannot be parsed as floats directly, so
# the converter keeps only characters 8-9 (the day of the month) as a number.
bikes_numpy = np.loadtxt(
    "../../dlwpt-code/data/p1ch4/bike-sharing-dataset/hour-fixed.csv",
    dtype=np.float32,
    delimiter=",",
    skiprows=1,  # skip the first line of the file
    converters={1: lambda date_field: float(date_field[8:10])},
)

# Wrap the NumPy array as a tensor, then display it with its shape and stride.
bikes = torch.from_numpy(bikes_numpy)
bikes, bikes.shape, bikes.stride()

(tensor([[1.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 3.0000e+00, 1.3000e+01,
          1.6000e+01],
         [2.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 8.0000e+00, 3.2000e+01,
          4.0000e+01],
         [3.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 5.0000e+00, 2.7000e+01,
          3.2000e+01],
         ...,
         [1.7377e+04, 3.1000e+01, 1.0000e+00,  ..., 7.0000e+00, 8.3000e+01,
          9.0000e+01],
         [1.7378e+04, 3.1000e+01, 1.0000e+00,  ..., 1.3000e+01, 4.8000e+01,
          6.1000e+01],
         [1.7379e+04, 3.1000e+01, 1.0000e+00,  ..., 1.2000e+01, 3.7000e+01,
          4.9000e+01]]),
 torch.Size([17520, 17]),
 (17, 1))

In [12]:
# `bikes` holds 17520 hourly rows of 17 columns. Fold the rows into days of
# 24 hours to get N x L x C (days x hours x channels), then swap the last two
# axes to obtain N x C x L. Shape and stride are printed after each step.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
print(daily_bikes.size(), daily_bikes.stride())

daily_bikes = daily_bikes.transpose(2, 1)
print(daily_bikes.size(), daily_bikes.stride())

torch.Size([730, 24, 17]) (408, 17, 1)
torch.Size([730, 17, 24]) (408, 1, 17)


In [18]:
# One-hot encode the weather column (index 9), whose values go from 1 (good)
# to 4 (bad). Start with a single day, which is easier to read.
first_day = bikes[:24].long()
wheather_onehot = torch.zeros(len(first_day), 4)
# first_day is already int64, so its column can be used as the scatter index
# as-is; subtracting 1 maps weather values 1..4 onto column indices 0..3.
wheather_onehot.scatter_(
    dim=1,
    index=first_day[:, 9].unsqueeze(1) - 1,
    value=1.0,
)
print(wheather_onehot[:2])

# Append the one-hot columns to the day's original columns (dimension 1) and
# show the first hour.
torch.cat([bikes[:24], wheather_onehot], dim=1)[0]

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.]])


tensor([ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
         0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
        16.0000,  1.0000,  0.0000,  0.0000,  0.0000])

In [24]:
# Same one-hot encoding of the weather channel (index 9), now for the whole
# dataset at once (harder to read): daily_bikes is N x C x L, so the 4 weather
# classes are scattered along dimension 1 (channels), one slot per day and hour.
daily_wheather_onehot = torch.zeros(daily_bikes.shape[0], 4, daily_bikes.shape[2])
# Weather values 1..4 become channel indices 0..3; scatter_ needs an int64 index.
daily_wheather_onehot.scatter_(dim=1, index=daily_bikes[:, 9, :].unsqueeze(1).long() - 1, value=1.0)

# Concatenate onto the first bikes.shape[1] channels only. On the first run
# that is all of daily_bikes; on a re-run it drops the one-hot channels added
# previously, so the cell is idempotent instead of growing the channel
# dimension by 4 every time it is executed.
daily_bikes = torch.cat((daily_bikes[:, :bikes.shape[1], :], daily_wheather_onehot), dim=1)
daily_bikes.shape

torch.Size([730, 21, 24])