# 读取时间序列

In [1]:
import torch
import numpy as np

# Keep printed tensors compact: tensors with more than 50 elements are
# summarized, showing 2 items at each end of every dimension.
torch.set_printoptions(edgeitems=2, threshold=50)

In [2]:
# Load the hourly bike-sharing table as a float32 matrix.
# - skiprows=1 drops the header line with the column names.
# - Column 1 ("dteday") holds dates such as "2011-01-01", which cannot be
#   parsed as a float; the converter keeps only characters 8..9, i.e. the
#   day of the month.
bikes_numpy = np.loadtxt(
    fname="../../myData/p1ch4/bike-sharing-dataset/hour-fixed.csv",
    delimiter=",",
    skiprows=1,
    converters={1: lambda date_field: float(date_field[8:10])},
    dtype=np.float32,
)

In [3]:
import csv

# Read only the header row to list the column names with their indices.
# The file is opened in a `with` block so the handle is closed as soon as the
# header has been read (the previous inline open() was never closed).
# newline="" is what the csv module documentation recommends for files
# handed to csv.reader.
with open(
    "../../myData/p1ch4/bike-sharing-dataset/hour-fixed.csv", "rt", newline=""
) as csv_file:
    reader = csv.reader(csv_file)
    column_names = next(reader)

for idx, name in enumerate(column_names):
    print(idx, name)

0 instant
1 dteday
2 season
3 yr
4 mnth
5 hr
6 holiday
7 weekday
8 workingday
9 weathersit
10 temp
11 atemp
12 hum
13 windspeed
14 casual
15 registered
16 cnt


In [4]:
import pandas

# Load the same CSV with pandas to inspect the first rows with their
# column names.
csv_path = "../../myData/p1ch4/bike-sharing-dataset/hour-fixed.csv"
data = pandas.read_csv(csv_path, sep=",")
data.head(n=5)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [5]:
# Wrap the NumPy matrix as a tensor; from_numpy shares memory with
# bikes_numpy rather than copying it.
bikes = torch.from_numpy(bikes_numpy)
(bikes, bikes.shape)

(tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
         [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
         ...,
         [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
         [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]]),
 torch.Size([17520, 17]))

In [6]:
# Regroup the hourly rows into blocks of 24: (rows, C) -> (rows / 24, 24, C).
# view() only reinterprets the existing storage; it raises if the number of
# rows is not a multiple of 24.
# NOTE(review): assumes rows are in chronological order with no missing hours,
# so each block of 24 is one calendar day. Confirm against the data file.
n_columns = bikes.shape[1]
hours_per_day = 24
daily_bikes = bikes.view(-1, hours_per_day, n_columns)
daily_bikes.shape

torch.Size([730, 24, 17])

In [7]:
# Strides, in elements: how far to move in the underlying storage to advance
# by one index along each of the three dimensions.
daily_bikes.stride()

(408, 17, 1)

In [8]:
# Swap the last two axes so columns (channels) come before the hour axis:
# (N, 24, C) -> (N, C, 24). daily_bikes.transpose(1, 2) is an equivalent
# spelling. No data is copied; only the strides change.
daily_bikes = daily_bikes.permute(0, 2, 1)
(daily_bikes.shape, daily_bikes.stride())

(torch.Size([730, 17, 24]), (408, 1, 17))

In [9]:
# Integer copy of the first 24 hourly rows; column 9 ("weathersit") is used
# below as scatter indices, which must be int64.
first_day = bikes[:24].to(dtype=torch.int64)
# One row per hour, one column per weather situation level.
n_hours = first_day.shape[0]
weather_onehot = torch.zeros(n_hours, 4)
(first_day[:, 9], first_day.shape)

(tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2]),
 torch.Size([24, 17]))

In [10]:
# Distinct weather situation values over the whole dataset; their count is
# the width needed for the one-hot encoding.
weather_column = bikes[:, 9]
weather_range = torch.unique(weather_column)
weather_range

tensor([1., 2., 3., 4.])

In [11]:
# Weather levels are 1-based, so subtract 1 to get 0-based column indices.
# scatter_ needs an int64 index with the same number of dimensions as the
# target, hence the unsqueeze to shape (24, 1).
weather_index = first_day[:, 9].to(dtype=torch.int64).unsqueeze(1) - 1
# In place: write 1.0 into the selected column of every row.
weather_onehot.scatter_(dim=1, index=weather_index, value=1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [13]:
# One row per row of first_day, four one-hot columns.
weather_onehot.shape

torch.Size([24, 4])

In [14]:
# Append the one-hot weather columns to the first day's rows along the column
# axis. The concatenation is built once and reused for both the preview row
# and the shape.
first_day_with_weather = torch.cat((bikes[:24], weather_onehot), dim=1)
(first_day_with_weather[:1], first_day_with_weather.shape)

(tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
           0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
          16.0000,  1.0000,  0.0000,  0.0000,  0.0000]]),
 torch.Size([24, 21]))

In [15]:
# Zero-filled target for the per-day one-hot encoding: same first and last
# dimensions as daily_bikes, with 4 channels (one per weather level).
n_days, _, n_hours_per_day = daily_bikes.shape
daily_weather_onehot = torch.zeros(n_days, 4, n_hours_per_day)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [16]:
# Channel 9 holds the 1-based weather level for every (day, hour). Convert it
# to 0-based int64 indices and add a singleton channel axis so the index has
# the same number of dimensions as the target.
daily_weather_index = daily_bikes[:, 9, :].to(dtype=torch.int64).unsqueeze(1) - 1
# In place: set the selected channel to 1.0 for every (day, hour).
daily_weather_onehot.scatter_(dim=1, index=daily_weather_index, value=1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [17]:
# Append the 4 one-hot weather channels after the existing channels (dim 1).
# Re-running this cell appends them again, so run it once per kernel session.
daily_bikes = torch.cat([daily_bikes, daily_weather_onehot], dim=1)
daily_bikes.shape

torch.Size([730, 21, 24])

In [None]:
# Min-max scaling (also called normalization): treat the weather level as a
# continuous value and map its range [1, 4] onto [0, 1].
weather_min, weather_max = 1.0, 4.0
weather = daily_bikes[:, 9, :]
daily_bikes[:, 9, :] = (weather - weather_min) / (weather_max - weather_min)

In [18]:
# Min-max scale the temperature channel (index 10) to [0, 1], using the
# extremes observed across all days and hours.
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
# The right-hand side is a new tensor, so it is safe to write it back into
# the channel that `temp` views.
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [19]:
# Standardization: subtract the mean and divide by the standard deviation so
# the temperature channel has zero mean and unit standard deviation.
temp = daily_bikes[:, 10, :]
temp_mean = temp.mean()
temp_std = temp.std()
daily_bikes[:, 10, :] = (temp - temp_mean) / temp_std