# time series

In [16]:
import numpy as np
import torch
# Keep printed tensors compact: summarize tensors with more than 50 elements,
# show 2 items at each edge of a summarized dimension, wrap at 75 characters.
torch.set_printoptions(linewidth=75, threshold=50, edgeitems=2)

In [17]:
import pandas

# Load the hourly bike-sharing CSV into a DataFrame and preview the first rows.
csv_path = "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv"
df = pandas.read_csv(csv_path)
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [18]:
# Replace each "YYYY-MM-DD" date string in column 1 with its day-of-month
# (characters 8-9) as a float. Non-string values are passed through untouched,
# so re-running this cell after the conversion is a no-op instead of raising
# "TypeError: 'float' object is not subscriptable".
df.iloc[:, 1] = df.iloc[:, 1].apply(
    lambda x: float(x[8:10]) if isinstance(x, str) else x)

In [19]:
# Convert the whole frame into a single float32 NumPy array; this only works
# once every column holds values that can be cast to float.
bikes_numpy = df.values.astype(np.float32)

也可以不经过 pandas，直接用 NumPy 的 `np.loadtxt` 读取文件并在读取时完成同样的日期转换：

```python
# Alternative to the pandas route: parse the CSV straight into a float32
# array, skipping the header row.
bikes_numpy = np.loadtxt(
    "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv", 
    dtype=np.float32, 
    delimiter=",", 
    skiprows=1, 
    converters={1: lambda x: float(x[8:10])}) # column 1: keep characters 8-9 as a float
```

其中对于`converters={1: lambda x: float(x[8:10])}`的解释如下：

这段代码使用的是NumPy的`np.loadtxt`函数的`converters`参数（pandas的`read_csv`也有同名参数，用法类似）。`converters`参数接受一个字典，字典的键是列的索引，值是一个函数，这个函数会在读取时应用于该列的每个值。

在这个例子中，`converters={1: lambda x: float(x[8:10])}`表示对第二列（索引为1）的每个值执行`lambda x: float(x[8:10])`函数。

这个函数是一个lambda表达式，它接受一个参数`x`，并返回`x[8:10]`的浮点数值。`x[8:10]`表示取`x`的第9个和第10个字符（Python的索引从0开始）。本数据集中这一列是形如`"2011-01-01"`（即`"YYYY-MM-DD"`）的日期字符串，因此`x[8:10]`取出的是日期中的“日”部分。

所以，这段代码的作用是将CSV文件的第二列的每个值的第9个和第10个字符转换为浮点数。

In [20]:
# Wrap the NumPy array as a tensor (from_numpy shares the array's memory
# instead of copying it), then display it.
bikes = torch.from_numpy(bikes_numpy)
bikes

tensor([[1.0000e+00, 1.0000e+00,  ..., 1.3000e+01, 1.6000e+01],
        [2.0000e+00, 1.0000e+00,  ..., 3.2000e+01, 4.0000e+01],
        ...,
        [1.7378e+04, 3.1000e+01,  ..., 4.8000e+01, 6.1000e+01],
        [1.7379e+04, 3.1000e+01,  ..., 3.7000e+01, 4.9000e+01]])

In [21]:
# Inspect the tensor's dimensions and its memory strides.
bikes.size(), bikes.stride()

(torch.Size([17520, 17]), (1, 17520))

In [22]:
# Regroup the rows into chunks of 24: (chunks, 24, columns). The -1 lets
# PyTorch infer the number of chunks from the total row count.
n_columns = bikes.shape[1]
daily_bikes = bikes.view(-1, 24, n_columns)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (24, 1, 17520))

这段代码是在使用PyTorch的`view`方法来改变张量`bikes`的形状。

`view`方法返回一个新的张量，这个张量与原张量共享数据，但有不同的形状。

在这个例子中，`view(-1, 24, bikes.shape[1])`将`bikes`的形状改为`(-1, 24, bikes.shape[1])`。这里的`-1`表示该维度的大小会自动计算，以保证新张量的元素总数与原张量相同。`24`和`bikes.shape[1]`是新张量的第二和第三维度的大小。

所以，这段代码的作用是将`bikes`的形状改为`(n, 24, bikes.shape[1])`，其中`n`是自动计算的，`24`是每天的小时数，`bikes.shape[1]`是原张量的列数。这通常用于处理时间序列数据，其中每一行代表一个小时的数据，每一列代表一个特征。

- `n`: 天数
- `24`: 每天的小时数
- `bikes.shape[1]`: 原张量的列数

In [23]:
# Swap dims 1 and 2: (days, 24, columns) -> (days, columns, 24).
# NOTE: this rebinds daily_bikes to a permuted view of itself, so running the
# cell a second time swaps the two axes back.
daily_bikes = daily_bikes.permute(0, 2, 1)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (24, 17520, 1))

In [24]:
# Take the first 24 rows as int64 (so column 9 can serve as scatter indices)
# and allocate an all-zero matrix with 4 columns, one row per selected row,
# to receive the one-hot encoding.
first_day = bikes[:24].long()
weather_onehot = torch.zeros(len(first_day), 4)
first_day[:, 9], first_day.shape, weather_onehot.shape

(tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2,
         2, 2]),
 torch.Size([24, 17]),
 torch.Size([24, 4]))

In [25]:
# Column 9 holds 1-based category codes; shift them to 0-based column indices
# (first_day is already int64) and write 1.0 at that column of each row, in place.
weather_index = first_day[:, 9].unsqueeze(1) - 1
weather_onehot.scatter_(dim=1, index=weather_index, value=1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [26]:
# Append the one-hot columns to the first 24 rows of the data, then show the
# first combined row together with the resulting shape.
first_day_with_weather = torch.cat((bikes[:24], weather_onehot), 1)
(first_day_with_weather[:1], first_day_with_weather.shape)

(tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,
           6.0000,  0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,
           3.0000, 13.0000, 16.0000,  1.0000,  0.0000,  0.0000,  0.0000]]),
 torch.Size([24, 21]))

In [27]:
# Zero tensor with 4 one-hot slots on dim 1; dims 0 and 2 match daily_bikes.
n_days, n_hours = daily_bikes.shape[0], daily_bikes.shape[2]
daily_weather_onehot = torch.zeros(n_days, 4, n_hours)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [28]:
# Index 9 along dim 1 holds 1-based category codes. Cast to int64, re-insert
# the dim-1 axis so the index lines up with the one-hot tensor, shift to
# 0-based, and write 1.0 along dim 1 in place.
daily_weather_index = daily_bikes[:, 9, :].long().unsqueeze(1) - 1
daily_weather_onehot.scatter_(dim=1, index=daily_weather_index, value=1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [29]:
# Stack the one-hot channels after the existing ones along dim 1, then show
# every channel at index 1 of the last dim for the first entry, plus the new shape.
# NOTE: daily_bikes is rebound to the concatenation, so re-running this cell
# appends the one-hot channels again.
daily_bikes = torch.cat([daily_bikes, daily_weather_onehot], dim=1)
(daily_bikes[0, :, 1], daily_bikes.shape)

(tensor([ 2.0000,  1.0000,  1.0000,  0.0000,  1.0000,  1.0000,  0.0000,
          6.0000,  0.0000,  1.0000,  0.2200,  0.2727,  0.8000,  0.0000,
          8.0000, 32.0000, 40.0000,  1.0000,  0.0000,  0.0000,  0.0000]),
 torch.Size([730, 21, 24]))

In [30]:
# Linearly rescale channel 9 so that 1 maps to 0.0 and 4 maps to 1.0.
# NOTE: this overwrites the channel in place; re-running rescales it again.
weather_codes = daily_bikes[:, 9, :]
daily_bikes[:, 9, :] = (weather_codes - 1.0) / 3.0

In [31]:
# Min-max scale channel 10 in place, using the minimum and maximum taken over
# the whole channel (all entries of dims 0 and 2).
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [32]:
# Standardize channel 10 in place: subtract the channel-wide mean and divide
# by the channel-wide standard deviation. This operates on whatever values
# the channel currently holds.
temp = daily_bikes[:, 10, :]
temp_mean = temp.mean()
temp_std = temp.std()
daily_bikes[:, 10, :] = (temp - temp_mean) / temp_std