In [1]:
import pandas as pd
import os, time
import torch
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.nn as nn

In [2]:
data_path = '../data/' 

location_path = os.path.join(data_path, 'sdwpf_baidukddcup2022_turb_location.CSV')
location      = pd.read_csv(location_path)
location.describe()

Unnamed: 0,TurbID,x,y
count,134.0,134.0,134.0
mean,67.5,2666.760881,5986.126609
std,38.826537,1829.842484,3346.503906
min,1.0,0.0,0.0
25%,34.25,1014.935725,3182.294825
50%,67.5,3246.54185,5841.89434
75%,100.75,4320.609625,8814.83295
max,134.0,5501.4529,12121.00426


In [3]:
df_train = pd.read_csv(os.path.join(data_path, 'train/wtbdata_245days.csv')) 
print(df_train.shape)
print(df_train.columns)
df_train.head()

(4727520, 13)
Index(['TurbID', 'Day', 'Tmstamp', 'Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir',
       'Pab1', 'Pab2', 'Pab3', 'Prtv', 'Patv'],
      dtype='object')


Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv
0,1,1,00:00,,,,,,,,,,
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36


In [4]:
df_train = pd.merge(df_train, location, on='TurbID', how='left')
print(df_train.shape)
print(df_train.columns)
df_train.head()

(4727520, 15)
Index(['TurbID', 'Day', 'Tmstamp', 'Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir',
       'Pab1', 'Pab2', 'Pab3', 'Prtv', 'Patv', 'x', 'y'],
      dtype='object')


Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,x,y
0,1,1,00:00,,,,,,,,,,,3349.8515,5939.23193
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,3349.8515,5939.23193
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76,3349.8515,5939.23193
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53,3349.8515,5939.23193
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36,3349.8515,5939.23193


In [5]:
df_train.describe()

Unnamed: 0,TurbID,Day,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,x,y
count,4727520.0,4727520.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4678002.0,4727520.0,4727520.0
mean,67.5,123.0,5.028376,0.4975428,41.10668,27.39717,188.5826,26.854,26.83916,26.82679,-13.239,350.4458,2666.761,5986.127
std,38.68139,70.72483,3.393703,31.60275,85.29011,18.32832,163.2459,38.83566,38.8294,38.82235,70.44244,424.9932,1823.002,3333.994
min,1.0,1.0,0.0,-3030.46,-273.03,-273.17,-884.86,-10.0,-10.0,-10.0,-624.98,-9.33,0.0,0.0
25%,34.0,62.0,2.24,-3.83,14.05,20.06,63.02,0.01,0.01,0.01,-38.61,-0.3,1014.317,3134.452
50%,67.5,123.0,4.34,-0.23,25.73,30.08,194.79,0.54,0.54,0.54,-0.3,179.08,3246.542,5841.894
75%,101.0,184.0,7.01,3.42,32.73,36.82,321.96,83.99,83.98,83.98,-0.03,547.66,4320.683,8816.238
max,134.0,245.0,26.29,2266.95,394.33,324.21,700.62,99.98,99.98,99.98,485.2,1567.02,5501.453,12121.0


In [6]:
print(df_train['Patv'].min())
print(df_train['Ndir'].min(), df_train['Ndir'].max())
df_train['TurbID'].unique()

-9.33
-884.86 700.62


array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134], dtype=int64)

### 数据预处理
1. 用下一个值填补缺失值
2. 将小于0的功率Patv值替换为0.

In [7]:
print(df_train.columns)
print(df_train.select_dtypes(include='number').columns)
print('总天数：', len(df_train['Day'].unique()))

Index(['TurbID', 'Day', 'Tmstamp', 'Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir',
       'Pab1', 'Pab2', 'Pab3', 'Prtv', 'Patv', 'x', 'y'],
      dtype='object')
Index(['TurbID', 'Day', 'Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir', 'Pab1', 'Pab2',
       'Pab3', 'Prtv', 'Patv', 'x', 'y'],
      dtype='object')
总天数： 245


In [8]:
print(f'缺失值个数： {df_train.isna().sum().sum()}')
df_train.fillna(method='bfill', inplace=True)
print(f'缺失值个数： {df_train.isna().sum().sum()}')
df_train.head()

缺失值个数： 495180
缺失值个数： 0


Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,x,y
0,1,1,00:00,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,3349.8515,5939.23193
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,3349.8515,5939.23193
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76,3349.8515,5939.23193
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53,3349.8515,5939.23193
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36,3349.8515,5939.23193


In [9]:
df_train['Patv'] = df_train['Patv'].apply(lambda x: max(0, x))
print(df_train['Patv'].min())

0.0


## 逐个构建训练集

In [10]:
df_train['TurbID'].unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134], dtype=int64)

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler

class Scaler(object):
    def __init__(self):
        self.scaler = StandardScaler()

    def fit(self, data):
        self.scaler.fit(data)

    def transform(self, data):
        return torch.tensor(self.scaler.transform(data), dtype=torch.float32)

    def inverse_transform(self, data):
        return torch.tensor(self.scaler.inverse_transform(data), dtype=torch.float32)

class WindTurbineDataset(Dataset):
    """
    Desc: Data preprocessing,
          Here, e.g.    15 days for training,
                        3 days for validation,
                        and 6 days for testing
    """
    def __init__(self, data_path,
                 filename='my.csv',
                 flag='train',
                 size=None,
                 turbine_id=0,
                 task='MS',
                 target='Patv',
                 scale=True,
                 start_col=2,       # the start column index of the data one aims to utilize
                 day_len=24 * 6,
                 train_days=15,     # 15 days
                 val_days=3,        # 3 days
                 test_days=6,       # 6 days
                 total_days=30      # 30 days
                 ):
        super().__init__()
        self.unit_size = day_len
        if size is None:
            self.input_len = self.unit_size
            self.output_len = self.unit_size
        else:
            self.input_len = size[0]
            self.output_len = size[1]
        # initialization
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type  = type_map[flag]
        self.task      = task
        self.target    = target
        self.scale     = scale
        self.start_col = start_col
        self.data_path = data_path
        self.filename  = filename
        self.tid       = turbine_id

        # If needed, we employ the predefined total_size (e.g. one month)
        self.total_size = self.unit_size * total_days
        #
        self.train_size = train_days * self.unit_size
        self.val_size   = val_days * self.unit_size
        self.test_size  = test_days * self.unit_size
        # self.test_size = self.total_size - train_size - val_size
        #
        # Or, if total_size is unavailable:
        # self.total_size = self.train_size + self.val_size + self.test_size
        self.__read_data__()

    def __read_data__(self):
        self.scaler = Scaler()
        df_raw = pd.read_csv(os.path.join(self.data_path, self.filename))
        
        
        border1s = [self.tid * self.total_size,
                    self.tid * self.total_size + self.train_size - self.input_len,
                    self.tid * self.total_size + self.train_size + self.val_size - self.input_len
                    ]
        border2s = [self.tid * self.total_size + self.train_size,
                    self.tid * self.total_size + self.train_size + self.val_size,
                    self.tid * self.total_size + self.train_size + self.val_size + self.test_size
                    ]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        df_data = df_raw
        if self.task == 'M':
            cols_data = df_raw.columns[self.start_col:]
            df_data = df_raw[cols_data]
        elif self.task == 'MS':
            cols_data = df_raw.columns[self.start_col:]
            df_data = df_raw[cols_data]
        elif self.task == 'S': 
            df_data = df_raw[[self.target]]

        # Turn off the SettingWithCopyWarning
        pd.set_option('mode.chained_assignment', None)
        # df_data.replace(to_replace=np.nan, value=0, inplace=True)
        df_data.fillna(method='bfill', inplace=True)

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values
        self.data_x   = data[border1:border2]
        self.data_y   = data[border1:border2]
        self.raw_data = df_data[border1 + self.input_len:border2]

    def get_raw_data(self):
        return self.raw_data

    def __getitem__(self, index):
        #
        # Only for customized use.
        # When sliding window not used, e.g. prediction without overlapped input/output sequences
        if self.set_type >= 3:
            index = index * self.output_len
        #
        # Standard use goes here.
        # Sliding window with the size of input_len + output_len
        s_begin = index
        s_end = s_begin + self.input_len
        r_begin = s_end
        r_end = r_begin + self.output_len
        seq_x = self.data_x[s_begin:s_end]
        self.target_col = self.raw_data.columns.get_loc(self.target)
        seq_y = self.data_y[r_begin:r_end, self.target_col]
        return seq_x, seq_y

    def __len__(self):
        # In our case, the sliding window is adopted, the number of samples is calculated as follows
        if self.set_type < 3:
            return len(self.data_x) - self.input_len - self.output_len + 1
        # Otherwise, if sliding window is not adopted
        return int((len(self.data_x) - self.input_len) / self.output_len)

    def inverse_transform(self, data):
        if data.ndim > 1:
            return self.scaler.inverse_transform(data)
        else:
            # 逆标准化y
            num_features = self.raw_data.shape[1]
            tmp = torch.ones(len(data), num_features - 1)
            tmp = torch.cat([tmp, data.reshape(-1, 1)], dim=1)
            # Note: 默认最后一列为目标变量
            y_inverse = self.scaler.inverse_transform(tmp)[:, -1] 
    
            return y_inverse

In [12]:
data_path  = '../data/train/'
filename   = 'wtbdata_245days.csv'
flag       = 'train'
input_len  = 288
output_len = 288
size       = [input_len, output_len]
task       = 'MS'
target     = 'Patv'
start_col  = 3
turbine_id = 0
day_len    = 144
train_days = 240
val_days   = 3
test_days  = 2
total_days = 245

data_train = WindTurbineDataset(
    data_path  = data_path,
    filename   = filename,
    flag       = flag,
    size       = size,
    task       = task,
    target     = target,
    start_col  = start_col,
    turbine_id = turbine_id,
    day_len    = day_len,
    train_days = train_days,
    val_days   = val_days,
    test_days  = test_days,
    total_days = total_days
)

In [13]:
x1, y1 = data_train[0]
print(len(data_train))
print(x1.shape, y1.shape)
x1

33985
torch.Size([288, 10]) torch.Size([288])


tensor([[ 0.1945, -0.1354,  0.5203,  ..., -0.2526,  0.0676,  0.0513],
        [ 0.1945, -0.1354,  0.5203,  ..., -0.2526,  0.0676,  0.0513],
        [ 0.2220, -0.0859,  0.5129,  ..., -0.2526,  0.0678,  0.0798],
        ...,
        [ 1.7905,  0.0162,  0.3305,  ..., -0.2491,  0.0692,  2.0438],
        [ 1.7411, -0.0211,  0.3192,  ..., -0.2496,  0.0692,  2.0436],
        [ 1.7301, -0.0777,  0.3079,  ..., -0.2496,  0.0695,  2.0431]])

In [14]:
y1

tensor([ 2.0433,  1.9946,  1.9480,  2.0323,  1.9579,  1.9507,  2.0446,  2.0440,
         2.0436,  2.0431,  1.9301,  1.9539,  1.4521,  0.0615,  0.4948,  0.7868,
         0.6457,  1.0694,  1.2208,  0.9012,  1.1156,  1.2710,  1.5628,  2.0443,
         2.0036,  1.8960,  2.0435,  2.0436,  1.9649,  2.0056,  2.0435,  2.0432,
         2.0434,  2.0363,  2.0241,  2.0207,  2.0350,  2.0426,  1.9964,  1.9533,
         1.9836,  1.9664,  1.8514,  1.8933,  1.9984,  2.0438,  2.0433,  2.0303,
         2.0197,  1.9645,  1.9644,  2.0059,  1.9001,  1.9920,  1.8765,  1.6576,
         1.8294,  1.7982,  1.9145,  1.9340,  1.8101,  1.7851,  1.8236,  1.8956,
         1.9445,  1.8997,  2.0280,  1.9624,  1.8880,  1.9861,  1.9218,  1.9862,
         1.9743,  1.9755,  2.0122,  2.0138,  2.0305,  2.0211,  2.0092,  2.0367,
         2.0025,  1.9993,  1.9749,  2.0118,  1.8987,  1.9060,  1.8882,  1.8933,
         1.8783,  1.9916,  1.9582,  1.9024,  1.7269,  1.6999,  1.7181,  1.7688,
         1.6678,  1.6652,  1.6096,  1.81

In [15]:
df_raw = data_train.get_raw_data()
print(df_raw.shape)
df_raw.head()

(34272, 10)


Unnamed: 0,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv
288,11.45,-2.16,26.84,40.28,7.42,1.05,1.05,1.05,-0.18,1549.45
289,10.93,-2.96,26.68,40.33,7.42,1.03,1.03,1.03,-0.24,1523.67
290,10.69,-4.61,26.55,40.4,7.42,1.01,1.01,1.01,-0.26,1499.01
291,11.54,-4.08,26.45,40.34,5.63,1.05,1.05,1.05,-0.27,1543.64
292,10.63,0.55,26.34,40.29,-0.27,1.02,1.02,1.02,-0.28,1504.23


In [16]:
target_col = df_raw.columns.get_loc('Patv')
print(target_col)
print(data_train.data_y.shape)
data_train.data_y[input_len:, target_col]

9
torch.Size([34560, 10])


tensor([ 2.0433,  1.9946,  1.9480,  ..., -0.8583, -0.8825, -0.8835])

In [17]:
data_train.inverse_transform(y1)

tensor([1549.4500, 1523.6700, 1499.0100, 1543.6399, 1504.2300, 1500.4500,
        1550.1699, 1549.8201, 1549.6499, 1549.3700, 1489.5100, 1502.1100,
        1236.4200,  500.0700,  729.5200,  884.1100,  809.4300, 1033.7600,
        1113.9401,  944.7100, 1058.2600, 1140.5000, 1295.0400, 1550.0200,
        1528.4399, 1471.4800, 1549.5900, 1549.6101, 1507.9301, 1529.4900,
        1549.5499, 1549.4301, 1549.5200, 1545.7799, 1539.2899, 1537.4900,
        1545.0699, 1549.0800, 1524.6400, 1501.8099, 1517.8800, 1508.7400,
        1447.8700, 1470.0500, 1525.7000, 1549.7200, 1549.4900, 1542.6001,
        1536.9900, 1507.7300, 1507.6799, 1529.6899, 1473.6300, 1522.3201,
        1461.1200, 1345.2200, 1436.2000, 1419.6901, 1481.2600, 1491.6000,
        1425.9800, 1412.7400, 1433.1200, 1471.2500, 1497.1700, 1473.4401,
        1541.3900, 1506.6400, 1467.2100, 1519.1901, 1485.1100, 1519.2600,
        1512.9100, 1513.5900, 1532.9800, 1533.8700, 1542.6699, 1537.7000,
        1531.4000, 1545.9600, 1527.880

In [18]:
data_train.inverse_transform(x1)

tensor([[ 6.1700e+00, -3.9900e+00,  3.0730e+01,  ...,  1.0000e+00,
         -2.5000e-01,  4.9466e+02],
        [ 6.1700e+00, -3.9900e+00,  3.0730e+01,  ...,  1.0000e+00,
         -2.5000e-01,  4.9466e+02],
        [ 6.2700e+00, -2.1800e+00,  3.0600e+01,  ...,  1.0000e+00,
         -2.4000e-01,  5.0976e+02],
        ...,
        [ 1.1980e+01,  1.5500e+00,  2.7370e+01,  ...,  1.0700e+00,
         -1.9000e-01,  1.5497e+03],
        [ 1.1800e+01,  1.9000e-01,  2.7170e+01,  ...,  1.0600e+00,
         -1.9000e-01,  1.5496e+03],
        [ 1.1760e+01, -1.8800e+00,  2.6970e+01,  ...,  1.0600e+00,
         -1.8000e-01,  1.5493e+03]])

In [19]:
df_train.head(10)

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv,x,y
0,1,1,00:00,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,3349.8515,5939.23193
1,1,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66,3349.8515,5939.23193
2,1,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76,3349.8515,5939.23193
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53,3349.8515,5939.23193
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36,3349.8515,5939.23193
5,1,1,00:50,6.1,-1.03,30.47,41.22,20.91,1.0,1.0,1.0,-0.27,482.21,3349.8515,5939.23193
6,1,1,01:00,6.77,1.07,30.31,41.19,20.91,1.0,1.0,1.0,-0.23,584.75,3349.8515,5939.23193
7,1,1,01:10,6.7,-2.8,30.24,41.0,20.91,1.0,1.0,1.0,-0.23,557.98,3349.8515,5939.23193
8,1,1,01:20,6.44,-3.46,30.13,40.91,20.91,1.0,1.0,1.0,-0.21,503.94,3349.8515,5939.23193
9,1,1,01:30,6.25,-3.15,29.97,40.72,20.91,1.0,1.0,1.0,-0.26,463.37,3349.8515,5939.23193


In [20]:
x2, y2 = data_train[1]
print(x2.shape, y2.shape)
data_train.inverse_transform(x2)

torch.Size([288, 10]) torch.Size([288])


tensor([[ 6.1700e+00, -3.9900e+00,  3.0730e+01,  ...,  1.0000e+00,
         -2.5000e-01,  4.9466e+02],
        [ 6.2700e+00, -2.1800e+00,  3.0600e+01,  ...,  1.0000e+00,
         -2.4000e-01,  5.0976e+02],
        [ 6.4200e+00, -7.3000e-01,  3.0520e+01,  ...,  1.0000e+00,
         -2.6000e-01,  5.4253e+02],
        ...,
        [ 1.1800e+01,  1.9000e-01,  2.7170e+01,  ...,  1.0600e+00,
         -1.9000e-01,  1.5496e+03],
        [ 1.1760e+01, -1.8800e+00,  2.6970e+01,  ...,  1.0600e+00,
         -1.8000e-01,  1.5493e+03],
        [ 1.1450e+01, -2.1600e+00,  2.6840e+01,  ...,  1.0500e+00,
         -1.8000e-01,  1.5494e+03]])

In [21]:
print(x1.shape)
model_rnn = nn.RNN(input_size=x1.shape[1], hidden_size=64, num_layers=2, batch_first=True)
out, hn = model_rnn(x1)
print(out.shape, hn.shape)

torch.Size([288, 10])
torch.Size([288, 64]) torch.Size([2, 64])


In [22]:
data_val = WindTurbineDataset(
    data_path  = data_path,
    filename   = filename,
    flag       = 'val',
    size       = size,
    task       = task,
    target     = target,
    start_col  = start_col,
    turbine_id = turbine_id,
    day_len    = day_len,
    train_days = train_days,
    val_days   = val_days,
    test_days  = test_days,
    total_days = total_days
)

print(len(data_val))
v1x, v1y = data_val[1]
print(v1x.shape, v1y.shape)

145
torch.Size([288, 10]) torch.Size([288])


### RNN

#### Input:
$(N, L, H_{in})$ when batch_first=True; <br>
$N$ = batch size; <br>
$L$ = sequence length; <br>
$H_{in}$ = input size

#### Output:
output: $(N, L, D*H_{out})$ <br>
$h_n$: 隐层输出.


In [71]:
class RNN(nn.Module):
    def __init__(self, input_size, 
                 hidden_size, 
                 output_size,
                 num_layers=1,
                 activation = 'tanh',
                 dropout=0,
                 device='cpu',
                 batch_first=True
                 ):
        super(RNN, self).__init__()
        assert activation in ['tanh', 'relu']
        self.input_size  = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers  = num_layers
        self.device      = device
        self.dropout     = dropout
        self.activation  = activation
        self.batch_first = batch_first
        self.model = nn.Sequential(
            nn.RNN(input_size=self.input_size, 
                   hidden_size  = self.hidden_size,
                   num_layers   = self.num_layers,
                   nonlinearity = self.activation,
                   dropout      = self.dropout,
                   batch_first  = self.batch_first),
            nn.Linear(self.hidden_size, self.output_size)
        )

    def forward(self, x):
        x = x.to(self.device)
        self.model.to(self.device)
        
        # x: (batch_size, sequence_length, input_size)
        out, _ = self.model[0](x)
        # out: (batch_size, sequence_length, hidden_size)
        out = self.model[1](out[:, -1, :])
        # out: (batch_size, output_size)
        return out

In [24]:
class RegressionEstimator():
    def __init__(self, model, crit, max_epochs, batch_size, device, optimizer, scheduler, 
                 verbose=False, logger=None):
        self.model      = model
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.device     = device
        self.verbose    = verbose  # control whether output the training process, bool.
        self.crit       = crit  # loss function
        self.optimizer  = optimizer
        self.scheduler  = scheduler
        self.logger     = logger

    def fit(self, X_train, Y_train, X_valid=None, Y_valid=None, patience=5):

        train_data = TensorDataset(X_train, Y_train)
        train_dataloader = DataLoader(
            dataset=train_data, batch_size=self.batch_size, shuffle=False)

        model     = self.model.to(self.device)
        optimizer = self.optimizer
        scheduler = self.scheduler
        train_loss_history = []
        validation_loss_history = []
        best_validation_loss = float('inf')
        patience_counter = 0

        for epoch in range(self.max_epochs):
            start_time = time.time()
            loss_all = []
            model.train()

            for data in train_dataloader:
                x, y = data
                x = x.to(self.device)
                y = y.to(self.device)

                optimizer.zero_grad()
                out = model(x)
                loss = self.crit(out, y)
                loss.requires_grad_(True)
                loss.backward()

                optimizer.step()
                loss_all.append(loss.item())

            scheduler.step()
            end_time = time.time()
            cost_time = end_time - start_time

            train_loss = np.mean(np.array(loss_all))
            train_loss_history.append(train_loss)

            # '---------------evaluating model on validation set------------------'
            if X_valid is not None:
                model.eval()
                valid_data = TensorDataset(X_valid, Y_valid)
                validation_dataloader = DataLoader(
                    dataset=valid_data, batch_size=self.batch_size, shuffle=False)
                loss_all = []
                with torch.no_grad():
                    for data in validation_dataloader:
                        x, y = data
                        x = x.to(self.device)
                        y = y.to(self.device)
                        output = model(x)
                        loss = self.crit(output, y)
                        loss_all.append(loss.item())

                validation_loss = np.mean(np.array(loss_all))
                validation_loss_history.append(validation_loss)
                
                if self.verbose and (epoch+1) % 100 == 0:
                    if self.logger:
                        self.logger.info('Epoch:{:d}, train_loss: {:.4f}, validation_loss: {:.4f}, cost_time: {:.2f}s'
                                         .format(epoch+1, train_loss, validation_loss, cost_time))
                    else:
                        print('Epoch:{:d}, train_loss: {:.4f}, validation_loss: {:.4f}, cost_time: {:.2f}s'
                              .format(epoch+1, train_loss, validation_loss, cost_time))

                # 早停策略：val_loss 超过 patience 个 epoch 没提升就停止训练
                if validation_loss < best_validation_loss:
                    best_validation_loss = validation_loss
                    patience_counter = 0
                else:
                    patience_counter += 1

                if patience_counter >= patience:
                    if self.logger:
                        self.logger.info(f'Early stopping after {patience} epochs without improvement.')
                    else:
                        print(f'Early stopping after {patience} epochs without improvement.')
                    break
            else:
                if self.verbose and (epoch+1) % 100 == 0:
                    print('Epoch:{:d}, train_loss: {:.4f}, cost_time: {:.2f}s'
                          .format(epoch+1, train_loss, cost_time))

        return train_loss_history, validation_loss_history

    def predict(self, x):

        model = self.model.to(self.device)
        model.eval()
        with torch.no_grad():
            x = x.to(self.device)
            pred = model(x)

        res = pred.data.cpu().numpy()

        return res

In [25]:
# x1_train_tensor = torch.from_numpy(x1_train).float()
# print(x1_train.shape)

# model_rnn = nn.RNN(input_size=x1_train.shape[2], hidden_size=64, num_layers=2, batch_first=True)
# out, hn = model_rnn(x1_train_tensor)

batch_size = 10
train_dataloader = DataLoader(
    dataset=data_train, batch_size=batch_size, shuffle=False)

print(len(data_train))
print(len(train_dataloader))

33985
3399


In [26]:
output_len = 24*6*2
model_rnn = RNN(input_size=x1.shape[1], hidden_size=32, output_size=output_len)

counter = 0
for i, (batch_x, batch_y) in enumerate(train_dataloader):
    if counter > 1:
        break
    out = model_rnn(batch_x)
    print(f'i: {i}, batch_x.shape: {batch_x.shape}, batch_y.shape: {batch_y.shape}, out.shape: {out.shape}')
    counter += 1

i: 0, batch_x.shape: torch.Size([10, 288, 10]), batch_y.shape: torch.Size([10, 288]), out.shape: torch.Size([10, 288])
i: 1, batch_x.shape: torch.Size([10, 288, 10]), batch_y.shape: torch.Size([10, 288]), out.shape: torch.Size([10, 288])


In [95]:
def train_and_val(data_train, data_val, model, criterion, config, logger=None):
    loader_train = DataLoader(dataset=data_train, batch_size=config['batch_size'], shuffle=config['shuffle_train_val'])
    loader_val   = DataLoader(dataset=data_val, batch_size=config['batch_size'], shuffle=config['shuffle_test'])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: ', device)
    best_validation_loss = float('inf')
    patience_counter     = 0
    patience             = config['patience']
    train_loss_history   = []
    val_loss_history     = []
    
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr_rate']) # 这两个不能放外面
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 
                                                step_size=config['lr_step_size'], 
                                                gamma=config['lr_gamma'])
    
    for epoch in range(config['max_epoch']):
        train_loss = []
        epoch_start_time = time.time()
        # model.train()
        for x, y in loader_train:
            x, y = x.to(device), y.to(device)
            print(f'x.device: {x.device}, y.device: {y.device}, model.device: {model.device}')
            optimizer.zero_grad()
            out  = model(x)
            print(f'out.device: {out.device}, y.device:{y.device}')
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        train_loss_epoch = np.mean(train_loss)
        train_loss_history.append(train_loss_epoch)
        if scheduler:
            scheduler.step()

        model.eval()
        with torch.no_grad():
            val_loss = []
            for x, y in loader_val:
                x = x.to(device)
                y = y.to(device)
                out = model(x)
                loss = criterion(out, y)
                val_loss.append(loss.item())
        val_loss_epoch = np.mean(val_loss)
        val_loss_history.append(val_loss_epoch)
        epoch_end_time = time.time()
        cost_time = epoch_end_time - epoch_start_time

        # 早停
        if val_loss_epoch < best_validation_loss:
            best_validation_loss = val_loss_epoch
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            if logger:
                logger.info(f'Early stopping after {patience} epochs without improvement.')
            else:
                print(f'Early stopping after {patience} epochs without improvement.')
            break
        
        if logger:
            logger.info('Epoch:{:d}, train_loss: {:.4f}, validation_loss: {:.4f}, cost time: {:.2f}s'.format(epoch + 1, 
                                                                                                             train_loss_epoch, 
                                                                                                             val_loss_epoch,
                                                                                                             cost_time))
        else:
            print('Epoch:{:d}, train_loss: {:.4f}, validation_loss: {:.4f}, cost time: {:.2f}s'.format(epoch + 1, 
                                                                                                             train_loss_epoch, 
                                                                                                             val_loss_epoch,
                                                                                                             cost_time))
    return train_loss_history, val_loss_history

In [96]:

config = {
    'input_size'       : 10,
    'hidden_size'      : 32,
    'output_size'      : output_len,
    'num_layers'       : 1,
    'device'           : 'cuda',
    'batch_size'       : 10,
    'lr_rate'          : 0.01,
    'max_epoch'        : 100,
    'patience'         : 5,
    'shuffle_train_val': True,
    'shuffle_test'     : False,
    'lr_step_size'     : 30,
    'lr_gamma'         : 0.9
}

criterion = nn.MSELoss(reduction='mean')

model_rnn = RNN(input_size=config['input_size'], hidden_size=32, output_size=output_len)

train_and_val(data_train, data_val, model_rnn, criterion, config)

device:  cuda
x.device: cuda:0, y.device: cuda:0, model.device: cpu
out.device: cpu, y.device:cuda:0


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [55]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
print(model_rnn.device)
model2 = model_rnn.to(device)
print('model2.device:', next(model2.parameters()).device)
model2.device

cuda
cpu
model2.device: cuda:0


'cpu'

In [54]:
# model_rnn.to(device)
# model_rnn.device

model2.parameters.device

AttributeError: 'function' object has no attribute 'device'

In [None]:
data_test = WindTurbineDataset(
    data_path  = data_path,
    filename   = filename,
    flag       = 'test',
    size       = size,
    task       = task,
    target     = target,
    start_col  = start_col,
    turbine_id = turbine_id,
    day_len    = day_len,
    train_days = train_days,
    val_days   = val_days,
    test_days  = test_days,
    total_days = total_days
)

print(len(data_test))
t1x, t1y = data_test[1]
print(t1x.shape, t1y.shape)

1
torch.Size([288, 10]) torch.Size([287])
