In [2]:
from torch import nn
from torch import autograd
import torch
from torch import optim

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
from sklearn.metrics import mutual_info_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import warnings
warnings.filterwarnings("ignore")

def score2(pm, pp, plant):
    plant_power = {
        1: 10,
        2: 10,
        3: 40,
        4: 50
    }
    threshold = plant_power[plant] * 0.03
    index = pm >= threshold
    return np.abs(pm[index] - pp[index]).sum() / (np.sum(index) * plant_power[plant])


def load_dataset(plant):
    print(f'loading plant {plant} data')
    train = pd.read_csv(f'../data/train_{plant}.csv', parse_dates=["时间"]).drop_duplicates().reset_index(drop=True)
    test = pd.read_csv(f'../data/test_{plant}.csv', parse_dates=["时间"])
    train.columns = ['time', 'irr', 'ws', 'wd', 'temp', 'pr', 'hm', 'mirr', 'power']
    test.columns = ['id', 'time', 'irr', 'ws', 'wd', 'temp', 'pr', 'hm']
    data = pd.concat([train, test])
    return data

In [3]:
p1 = load_dataset(1)

loading plant 1 data


In [4]:
train = p1.id.isnull()
test = p1.power.isnull()

In [5]:
p1['wd'] = MinMaxScaler((-1, 1)).fit_transform(p1['wd'].values.reshape(-1,1))
power_scaler = StandardScaler(with_mean=0, with_std=1.).fit(p1[train]['power'].values.reshape(-1,1))
p1.loc[train, 'power'] = power_scaler.transform(p1.loc[train, 'power'].values.reshape(-1,1))

In [6]:
class SingleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.GRU(input_size=input_size, hidden_size=hidden_size)
        self.fc = nn.Linear(hidden_size, 1)
#         self.relu = nn.ReLU()
        self.unlinear = nn.Tanh()
    def forward(self, x):
        x, hn = self.lstm(x)
        x = self.fc(x)
        x = self.unlinear(x[-1, :, :].squeeze())
        return x

In [7]:
def get_train_test(x, seq, features):
    train = x[x.id.isnull()].reset_index(drop=True)
    test = x[x.id.notnull()].reset_index(drop=True)
    train_size, test_size = train.shape[0], test.shape[0]
    train_x, test_x = [], []
    train_y = train['power'].values[seq:]
    for i in range(seq, train_size):
        train_x.append(x.iloc[i-seq: i][features].values)
    for i in range(train_size, test_size + train_size):
        test_x.append(x.iloc[i-seq: i][features].values)
    return np.stack(train_x, axis=1), np.stack(test_x, axis=1), train_y

In [8]:
train_x, test_x, train_y = get_train_test(p1, 100, features=['hm', 'irr', 'pr', 'temp', 'wd', 'ws'])

In [9]:
train_x = torch.tensor(train_x).float()
test_x = torch.tensor(test_x).float()
train_y = torch.tensor(train_y).float()

In [10]:
def generate_batch(x, y, batch_size, shuffle=True):
    n_sample = x.shape[1]
    idx = np.array(range(n_sample))
    if shuffle:
        np.random.shuffle(idx)
    iterations = n_sample // batch_size
    for step in range(iterations):
        yield (x[:, idx[batch_size*step: batch_size*(step+1)], :], 
               y[idx[batch_size*step: batch_size*(step+1)]], step)

In [None]:
epochs = 5
SEQLEN = 100
INPUT_SIZE = 10
batch_size = 4

lstm = SingleLSTM(6, 300)
optimizer = optim.Adam(lstm.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()
torch.nn.L1Loss

train_loss = []
test_loss = []
loss_record = 0
loss_total = []
for epoch in range(1, epochs+1):
    for (batch_x, batch_y, step) in generate_batch(train_x, train_y, batch_size):
        
        optimizer.zero_grad()
        pred_y = lstm(batch_x)
        loss = loss_func(pred_y, batch_y)
        loss_record += loss
        loss.backward()
        loss_total.append(loss)
        optimizer.step()
        if step % 4**4 == 0 and step > 0:
            loss_record /= 4**4
            loss_total.append(loss_record)
            print(f'epoch: {epoch}, step: {step}, loss: {loss_record: .4f}')
            loss_record = 0

epoch: 1, step: 256, loss:  1.1385
epoch: 1, step: 512, loss:  1.1077
epoch: 1, step: 768, loss:  1.0450
epoch: 1, step: 1024, loss:  1.0742
epoch: 1, step: 1280, loss:  1.1098
epoch: 1, step: 1536, loss:  1.0479
epoch: 1, step: 1792, loss:  1.0892
epoch: 1, step: 2048, loss:  1.0946
epoch: 1, step: 2304, loss:  1.1023
epoch: 1, step: 2560, loss:  1.0716
epoch: 1, step: 2816, loss:  1.0840
epoch: 1, step: 3072, loss:  1.1201
epoch: 1, step: 3328, loss:  1.0894
epoch: 1, step: 3584, loss:  1.0165
epoch: 1, step: 3840, loss:  1.0980
epoch: 1, step: 4096, loss:  1.0694
epoch: 1, step: 4352, loss:  1.0870
epoch: 1, step: 4608, loss:  1.0981
epoch: 1, step: 4864, loss:  1.0864
epoch: 1, step: 5120, loss:  1.0953
epoch: 1, step: 5376, loss:  1.0854
epoch: 1, step: 5632, loss:  1.0608
epoch: 1, step: 5888, loss:  1.0574
epoch: 1, step: 6144, loss:  1.0893
epoch: 1, step: 6400, loss:  1.1208
epoch: 1, step: 6656, loss:  1.0964
epoch: 1, step: 6912, loss:  1.0266
epoch: 1, step: 7168, loss:  1.