In [1]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn import metrics
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader,TensorDataset, Dataset
from plotly import graph_objects as go
import math
import os
import torch.nn.functional as F
import shutil
import xgboost as xgb
import random
from scipy import stats

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def sliding_window(data, sw_width, in_start=0):
    """Cut a 2-D array into overlapping windows of ``sw_width`` consecutive rows.

    Successive windows are shifted by one row. The first window starts at
    row ``in_start``; windows that would run past the last row are not
    produced.

    Args:
        data: 2-D array-like (rows x columns) of numeric values.
        sw_width: number of rows in each window.
        in_start: row index at which the first window starts.

    Returns:
        A float ``np.ndarray`` of shape ``(n_windows, sw_width, n_columns)``.
        When no complete window fits, the result is an empty array of
        shape ``(0,)``.
    """
    # The previous `np.array(data, type(float))` passed `type` itself as the
    # dtype (type(float) is `type`), producing an object array instead of a
    # float array. Request float explicitly.
    data = np.asarray(data, dtype=float)
    n_rows = data.shape[0]

    # A window starting at `start` needs start + sw_width <= n_rows. At most
    # n_rows windows are ever produced, matching the original loop bound.
    stop = min(n_rows - sw_width + 1, in_start + n_rows)
    windows = [data[start:start + sw_width, :] for start in range(in_start, stop)]
    return np.array(windows)

In [None]:
class ann_net(nn.Module):
    """Feed-forward network with a single hidden layer: Linear -> ReLU -> Linear."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two linear layers.

        Args:
            input_dim: size of each input vector.
            hidden_dim: number of hidden units.
            output_dim: size of each output vector.
        """
        super().__init__()

        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which weights are initialised.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` (last dimension ``input_dim``) to the network output."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [6]:
class lstm_net(nn.Module):
    """Stacked LSTM followed by a small fully connected head.

    The head is applied to the LSTM output of the last time step only.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, layers):
        """Build the LSTM and the two fully connected stages.

        Args:
            input_dim: number of features per time step.
            hidden_dim: LSTM hidden size (also the width of the first FC stage).
            output_dim: number of values predicted per sample.
            layers: number of stacked LSTM layers.
        """
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=layers,
            bias=True,
            batch_first=True,
        )

        self.fc1 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ELU(inplace=True),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ELU(inplace=True),
            # Previously hard-coded to 1, which silently ignored `output_dim`.
            nn.Linear(hidden_dim // 2, output_dim),
        )

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor of shape ``(batch, time_steps, input_dim)``
                (the LSTM is created with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # The final hidden/cell states are not needed, only the per-step outputs.
        r_out, _ = self.lstm(x)
        # Keep the output of the last time step: (batch, hidden_dim).
        last_step = r_out[:, -1, :]
        return self.fc2(self.fc1(last_step))

In [7]:
class Data_set(Dataset):
    """Dataset pairing each entry of ``x`` with the entry of ``y`` at the same index."""

    def __init__(self, x, y):
        """Store the inputs ``x`` and the targets ``y`` as given."""
        self.x, self.y = x, y

    def __len__(self):
        """Number of samples, taken from the length of ``y``."""
        return len(self.y)

    def __getitem__(self, id):
        """Return the ``(x[id], y[id])`` pair."""
        return self.x[id], self.y[id]

In [None]:
if __name__ == '__main__':
    # ---- parameters shared by the cells below ----
    TIME_STEP = 31    # number of consecutive rows in one sliding window
    BATCH_SIZE = 32 # 128
    epoches = 200
    TRAIN_ROWS = 3652  # rows [0, TRAIN_ROWS) form the training split, the rest the test split

    # A fresh seed is drawn on every run; print it so a run can be reproduced
    # later by assigning the printed value to `seed`.
    seed = random.randint(0, 4294967295)
    print('seed:', seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # torch.set_default_tensor_type is deprecated; set_default_dtype is the
    # supported way to make float32 the default floating point dtype.
    torch.set_default_dtype(torch.float32)

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    data = pd.read_excel(r'./data.xlsx')

    data = np.array(data)
    data = data[:, 3:]  # the first three columns are not used
    train_set = data[:TRAIN_ROWS, :]
    test_set = data[TRAIN_ROWS:, :]

    def _window_xy(split):
        """Build (flattened feature windows, targets) for one split.

        Every column except the last is a feature; the last column is the
        target. The target of a window is the value in its last row.
        """
        x = sliding_window(split[:, 0:-1], TIME_STEP).astype(np.float32)
        y = sliding_window(split[:, -1:], TIME_STEP)[:, -1, :].astype(np.float32)
        # Flatten each window to a single vector: (samples, TIME_STEP * features).
        return x.reshape(x.shape[0], -1), y.squeeze()

    train_x, train_y = _window_xy(train_set)
    test_x, test_y = _window_xy(test_set)



In [None]:
if __name__ == '__main__':
    # lstm
    # The LSTM needs (samples, TIME_STEP, features) input, but train_x may have
    # been flattened to (samples, TIME_STEP * features) by an earlier cell.
    # Restoring the time axis here works for both layouts.
    train_x_seq = train_x.reshape(train_x.shape[0], TIME_STEP, -1)
    loader_lstm = DataLoader(Data_set(train_x_seq, train_y), batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

    model_lstm = lstm_net(input_dim=8, hidden_dim=64, output_dim=1, layers=3)
    optimizer = optim.Adam(model_lstm.parameters(), lr=3e-4)
    criterion = nn.MSELoss()

    # torch.save does not create missing directories, so make sure it exists.
    save_model_path = r'./compare_model/lstm'
    os.makedirs(save_model_path, exist_ok=True)

    model_lstm.train()
    lowest_loss = 1000000
    for epoch in range(epoches):
        # The DataLoader already yields tensors, so no torch.tensor() copy is needed.
        for x, y in loader_lstm:
            optimizer.zero_grad()
            pred = model_lstm(x).squeeze()
            loss = criterion(pred, y.squeeze())
            # The graph is rebuilt for every batch; retaining it only costs memory.
            loss.backward()
            optimizer.step()

        # NOTE(review): `loss` is the loss of the LAST mini-batch of the epoch,
        # not an epoch average, so the "best" checkpoint is chosen on that value.
        checkpoint = {"model_state_dict": model_lstm.state_dict(),
                      "optimizer_state_dic": optimizer.state_dict(),
                      "loss": loss.detach(),  # store the value without its autograd graph
                      "epoch": epoch}
        if loss.item() < lowest_loss:
            lowest_loss = loss.item()
            path_checkpoint = save_model_path+'/'+'lstm_epoch_{}.pkl'.format(epoch)
            torch.save(checkpoint, path_checkpoint)

        print('Epoch:', '%04d' % (epoch), 'loss:', loss.item())



In [None]:
if __name__ == '__main__':
    # ann
    # The fully connected net takes one flat vector per sample.
    train_x = train_x.reshape(train_x.shape[0], -1)
    loader_ann = DataLoader(Data_set(torch.tensor(train_x), torch.tensor(train_y)), batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

    model_ann = ann_net(input_dim=8*TIME_STEP, hidden_dim=64, output_dim=1)
    optimizer = optim.Adam(model_ann.parameters(), lr=3e-2)
    criterion = nn.MSELoss()

    # The previous r'.\compare_model\ann' only worked on Windows; forward
    # slashes work on every platform. torch.save does not create missing
    # directories, so make sure the target exists.
    save_model_path = './compare_model/ann'
    os.makedirs(save_model_path, exist_ok=True)

    model_ann.train()
    lowest_loss = 1000000
    for epoch in range(epoches):
        for x, y in loader_ann:
            optimizer.zero_grad()
            pred = model_ann(x).squeeze()
            loss = criterion(pred, y.squeeze())
            # The graph is rebuilt for every batch; retaining it only costs memory.
            loss.backward()
            optimizer.step()

        # NOTE(review): `loss` is the loss of the LAST mini-batch of the epoch,
        # not an epoch average, so the "best" checkpoint is chosen on that value.
        checkpoint = {"model_state_dict": model_ann.state_dict(),
                      "optimizer_state_dic": optimizer.state_dict(),
                      "loss": loss.detach(),  # store the value without its autograd graph
                      "epoch": epoch}
        if loss.item() < lowest_loss:
            lowest_loss = loss.item()
            path_checkpoint = save_model_path+'/'+'ann_epoch_{}.pkl'.format(epoch)
            torch.save(checkpoint, path_checkpoint)

        print('Epoch:', '%04d' % (epoch), 'loss:', loss.item())

In [None]:
from sklearn.svm import SVR
if __name__ == '__main__':

    # parameters
    TIME_STEP = 31
    BATCH_SIZE = 32 # 128
    epoches = 200

    # Set the random seed. It is printed so the run can be reproduced by
    # assigning the printed value to `seed` (as in the example below).
    seed = random.randint(0, 4294967295)
    # seed = 166143082
    print('seed:', seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # torch.set_default_tensor_type is deprecated; set_default_dtype is the
    # supported way to make float32 the default floating point dtype.
    torch.set_default_dtype(torch.float32)

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    #---------------------------------------------------------------------------------
    model_num = 5
    # prediction[i, m, :] holds the test predictions of repetition i for
    # model m (0 = xgboost, 1 = random forest, 2 = svr).
    prediction = np.zeros((model_num, 3, test_y.shape[0]))

    # svr: it is constructed without any seed, so it is fitted once here
    # instead of being refitted identically in every repetition.
    model_svr = SVR(kernel='linear')
    model_svr.fit(train_x, train_y)
    pred_svr = model_svr.predict(test_x).reshape(-1, 1)

    for seed_num in range(0, model_num):
        seed_temp = random.randint(0, 4294967295)
        # xgboost
        params = {
            'objective': 'reg:squarederror',
            'max_depth': 5,
            'learning_rate': 0.01,
            'n_estimators': 200,
            'seed': seed_temp
        }
        model_xgboost = xgb.XGBRegressor(**params)
        model_xgboost.fit(train_x, train_y)
        # rf
        model_rf = RandomForestRegressor(random_state=seed_temp)
        model_rf.fit(train_x, train_y)

        pred_xgboost = model_xgboost.predict(test_x).reshape(-1, 1)
        pred_rf = model_rf.predict(test_x).reshape(-1, 1)

        # The xgboost row used to be written as `prediction[seed_num,0:]`,
        # which filled all three model rows and only ended up correct because
        # the next two assignments overwrote rows 1 and 2.
        prediction[seed_num, 0, :] = pred_xgboost.ravel()
        prediction[seed_num, 1, :] = pred_rf.ravel()
        prediction[seed_num, 2, :] = pred_svr.ravel()
  


(4, 3, 335)
(4, 3, 335)


In [None]:
#test
if __name__ == '__main__':
    # NOTE(review): the training cells write their checkpoints under a
    # `compare_model` directory with the epoch number in the file name, while
    # this cell reads ./compare/<model>_epoch.pkl. Presumably the chosen
    # checkpoints are copied there by hand; confirm.
    # torch.load unpickles the file, so only load checkpoints from a trusted source.

    model_lstm_test = lstm_net(input_dim=8, hidden_dim=64, output_dim=1, layers=3)
    model_lstm_test.load_state_dict(torch.load(r'./compare/lstm_epoch.pkl')['model_state_dict'])
    model_lstm_test.eval()
    # The LSTM needs (samples, TIME_STEP, features) input, but test_x may have
    # been flattened by an earlier cell. Restoring the time axis works for both layouts.
    test_x_seq = torch.tensor(test_x.reshape(test_x.shape[0], TIME_STEP, -1))
    with torch.no_grad():
        pred_lstm = model_lstm_test(test_x_seq).squeeze()

    # The fully connected net takes one flat vector per sample.
    test_x = test_x.reshape(test_x.shape[0], -1)
    model_ann_test = ann_net(input_dim=8*TIME_STEP, hidden_dim=64, output_dim=1)
    model_ann_test.load_state_dict(torch.load(r'./compare/ann_epoch.pkl')['model_state_dict'])
    model_ann_test.eval()
    with torch.no_grad():
        pred_ann = model_ann_test(torch.tensor(test_x)).squeeze()