In [None]:
import torch
import torch.nn as nn
from torch.nn import Transformer
from torch.optim import Adam
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
from torch.utils.data import DataLoader

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # padding on the both ends of time series
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block
    """
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean
    
class DLinear(nn.Module):
    """
    Decomposition-Linear
    """
    def __init__(self, seq_len, output_size, individual, input_size):
        super(DLinear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = output_size

        # Decomposition Kernel Size
        kernel_size = 25
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = input_size

        if self.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            
            for i in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))
        else:
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)

        self.linear = nn.Linear(self.channels * self.pred_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        seasonal_init, trend_init = self.decomposition(x)
        seasonal_init = seasonal_init.permute(0, 2, 1)
        trend_init = trend_init.permute(0, 2, 1)

        if self.individual:
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len], dtype=seasonal_init.dtype).to(seasonal_init.device)
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len], dtype=trend_init.dtype).to(trend_init.device)

            for i in range(self.channels):
                seasonal_output[:, i, :] = self.Linear_Seasonal[i](seasonal_init[:, i, :])
                trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :])
        else:
            seasonal_output = self.Linear_Seasonal(seasonal_init)
            trend_output = self.Linear_Trend(trend_init)

        x = seasonal_output + trend_output
        print(x.shape)
        print('x'*20)
        x = x[:,-1,:]
        print(x.shape)
        print('x'*20)
        print(x)
        return x

class NLinear(nn.Module):
    """
    Normalization-Linear
    """
    def __init__(self, seq_len, output_size, individual, input_size):
        super(NLinear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = output_size
        
        # Use this line if you want to visualize the weights
        # self.Linear.weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len]))
        self.channels = input_size
        self.individual = individual
        if self.individual:
            self.Linear = nn.ModuleList()
            for i in range(self.channels):
                self.Linear.append(nn.Linear(self.seq_len,self.pred_len))
        else:
            self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        seq_last = x[:,-1:,:].detach()
        x = x - seq_last
        if self.individual:
            output = torch.zeros([x.size(0),self.pred_len,x.size(2)],dtype=x.dtype).to(x.device)
            for i in range(self.channels):
                output[:,:,i] = self.Linear[i](x[:,:,i])
            x = output
        else:
            x = self.Linear(x.permute(0,2,1)).permute(0,2,1)
        x = x + seq_last
        x = x[:,:,-1]

        return x # [Batch, Output length, Channel]
    
class Linear(nn.Module):
    """
    Just one Linear layer
    """
    def __init__(self, seq_len,output_size,individual,input_size):
        super(Linear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = output_size
        self.Linear = nn.Linear(self.seq_len, self.pred_len)
        # Use this line if you want to visualize the weights
        # self.Linear.weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len]))
        self.channels = input_size
        self.individual = individual
        if self.individual:
            self.Linear = nn.ModuleList()
            for i in range(self.channels):
                self.Linear.append(nn.Linear(self.seq_len,self.pred_len))
        else:
            self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        if self.individual:
            output = torch.zeros([x.size(0),self.pred_len,x.size(2)],dtype=x.dtype).to(x.device)
            for i in range(self.channels):
                output[:,:,i] = self.Linear[i](x[:,:,i])
            x = output
        else:
            x = self.Linear(x.permute(0,2,1)).permute(0,2,1)
        print(x)
        x = x[:,:,-1]
        return x # [Batch, Output length, Channel]
    
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.num_directions = 1
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input_seq):
        batch_size, seq_len = input_seq.shape[0], input_seq.shape[1]
        h_0 = torch.randn(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
        c_0 = torch.randn(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
        output, _ = self.lstm(input_seq, (h_0, c_0))
        print(output.shape)
        print('x'*20)
        pred = self.linear(output)
        print(pred.shape)
        print('x'*20)
        pred = pred[:, -1, :]
        print(pred.shape)
        print('x'*20)
        return pred

In [None]:
# 模型參數
input_dim = 10 # 輸入詞彙表大小
hidden_dim = 32  # 隱藏層維度
output_dim = 1  # 輸出維度，輸出維度為1，代表預測的下一個數
num_layers = 4  # Transformer Encoder/Decoder 層數
num_heads = 8  # Attention heads 
hidden_size=32
batch_size=100


seq_len = 30
output_size = 1
individual = True
input_size = 10


# 創建 
# model = DLinear(seq_len,output_size,individual,input_size).to(device)
# model = NLinear(seq_len,output_size,individual,input_size).to(device)
model = LSTM(input_size,hidden_size,num_layers,output_size,batch_size).to(device)
# model = Linear(seq_len,output_size,individual,input_size).to(device)


# 定義損失函數和優化器
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, verbose=True)

In [None]:
def batch_data(final_seq, batch_size):
    """
    Batches the sequence data using PyTorch DataLoader.

    Args:
        final_seq: A list of tuples containing sequence input data and target data.
        batch_size: The desired batch size.

    Returns:
        A DataLoader object containing the batched sequence data.

    """
    final_seq = DataLoader(dataset=final_seq, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=False)

    return final_seq

In [None]:
# 生成 l * w * h 維度的資料
# l : batch_size
# w : sequence length
# h : amounts of features 
# 以下以 30 * 6 * 1 做示範
def data_generated(beg, num, max=1):
    final_seq = []
    for i in range(num):
        end = beg + 30
        q_tensors = [torch.tensor([i]*10, dtype=torch.float32) for i in range(beg, end)]
        q_tensors = torch.stack(q_tensors)
        a_tensors = torch.tensor([end], dtype=torch.float32) 
        final_seq.append((q_tensors, a_tensors))
        beg += 1
    print('final_seq : 資料類型={}, 列數={}'.format(type(final_seq), len(final_seq)))
    print('final_seq[0] : 資料類型={}, 列數={}'.format(type(final_seq[0]), len(final_seq[0])))
    print('final_seq[0][0] : 資料類型={}, 內容數={}'.format(type(final_seq[0][0]), final_seq[0][0].shape))
    print('final_seq[0][1] : 資料類型={}, 內容數={}'.format(type(final_seq[0][1]), final_seq[0][1].shape))
    print(final_seq[0][0])
    print(final_seq[0][1])
    data = batch_data(final_seq, 30)

    return data

In [None]:
data = data_generated(0, 571,1000)

In [None]:
# 生成訓練資料
train_data = data_generated(0, 970, 1000)

# 生成驗證資料
valid_data = data_generated(600, 60, 1000)

# 生成測試資料
test_data = data_generated(700, 60, 1000)

In [None]:
# # 生成訓練資料
# train_data = []
# for i in range(1000): 
#     sequence = torch.arange(i, i+6).float() # 輸入等差數列
#     target = torch.tensor([sequence[-1] + 1])  # 預測下一個值，等於最後一個+1
#     sequence = sequence.unsqueeze(0)
#     sequence = torch.cat([sequence, sequence], dim=0)
#     train_data.append((sequence, target))

# # 生成驗證資料
# valid_data = []
# for i in range(1000,1100):
#     sequence = torch.arange(i, i+6).float()  # 輸入等差數列
#     target = torch.tensor([sequence[-1] + 1])  # 預測下一個值，等於最後一個+1
#     sequence = sequence.unsqueeze(0)
#     sequence = torch.cat([sequence, sequence], dim=0)
#     valid_data.append((sequence, target))

# # 生成測試資料
# test_data = []
# for i in range(1100,1200):
#     sequence = torch.arange(i, i+6).float()  # 輸入等差數列
#     target = torch.tensor([sequence[-1] + 1])  # 預測下一個值，等於最後一個+1
#     sequence = sequence.unsqueeze(0)
#     sequence = torch.cat([sequence, sequence], dim=0)
#     test_data.append((sequence, target))

In [None]:
# final_seq = train_data
# print('final_seq : 資料類型={}, 列數={}'.format(type(final_seq), len(final_seq)))
# print('final_seq[0] : 資料類型={}, 列數={}'.format(type(final_seq[0]), len(final_seq[0])))
# print('final_seq[0][0] : 資料類型={}, 內容數={}'.format(type(final_seq[0][0]), final_seq[0][0].shape))
# print('final_seq[0][1] : 資料類型={}, 內容數={}'.format(type(final_seq[0][1]), final_seq[0][1].shape))

In [None]:
def get_val_loss(model, val_data, loss_function):
    """
    Computes the average validation loss for a given model and validation data.

    Args:
        model: The model for which to compute the validation loss.
        val_data: The validation data (a DataLoader object).
        loss_function: The loss function to compute the loss.

    Returns:
        The average validation loss.

    """

    model.eval()
    val_loss = []
    with torch.no_grad():
        for seq, label in val_data:
            try:
                seq, label = seq.to(device), label.to(device)
                y_pred = model(seq)
                loss = loss_function(y_pred[0].view(-1), label)
                val_loss.append(loss.item())
            except:
                seq, label = seq.to(device), label.to(device)
                print(seq.shape)
                print(seq.dtype)
                print(seq.type())
                y_pred = model(seq, seq)
                loss = loss_function(y_pred[0].view(-1), label)
                val_loss.append(loss.item())

    return np.mean(val_loss)

In [433]:
# 模型訓練

#for epoch in tqdm(range(1000)):
for epoch in range(1000):
    model.train()
    total_loss = 0.0
    time = 0
    for sequence, target in train_data:

        optimizer.zero_grad()
        sequence = sequence.to(device)
        # print(sequence.shape)
        # print(sequence.dtype)
        # print(sequence.type())
        target = target.to(device)
        # print(target.shape)
        # print(target.dtype)
        # print(target.type())
        print(sequence)
        output = model(sequence)
        #print('model :', output.view(-1)[0])
        #print('true :', target.view(-1)[0])
        
        # 計算損失
        loss = criterion(output.view(-1), target.view(-1)).to(device)

        # 反向傳播與參數更新
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        time += 1
    #print('結束1')  
    val_loss = get_val_loss(model, valid_data, criterion)
    #print('結束2') 
    if (epoch + 1) % 100 == 0:
        print('epoch {:03d} train_loss {:.8f} val_loss {:.8f}'.format(epoch, total_loss/time, val_loss))
    scheduler.step(val_loss)
    

# 模型測試
model.eval()
correct_predictions = 0
ans = 730
with torch.no_grad():
    for sequence, target in test_data:
        sequence = sequence.to(device)
        target = target.to(device)
        output = model(sequence)
        for i in output:
            print('預測 :', i, '正解 :', ans)
            ans += 1
        loss = criterion(output, target).to(device)
    
        predicted_value = output.view(-1).item()
       

accuracy = correct_predictions / len(test_data)
print(f"Test Accuracy: {accuracy}")

tensor([[[660., 660., 660.,  ..., 660., 660., 660.],
         [661., 661., 661.,  ..., 661., 661., 661.],
         [662., 662., 662.,  ..., 662., 662., 662.],
         ...,
         [687., 687., 687.,  ..., 687., 687., 687.],
         [688., 688., 688.,  ..., 688., 688., 688.],
         [689., 689., 689.,  ..., 689., 689., 689.]],

        [[661., 661., 661.,  ..., 661., 661., 661.],
         [662., 662., 662.,  ..., 662., 662., 662.],
         [663., 663., 663.,  ..., 663., 663., 663.],
         ...,
         [688., 688., 688.,  ..., 688., 688., 688.],
         [689., 689., 689.,  ..., 689., 689., 689.],
         [690., 690., 690.,  ..., 690., 690., 690.]],

        [[662., 662., 662.,  ..., 662., 662., 662.],
         [663., 663., 663.,  ..., 663., 663., 663.],
         [664., 664., 664.,  ..., 664., 664., 664.],
         ...,
         [689., 689., 689.,  ..., 689., 689., 689.],
         [690., 690., 690.,  ..., 690., 690., 690.],
         [691., 691., 691.,  ..., 691., 691., 691.]],