In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from dataflow import df_train, df_test  # Import dữ liệu

# Chia tập train thành train và validation
train, val = train_test_split(df_train, test_size=0.2, shuffle=False)

# Chuẩn hóa dữ liệu
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train[['Revenue', 'Units']])
val_scaled = scaler.transform(val[['Revenue', 'Units']])

# Tạo tensor dữ liệu
def create_sequences(data, seq_length):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        targets.append(data[i+seq_length])
    return torch.tensor(np.array(sequences), dtype=torch.float32), torch.tensor(np.array(targets), dtype=torch.float32)

seq_length = 10  # Độ dài chuỗi đầu vào
X_train, y_train = create_sequences(train_scaled, seq_length)
X_val, y_val = create_sequences(val_scaled, seq_length)


X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)

print("X_train reshaped shape:", X_train.shape)
print("X_val reshaped shape:", X_val.shape)



X_train reshaped shape: torch.Size([721238, 20])
X_val reshaped shape: torch.Size([180303, 20])


In [20]:
# Định nghĩa mô hình N-BEATS
class NBeatsBlock(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NBeatsBlock, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

class NBeats(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_blocks=3):
        super(NBeats, self).__init__()
        self.blocks = nn.ModuleList([NBeatsBlock(input_size, hidden_size, output_size) for _ in range(num_blocks)])

    def forward(self, x):
        y_hat = torch.zeros_like(x[:, 0, :])
        for block in self.blocks:
            y_hat += block(x.view(x.shape[0], -1))
        return y_hat

# Khởi tạo mô hình
input_size = seq_length * 2  # Hai features: Revenue, Units
hidden_size = 128
output_size = 2  # Hai output: Revenue, Units
model = NBeats(input_size, hidden_size, output_size)

# Cấu hình huấn luyện
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 10

# Huấn luyện mô hình
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train.view(X_train.shape[0], -1))
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_output = model(X_val.view(X_val.shape[0], -1))
            val_loss = criterion(val_output, y_val)
        print(f'Epoch {epoch}, Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')

# Đánh giá mô hình
model.eval()
with torch.no_grad():
    y_pred = model(X_val.view(X_val.shape[0], -1)).numpy()
    y_true = y_val.numpy()

def calculate_metrics(y_true, y_pred):
    mse = np.mean((y_true - y_pred) ** 2)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return rmse, mape

rmse, mape = calculate_metrics(y_true, y_pred)
print(f'RMSE: {rmse:.4f}, MAPE: {mape:.2f}%')


IndexError: too many indices for tensor of dimension 2

In [9]:
print(f"Min Date_int: {df_train_data['Date_int'].min()}")
print(f"Max Date_int: {df_train_data['Date_int'].max()}")
print(f"Total rows: {len(df_train_data)}")


Min Date_int: 1278201600
Max Date_int: 1609372800
Total rows: 721248
