# Import Dependencies

In [41]:
from n_beats import NBeatsNet, EarlyStopping

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# Seed PyTorch's global RNG up front so that torch-based random operations
# (e.g. weight initialisation) give the same results on every
# Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

# Data Preparation

In [3]:
# Location of the raw CSV. This is an absolute path on the Colab runtime;
# keeping it in one named constant makes it the single place to edit when the
# notebook is run elsewhere.
DATA_PATH = '/content/Data MPDW Kelompok 1.csv'

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Date,Close,Volume
0,2023-10-09,973.590942,22298500
1,2023-10-10,1213.244019,13625200
2,2023-10-11,1512.810547,38431400
3,2023-10-12,1887.268555,219262300
4,2023-10-13,2356.589111,39504100


## Data Cleaning

## Dataset Splitting

In [34]:
# Chronological hold-out: the first 80% of rows go to training and the last
# 20% to validation. Rows are not shuffled, so their order is preserved.
df_train, df_val = train_test_split(
    df,
    test_size=0.2,
    shuffle=False,
    random_state=42,  # only consulted when shuffling, so inert here
)
df_train.shape, df_val.shape

((356, 3), (89, 3))

## Data Visualization

# Dataset & Dataloader

In [None]:
# The original scratch call `torch.tensor()` had no `data` argument, which
# raises TypeError and stops Restart & Run All. Preview the float32 conversion
# of the first few closing prices instead.
torch.tensor(df_train.Close.values[:5], dtype=torch.float32)

In [30]:
class TimeSeriesDataset4NBEATS(Dataset):
  """Sliding-window dataset over a 1-D series for N-BEATS.

  Sample ``i`` is the pair ``(x, y)``: ``x`` holds ``backcast_length``
  consecutive values starting at position ``i`` and ``y`` holds the
  ``forecast_length`` values that immediately follow ``x``.

  Args:
    data: Sliceable sequence supporting ``len()`` (e.g. a 1-D tensor).
    backcast_length: Number of past steps in each input window (>= 1).
    forecast_length: Number of future steps in each target window (>= 1).

  Raises:
    ValueError: If either window length is smaller than 1.
  """

  def __init__(self, data, backcast_length, forecast_length):
    if backcast_length < 1 or forecast_length < 1:
      raise ValueError(
        "backcast_length and forecast_length must both be >= 1, got "
        f"{backcast_length} and {forecast_length}"
      )
    self.data = data
    self.backcast_length = backcast_length
    self.forecast_length = forecast_length

  def __len__(self):
    """Return the number of complete (backcast, forecast) windows."""
    n_windows = len(self.data) - self.backcast_length - self.forecast_length + 1
    # A series shorter than one full window has no samples; without the clamp
    # the count would be negative and len() itself would raise.
    return max(n_windows, 0)

  def __getitem__(self, index):
    """Return the ``(x, y)`` window pair starting at ``index``.

    Negative indices count from the end, like a list.

    Raises:
      IndexError: If ``index`` is outside ``[-len(self), len(self))``. This
        is also what lets plain ``for x, y in dataset`` terminate.
    """
    n_windows = len(self)
    if index < 0:
      index = index + n_windows
    if not 0 <= index < n_windows:
      raise IndexError(f"window index out of range for {n_windows} windows")

    split = index + self.backcast_length
    x = self.data[index:split]
    y = self.data[split:split + self.forecast_length]
    return x, y

In [37]:
# Window and batching hyper-parameters.
BATCH_SIZE = 32
BACKCAST_LENGTH = 30  # past steps fed to the model
FORECAST_LENGTH = 7   # future steps to predict

# Training windows are built from the closing prices of the training split.
train_set = TimeSeriesDataset4NBEATS(
    data=torch.tensor(df_train["Close"].values, dtype=torch.float32),
    backcast_length=BACKCAST_LENGTH,
    forecast_length=FORECAST_LENGTH,
)
trainloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=False)

# Validation windows are built the same way from the validation split.
val_set = TimeSeriesDataset4NBEATS(
    data=torch.tensor(df_val["Close"].values, dtype=torch.float32),
    backcast_length=BACKCAST_LENGTH,
    forecast_length=FORECAST_LENGTH,
)
valloader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

In [36]:
# Draw a single batch and show its first (backcast, forecast) pair. Fetching
# the batch once avoids building two iterators and guarantees both tensors
# come from the same batch even if the loader is later switched to shuffle.
sample_x, sample_y = next(iter(trainloader))
sample_x[0], sample_y[0]

(tensor([ 973.5909, 1213.2440, 1512.8105, 1887.2686, 2356.5891, 2746.0256,
         3425.0430, 3784.5225, 4044.1467, 4024.1758, 3734.5947, 3794.5081,
         3814.4792, 3794.5081, 4054.1326, 4493.4961, 4573.3804, 4283.7998,
         4044.1467, 4134.0171, 4283.7998, 4383.6553, 4393.6406, 5217.4487,
         5217.4487, 5292.3403, 5367.2319, 5716.7261, 5691.7622, 6315.8589]),
 tensor([6790.1724, 6265.9312, 5641.8345, 6420.1572, 6070.4214, 5945.5151,
         6245.2891]))

# Training Preparation

In [29]:
# N-BEATS with two generic stacks, sized to the window lengths used by the
# datasets above.
model = NBeatsNet(
    device=device,
    stack_types=(NBeatsNet.GENERIC_BLOCK,) * 2,
    backcast_length=BACKCAST_LENGTH,
    forecast_length=FORECAST_LENGTH,
    hidden_layer_units=128,
)
model = model.to(device)

# Mean squared error loss, optimised with Adam at its default settings.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters())

| N-Beats
| --  Stack Generic (#0) (share_weights_in_stack=False)
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=30, forecast_length=7, share_thetas=False) at @134660556421216
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=30, forecast_length=7, share_thetas=False) at @134660377668416
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=30, forecast_length=7, share_thetas=False) at @134660377669952
| --  Stack Generic (#1) (share_weights_in_stack=False)
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=30, forecast_length=7, share_thetas=False) at @134660397490464
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=30, forecast_length=7, share_thetas=False) at @134660377673264
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=30, forecast_length=7, share_thetas=False) at @134660377673312


In [None]:
# Multiply the learning rate by 0.1 once the monitored value (stepped with the
# validation cost in the training loop) has not improved for 3 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=3,
)

# NOTE(review): presumably saves the best weights to `path` and raises its
# `early_stop` flag after 10 epochs without improvement; confirm against
# n_beats.EarlyStopping.
early_stopping = EarlyStopping(path='best_model.pt', patience=10)

In [None]:
def loop_fn(mode, dataset, dataloader, model, criterion, optimizer, device):
  """Run one full pass over ``dataloader`` and return ``(cost, mape)``.

  Args:
    mode: ``"train"`` to update the weights, ``"test"`` to evaluate only.
    dataset: Dataset behind ``dataloader``; only its length is used, to turn
      the summed batch losses into a per-sample average.
    dataloader: Iterable yielding ``(feature, target)`` tensor batches.
    model: Called as ``model(feature)``; must return a pair whose second item
      is the forecast compared against ``target``.
    criterion: Loss callable invoked as ``criterion(output, target)``.
    optimizer: Stepped after every batch in ``"train"`` mode; unused otherwise.
    device: Device the batches are moved to before the forward pass.

  Returns:
    Tuple ``(cost, mape)``: the batch-size-weighted mean loss over the pass and
    the mean absolute percentage error over all collected predictions.

  Raises:
    ValueError: If ``mode`` is not ``"train"`` or ``"test"``, or if
      ``dataset`` is empty.
  """
  # Validate up front: an unknown mode used to surface later as an unbound
  # `context_manager`, and an empty dataset as a ZeroDivisionError.
  if mode not in ("train", "test"):
    raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")
  if len(dataset) == 0:
    raise ValueError("dataset is empty; nothing to iterate over")

  is_train = mode == "train"
  if is_train:
    model.train()
    context_manager = torch.enable_grad()
  else:
    model.eval()
    context_manager = torch.inference_mode()

  preds, targets = [], []
  cost = 0.0
  with context_manager:
    for feature, target in tqdm(dataloader, desc=mode.title()):
      feature = feature.to(device, non_blocking=True)
      target = target.to(device, non_blocking=True)
      _, output = model(feature)
      loss = criterion(output, target)

      if is_train:
        # Clear gradients before backward so leftovers from an interrupted
        # previous run can never leak into this update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      # Weight each batch loss by its size so the final average is per sample
      # even when the last batch is smaller.
      cost += loss.item() * feature.shape[0]
      preds.extend(output.detach().cpu().numpy())
      targets.extend(target.detach().cpu().numpy())

  cost /= len(dataset)
  mape = mean_absolute_percentage_error(targets, preds)
  return cost, mape

# Train epoch by epoch until early stopping fires. There is no fixed epoch
# budget: the loop ends only when `early_stopping.early_stop` becomes true.
epoch = 0
while True:
  epoch += 1
  print(f"\n--- Epoch {epoch} ---")

  train_cost, train_score = loop_fn("train", train_set, trainloader, model, criterion, optimizer, device)
  val_cost, val_score = loop_fn("test", val_set, valloader, model, criterion, optimizer, device)

  print(f"Epoch {epoch}: Train Cost: {train_cost:.6f}, MAPE: {train_score:.4f} | Validation Cost: {val_cost:.6f}, MAPE: {val_score:.4f}")

  # Both the LR schedule and early stopping are driven by the validation cost.
  scheduler.step(val_cost)

  early_stopping(val_cost, model)
  if early_stopping.early_stop:
    print("Early stopping triggered")
    break

print("Loading best model weights...")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only one.
# NOTE(review): assumes 'best_model.pt' holds a state_dict written by
# EarlyStopping; confirm against n_beats.EarlyStopping.
model.load_state_dict(torch.load('best_model.pt', map_location=device))

# Model Evaluation