In [7]:
import pickle
from math import sqrt

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader, random_split

from Informer_structure_patched import load_data, preprocess_data, StockForecastDataset, transformer

In [8]:
# Load the multi-stock training CSV through the project's loader.
df_cleaned = load_data("training_multi_stock.csv")
# Run the project's preprocessing step. It hands back three objects, unpacked
# here as the preprocessed frame, the scalers and the ticker label encoder.
# NOTE(review): the scaling strategy (global vs. per-ticker fit) is decided
# inside preprocess_data and is not visible from this notebook — confirm there.
df_preprocessed, scalers, label_encoder = preprocess_data(df_cleaned)

Checking for null values... Date      0
close     0
high      0
low       0
open      0
volume    0
ticker    0
dtype: int64
Data after dropping nulls: (117187, 7)
Ticker: AAPL, ID: 0
Ticker: AMZN, ID: 1
Ticker: BAC, ID: 2
Ticker: COST, ID: 3
Ticker: CVX, ID: 4
Ticker: GOOGL, ID: 5
Ticker: GS, ID: 6
Ticker: JNJ, ID: 7
Ticker: JPM, ID: 8
Ticker: LLY, ID: 9
Ticker: META, ID: 10
Ticker: MS, ID: 11
Ticker: MSFT, ID: 12
Ticker: NKE, ID: 13
Ticker: NVDA, ID: 14
Ticker: PFE, ID: 15
Ticker: PG, ID: 16
Ticker: TSLA, ID: 17
Ticker: WMT, ID: 18
Ticker: XOM, ID: 19


In [9]:
# Eyeball the preprocessed frame (rich display of the cell's last expression).
df_preprocessed

Unnamed: 0,Date,open,high,low,close,volume,ticker,ticker_id
0,2000-01-03,0.000770,0.000811,0.000750,0.000821,0.058034,AAPL,0
1,2000-01-04,0.000797,0.000797,0.000746,0.000747,0.055496,AAPL,0
2,2000-01-05,0.000761,0.000796,0.000760,0.000759,0.084307,AAPL,0
3,2000-01-06,0.000780,0.000768,0.000697,0.000688,0.083186,AAPL,0
4,2000-01-07,0.000704,0.000722,0.000701,0.000724,0.049902,AAPL,0
...,...,...,...,...,...,...,...,...
117182,2024-08-26,0.153088,0.151554,0.152534,0.151905,0.000615,CVX,4
117183,2024-08-27,0.152780,0.151008,0.151696,0.150319,0.000541,CVX,4
117184,2024-08-28,0.150487,0.148574,0.150393,0.149347,0.000597,CVX,4
117185,2024-08-29,0.150508,0.149624,0.150352,0.150800,0.000473,CVX,4


In [11]:
# Wrap the preprocessed frame in the project's forecasting dataset.
dataset = StockForecastDataset(df_preprocessed)

# Sanity check: dataset length plus the tensor shapes of the first sample
# (element 0 is the model input, element 1 is the target).
first_sample = dataset[0]
print("Number of samples:", len(dataset))
print("Sample input shape:", first_sample[0].shape)
print("Sample target shape:", first_sample[1].shape)

Number of samples: 115787
Sample input shape: torch.Size([60, 6])
Sample target shape: torch.Size([10])


In [None]:

# ---------------------------------------------------------------------------
# Split the dataset, build the model, and train it while checkpointing the
# epoch with the lowest validation loss.
#
# Reads `dataset` (built in an earlier cell) and leaves `train_dataset`,
# `val_dataset`, `train_loader`, `val_loader`, `dev`, `model`, `criterion`,
# `optimizer`, `scheduler` and `best_loss` in the notebook namespace.
# ---------------------------------------------------------------------------

# Tunable settings, gathered in one place.
SEED = 42
TRAIN_FRACTION = 0.8
BATCH_SIZE = 16
NUM_WORKERS = 4
NUM_EPOCHS = 30
LEARNING_RATE = 1e-4
CHECKPOINT_PATH = "New_best_informer_model.pth"

# Seed the global RNGs so a "Restart & Run All" repeats the same run
# (torch.manual_seed also seeds the CUDA generators).
torch.manual_seed(SEED)
np.random.seed(SEED)

# Resolve the device first: the loaders need to know whether pinning host
# memory is useful (it only helps host-to-GPU copies).
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_pinned_memory = dev.type == "cuda"

train_size = int(TRAIN_FRACTION * len(dataset))
val_size = len(dataset) - train_size

# A dedicated, seeded generator makes the train/validation membership
# reproducible regardless of what consumed the global RNG beforehand.
# NOTE(review): this is a random split over forecasting windows. If the windows
# overlap in time (the sample count suggests a stride-1 sliding window),
# validation windows share days with training windows and the validation
# metrics will be optimistic. Consider a chronological split — confirm against
# StockForecastDataset before changing.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
)
print("Train dataset size:", len(train_dataset))
print("Validation dataset size:", len(val_dataset))

print("✅ Using Device:", dev)

model = transformer(input_dim=6, pred=10, d_model=256, n__heads=8, num_layers=4, ff_dim=512).to(dev)
criterion = torch.nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate after 5 epochs without validation-loss improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

best_loss = float("inf")

for epoch in range(1, NUM_EPOCHS + 1):
    # ---- training pass ----------------------------------------------------
    model.train()
    epoch_loss = 0.0

    for x, y in train_loader:
        x, y = x.to(dev, non_blocking=True), y.to(dev, non_blocking=True)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        # Weight each batch by its size so the epoch figure is a true
        # per-sample mean (the last batch may be smaller), matching how the
        # validation loss is accumulated below.
        epoch_loss += loss.item() * x.size(0)

    avg_train_loss = epoch_loss / len(train_loader.dataset)
    print(f"Epoch {epoch}, Training Loss: {avg_train_loss:.6f}")

    # ---- validation pass --------------------------------------------------
    model.eval()
    val_loss = 0.0
    val_pred, val_true = [], []

    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(dev, non_blocking=True), y.to(dev, non_blocking=True)
            output = model(x)
            loss = criterion(output, y)
            val_loss += loss.item() * x.size(0)
            val_pred.append(output.cpu().numpy())
            val_true.append(y.cpu().numpy())

    val_loss /= len(val_loader.dataset)
    val_pred = np.concatenate(val_pred, axis=0)
    val_true = np.concatenate(val_true, axis=0)
    # RMSE over every predicted value; it is in the same (scaled) units as the
    # targets the dataset yields.
    rmse = sqrt(mean_squared_error(val_true.flatten(), val_pred.flatten()))

    # ReduceLROnPlateau is driven by the monitored metric, not the epoch index.
    scheduler.step(val_loss)

    print(f"Epoch {epoch}, Validation Loss: {val_loss:.6f}, Validation RMSE: {rmse:.4f}")

    # ---- checkpoint the best epoch ----------------------------------------
    if val_loss < best_loss:
        best_loss = val_loss
        # Copy only the state-dict tensors to the CPU instead of moving the
        # live model there and back: the checkpoint is still loadable on a
        # CPU-only machine, the model never leaves `dev`, and a failed save
        # cannot strand it on the wrong device. Reassigning values keeps the
        # state dict's own type and metadata intact.
        checkpoint_state = model.state_dict()
        for param_name, tensor in list(checkpoint_state.items()):
            checkpoint_state[param_name] = tensor.cpu()
        torch.save(checkpoint_state, CHECKPOINT_PATH)
        print(f"✅ Model saved with Validation Loss: {best_loss:.6f}")

print(f"🏁 Training completed. Best Validation Loss: {best_loss:.6f}")

Train dataset size: 92629
Validation dataset size: 23158
Using Device cuda
Epoch 1, Loss: 0.001065454027067755
Epoch 1, Validation RMSE: 0.014727699395742417
Model saved with RMSE: 0.014727699395742417
Epoch 2, Loss: 0.0004925195950816736
Epoch 2, Validation RMSE: 0.010930828138813143
Model saved with RMSE: 0.010930828138813143
Epoch 3, Loss: 0.00040246933177939397
Epoch 3, Validation RMSE: 0.011461459650228446
Epoch 4, Loss: 0.0003729336965233649
Epoch 4, Validation RMSE: 0.008839053629389342
Model saved with RMSE: 0.008839053629389342
Epoch 5, Loss: 0.0003499391800602767
Epoch 5, Validation RMSE: 0.009642153499308343
Epoch 6, Loss: 0.0003403925943302573
Epoch 6, Validation RMSE: 0.010034125187533242
Epoch 7, Loss: 0.00032218042376130825
Epoch 7, Validation RMSE: 0.010880466890497556
Epoch 8, Loss: 0.00032534490882738914
Epoch 8, Validation RMSE: 0.011475453954416012
Epoch 9, Loss: 0.00031734004260348963
Epoch 9, Validation RMSE: 0.007803729002721263
Model saved with RMSE: 0.007803729

In [14]:
# Save scalers and label encoder.
# Both objects come from preprocess_data and are needed to reproduce the same
# feature scaling and ticker-id mapping outside this notebook, so both are
# persisted (each to its own file).
# NOTE: pickle files execute code on load — only unpickle these from a trusted
# location.
with open("scalers.pkl", "wb") as f:
    pickle.dump(scalers, f)

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

In [15]:
# Show the scalers object and its container type, one per line.
print(scalers, type(scalers), sep="\n")

{'open': MinMaxScaler(), 'high': MinMaxScaler(), 'low': MinMaxScaler(), 'close': MinMaxScaler(), 'volume': MinMaxScaler()}
<class 'dict'>
