# Imports and Settings

In [1]:
import sys
sys.path.append('../')
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from utils import *
from models.Transformers import *

# Run identity. `stock_symbol` is embedded in every checkpoint / history file
# name written by this notebook; `end_date` is not used in the cells shown here.
stock_symbol, end_date = '5871.TW', '2024-12-31'

# Default compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
# (The setup cell overrides this with the Accelerator's device when
# fp16_training is enabled.)
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

"""
Trials:
    [decoder = {True, False}, num_class = {1, 2}]
Progress:
    [decoder = True, num_class = 1]
Pending:
Finished:
    [decoder = False, num_class = 1] [decoder = True, num_class = 1]
"""
# Trial selection for this run.
decoder = True          # not read by the cells shown here
num_class = 2           # picks the dataloader pickle and is part of artifact file names

# Run control.
init = True             # True: start from scratch even if saved progress exists
fp16_training = True    # True: route model / optimizer / loaders through Accelerate
num_epochs = 500        # upper bound of the epoch loop

# Optimizer hyper-parameters.
config = dict(lr=0.001)

# Data

In [2]:
# The two trial variants differ only in which pickled loader bundle they read,
# so pick the file first and share a single loading path.
if num_class == 1:
    loader_file = './DataLoader/dataloader_1.pk'
else:
    loader_file = './DataLoader/dataloader.pk'

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load bundles that were produced by this project.
with open(loader_file, 'rb') as f:
    data = pickle.load(f)
trainloader = data['trainloader']
validloader = data['validloader']
# A 'testloader' entry was referenced by a commented-out line in the original
# cell; it is intentionally not loaded here.

# Peek at exactly one batch to report the input shape and derive the batch
# size. Fail with a clear message instead of a NameError on `x` further down
# when the loader yields nothing.
first_batch = next(iter(trainloader), None)
if first_batch is None:
    raise ValueError(f'trainloader loaded from {loader_file} yields no batches')
x, y = first_batch
print(x.shape)
batch_size = x.size(0)

torch.Size([32, 6, 100])


# Setting

- Model, Criterion, Optimizer, FP16, Previous Train Information

In [3]:
"""
Choose if fp16 and define model
pip install accelerate==0.2.0
"""
# Model
# With Accelerate enabled, the Accelerator picks the device and the model is
# left where it was constructed (it is passed to accelerator.prepare further
# down in this cell). Without it, the model is moved to `device` right away.
# NOTE(review): Accelerator() is created without an explicit precision
# argument, so whether fp16 is really used depends on the installed Accelerate
# version / its external configuration — confirm.
if fp16_training:
    print('Accelerating')
    from accelerate import Accelerator
    accelerator = Accelerator()
    device = accelerator.device

model = TransformerDecoderOnly(num_class=num_class)
if not fp16_training:
    model = model.to(device)

# Model name; used as the prefix of every checkpoint / history file name.
Model = model.model_type

"""
Init for models, learning rate, ...
"""
# Check path
# Every artifact of a run shares one prefix. The existence check below uses
# exactly the same file names that are loaded here and written by the training
# loop (including the `_class{num_class}` part); previously the check looked
# for a name without that part, so it never matched the saved files.
run_prefix = f'Temp/{Model}_class{num_class}_{stock_symbol}'
last_info_path = f'{run_prefix}_LastTrainInfo.pk'
last_model_path = f'{run_prefix}_checkpoint_LastTrainModel.pt'
hist_loss_path = f'{run_prefix}_TrainValHistLoss.pk'
can_resume = all(
    os.path.exists(p) for p in (last_info_path, last_model_path, hist_loss_path)
)

if init or not can_resume:
    # Fresh start: requested explicitly via `init`, or nothing to resume from.
    print("Init model")
    lr = config['lr']
    last_epoch = 0
    min_val_loss = 10000.0  # sentinel larger than any expected validation loss
    loss_train = []
    loss_valid = []
else:
    print('Load from last train epoch')
    # NOTE(security): pickle.load / torch.load can execute code embedded in
    # the file; only load artifacts written by this notebook.
    with open(last_info_path, 'rb') as f:
        last_train_info = pickle.load(f)
    lr = last_train_info['lr']
    # The training loop stores the index of the epoch it has just finished,
    # and uses `last_epoch` as the start of its range, so continue with the
    # next epoch instead of repeating the finished one (which would also
    # append a duplicate entry to the loss history).
    last_epoch = last_train_info['epoch'] + 1
    min_val_loss = last_train_info['min val loss']
    # map_location='cpu' lets a checkpoint saved from a GPU run be restored on
    # a machine without CUDA; load_state_dict copies the tensors onto whatever
    # device the model parameters currently live on.
    model.load_state_dict(torch.load(last_model_path, map_location='cpu'))
    with open(hist_loss_path, 'rb') as f:
        loss_train_val = pickle.load(f)
    loss_train = loss_train_val['train']
    loss_valid = loss_train_val['valid']
print(f'Start epoch: {last_epoch}  '
      f'Last train lr: {lr}   '
      f'Min val loss: {min_val_loss}')

# Criterion and Optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

# StepLR counts scheduler.step() calls. The training loop steps the scheduler
# once per batch, so a step size of one epoch's worth of batches multiplies the
# learning rate by `gamma` once per epoch in which the scheduler is stepped.
steps_per_epoch = len(trainloader)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=steps_per_epoch, gamma=0.9)

# Prepare
# Hand every training object to the Accelerator and keep the returned objects
# under the same names.
if fp16_training:
    print('Accelerate Prepare')
    prepared = accelerator.prepare(model, optimizer, trainloader, validloader, scheduler)
    model, optimizer, trainloader, validloader, scheduler = prepared
        
# Check device
# Report where the first registered parameter lives as a quick sanity check of
# the model's placement; nothing is printed for a model without parameters.
first_param = next(iter(model.named_parameters()), None)
if first_param is not None:
    name, param = first_param
    print(f"Parameter '{name}' is on device: {param.device}")

Accelerating
Init model
Last train epoch: 0  Last train lr: 0.001   Min val loss: 10000.0
Accelerate Prepare
Parameter 'embedding.weight' is on device: cuda:0


# Train

In [4]:
# Make sure the output folders exist before spending an epoch of compute:
# neither torch.save nor open(..., 'wb') creates missing directories, so a
# missing folder would otherwise only surface after the first full epoch.
os.makedirs('Temp', exist_ok=True)
os.makedirs('Model_Result', exist_ok=True)

# Shared part of every artifact file name written below.
run_tag = f'{Model}_class{num_class}_{stock_symbol}'

for epoch in range(last_epoch, num_epochs):
    # ---------------- Training phase ----------------
    model.train()
    loss_train_e = 0
    for batch_x, batch_y in tqdm(trainloader):
        if not fp16_training:
            # Without Accelerate the batches have to be moved to the device
            # by hand; this step is skipped for the prepared loaders.
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

        # Swap the last two axes before feeding the model (the data cell
        # printed a batch of shape [32, 6, 100], i.e. this yields [32, 100, 6]).
        batch_x = batch_x.permute(0, 2, 1)
        optimizer.zero_grad()

        outputs = model(batch_x)

        # Loss
        loss = criterion(outputs, batch_y)
        if fp16_training:
            accelerator.backward(loss)
        else:
            loss.backward()
        optimizer.step()
        # Learning-rate decay only starts after epoch 50; before that the
        # scheduler is not stepped and the lr stays at its initial value.
        if epoch > 50:
            scheduler.step()
        loss_train_e += loss.item()

    # Mean of the per-batch losses for this epoch.
    loss_train_e /= len(trainloader)
    loss_train.append(loss_train_e)

    # ---------------- Validation phase ----------------
    model.eval()
    loss_valid_e = 0
    with torch.no_grad():
        for batch_x_val, batch_y_val in tqdm(validloader):
            if not fp16_training:
                batch_x_val = batch_x_val.to(device)
                batch_y_val = batch_y_val.to(device)
            batch_x_val = batch_x_val.permute(0, 2, 1)

            outputs_val = model(batch_x_val)
            loss = criterion(outputs_val, batch_y_val)
            loss_valid_e += loss.item()
    loss_valid_e /= len(validloader)
    loss_valid.append(loss_valid_e)

    # ---------------- Checkpointing ----------------
    # Always keep the latest weights so an interrupted run can be resumed ...
    torch.save(model.state_dict(), f'Temp/{run_tag}_checkpoint_LastTrainModel.pt')
    # ... and separately keep the weights with the lowest validation loss so far.
    if loss_valid_e < min_val_loss:
        min_val_loss = loss_valid_e
        print(f'New best model found in epoch {epoch} with val loss: {min_val_loss}')
        torch.save(model.state_dict(), f'Model_Result/{run_tag}_best_model.pt')

    # Loss history and resume information. `epoch` is the index of the epoch
    # that has just been completed.
    with open(f'Temp/{run_tag}_TrainValHistLoss.pk', 'wb') as f:
        pickle.dump({'train': loss_train, 'valid': loss_valid}, f)
    with open(f'Temp/{run_tag}_LastTrainInfo.pk', 'wb') as f:
        pickle.dump({'min val loss': min_val_loss, 'epoch': epoch, 'lr': optimizer.param_groups[0]['lr']}, f)

    # Print statistics
    print(f'Epoch [{epoch}/{num_epochs}]',
          f'Training Loss: {loss_train_e:.10f}',
          f'Valid Loss: {loss_valid_e:.10f}')

100%|██████████| 65/65 [00:39<00:00,  1.65it/s]
100%|██████████| 17/17 [00:03<00:00,  5.26it/s]


New best model found in epoch 0 with val loss: 6.6669149539049934
Epoch [0/500] Training Loss: 7.7487190577 Valid Loss: 6.6669149539


100%|██████████| 65/65 [00:45<00:00,  1.43it/s]
100%|██████████| 17/17 [00:03<00:00,  4.60it/s]


New best model found in epoch 1 with val loss: 5.34657816325917
Epoch [1/500] Training Loss: 6.3135783801 Valid Loss: 5.3465781633


100%|██████████| 65/65 [00:52<00:00,  1.23it/s]
100%|██████████| 17/17 [00:06<00:00,  2.65it/s]


New best model found in epoch 2 with val loss: 5.276153101640589
Epoch [2/500] Training Loss: 5.3527732776 Valid Loss: 5.2761531016


100%|██████████| 65/65 [00:42<00:00,  1.53it/s]
100%|██████████| 17/17 [00:03<00:00,  5.07it/s]


New best model found in epoch 3 with val loss: 5.174045113956227
Epoch [3/500] Training Loss: 4.9379463343 Valid Loss: 5.1740451140


100%|██████████| 65/65 [00:40<00:00,  1.61it/s]
100%|██████████| 17/17 [00:04<00:00,  4.16it/s]


New best model found in epoch 4 with val loss: 5.163202166557312
Epoch [4/500] Training Loss: 4.4471799640 Valid Loss: 5.1632021666


100%|██████████| 65/65 [00:36<00:00,  1.76it/s]
100%|██████████| 17/17 [00:03<00:00,  4.96it/s]


New best model found in epoch 5 with val loss: 4.7103354930877686
Epoch [5/500] Training Loss: 4.1070713025 Valid Loss: 4.7103354931


100%|██████████| 65/65 [00:37<00:00,  1.72it/s]
100%|██████████| 17/17 [00:03<00:00,  4.81it/s]


New best model found in epoch 6 with val loss: 4.611063136773951
Epoch [6/500] Training Loss: 3.9763393475 Valid Loss: 4.6110631368


100%|██████████| 65/65 [00:37<00:00,  1.75it/s]
100%|██████████| 17/17 [00:02<00:00,  6.20it/s]


New best model found in epoch 7 with val loss: 4.38024835025563
Epoch [7/500] Training Loss: 3.6838862135 Valid Loss: 4.3802483503


100%|██████████| 65/65 [00:36<00:00,  1.79it/s]
100%|██████████| 17/17 [00:03<00:00,  5.20it/s]


New best model found in epoch 8 with val loss: 4.254794885130489
Epoch [8/500] Training Loss: 3.5039221126 Valid Loss: 4.2547948851


100%|██████████| 65/65 [00:38<00:00,  1.67it/s]
100%|██████████| 17/17 [00:02<00:00,  5.73it/s]


New best model found in epoch 9 with val loss: 4.150697392575881
Epoch [9/500] Training Loss: 3.3336153929 Valid Loss: 4.1506973926


100%|██████████| 65/65 [00:37<00:00,  1.75it/s]
100%|██████████| 17/17 [00:03<00:00,  4.68it/s]


New best model found in epoch 10 with val loss: 4.088376024190118
Epoch [10/500] Training Loss: 3.3173258172 Valid Loss: 4.0883760242


100%|██████████| 65/65 [00:40<00:00,  1.61it/s]
100%|██████████| 17/17 [00:03<00:00,  5.09it/s]


Epoch [11/500] Training Loss: 3.1521884276 Valid Loss: 4.1301299895


100%|██████████| 65/65 [00:43<00:00,  1.50it/s]
100%|██████████| 17/17 [00:05<00:00,  2.94it/s]


Epoch [12/500] Training Loss: 3.1327047632 Valid Loss: 4.0929778604


100%|██████████| 65/65 [00:41<00:00,  1.56it/s]
100%|██████████| 17/17 [00:02<00:00,  5.99it/s]


New best model found in epoch 13 with val loss: 4.049393324291005
Epoch [13/500] Training Loss: 3.1160212425 Valid Loss: 4.0493933243


100%|██████████| 65/65 [00:44<00:00,  1.47it/s]
100%|██████████| 17/17 [00:04<00:00,  3.86it/s]


New best model found in epoch 14 with val loss: 4.046424430959365
Epoch [14/500] Training Loss: 3.1118165566 Valid Loss: 4.0464244310


100%|██████████| 65/65 [00:38<00:00,  1.71it/s]
100%|██████████| 17/17 [00:03<00:00,  4.83it/s]


New best model found in epoch 15 with val loss: 4.0022234636194565
Epoch [15/500] Training Loss: 3.0100293783 Valid Loss: 4.0022234636


100%|██████████| 65/65 [00:40<00:00,  1.61it/s]
100%|██████████| 17/17 [00:02<00:00,  5.86it/s]


New best model found in epoch 16 with val loss: 3.9950725821887745
Epoch [16/500] Training Loss: 3.0137341848 Valid Loss: 3.9950725822


100%|██████████| 65/65 [00:41<00:00,  1.56it/s]
100%|██████████| 17/17 [00:03<00:00,  5.66it/s]


Epoch [17/500] Training Loss: 2.9829758511 Valid Loss: 3.9989897644


100%|██████████| 65/65 [00:39<00:00,  1.65it/s]
100%|██████████| 17/17 [00:02<00:00,  5.70it/s]


New best model found in epoch 18 with val loss: 3.976901082431569
Epoch [18/500] Training Loss: 3.0494744072 Valid Loss: 3.9769010824


100%|██████████| 65/65 [00:45<00:00,  1.42it/s]
100%|██████████| 17/17 [00:03<00:00,  4.85it/s]


Epoch [19/500] Training Loss: 3.0356352678 Valid Loss: 4.0070406479


100%|██████████| 65/65 [00:41<00:00,  1.55it/s]
100%|██████████| 17/17 [00:05<00:00,  3.15it/s]


Epoch [20/500] Training Loss: 3.0121903309 Valid Loss: 4.0026769638


 58%|█████▊    | 38/65 [00:25<00:22,  1.22it/s]