# Import and Set

In [1]:
import sys 
sys.path.append('../')
import os
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from utils import *
from datas import *
from set_train import *
from models.Transformers import *

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Training configuration returned by the project's set_train() helper.
(
    stock_symbol, end_date, num_class, batch_size,
    init, fp16_training, num_epochs, lr,
) = set_train()

# Data
# Build the data loaders and auxiliary objects via the project's data() helper.
trainloader, validloader, testloader, test_date, df, src = data()

# Pull exactly one training batch to sanity-check devices and shapes.
# next(iter(...)) states that intent directly, instead of a for-loop whose
# only statement is `break`.
x, y = next(iter(trainloader))
print(src.device, x.device, src.shape, x.shape, y.shape)

## Init: Model, Criterion, Optimizer, FP16, Previous Train Information

In [3]:
# Build the model, optionally routed through Hugging Face Accelerate.
# The Accelerate path needs the optional dependency:
#   pip install accelerate==0.2.0
if not fp16_training:
    # Plain PyTorch: place the model on the selected device ourselves.
    model = TransformerDecoderOnly(num_class=num_class).to(device)
else:
    print('Accelerating')
    # Imported lazily because it is only required on this path.
    from accelerate import Accelerator
    # NOTE(review): Accelerator() is created with default arguments; confirm
    # that mixed precision is actually enabled for the installed version.
    accelerator = Accelerator()
    # From here on the accelerator's device replaces the one chosen earlier.
    device = accelerator.device
    # The model is not moved here; it is handed to accelerator.prepare() later.
    model = TransformerDecoderOnly(num_class=num_class)

# Model name, used as a prefix in checkpoint and history file names.
Model = model.model_type

# Initialise the training state: learning rate, start epoch, best validation
# loss and the per-epoch loss histories. When `init` is false and a previous
# run left its files behind, the state is restored from them instead.

# Every per-run file shares this prefix (model name, class count, symbol).
run_prefix = f'Temp//{Model}_class{num_class}_{stock_symbol}'
last_train_info_path = f'{run_prefix}_LastTrainInfo.pk'

# The existence check must use the very same file name that is opened below;
# otherwise a finished run is never detected and training always restarts.
if not init and os.path.exists(last_train_info_path):
    print('Load from last train epoch')
    # NOTE: pickle.load / torch.load execute arbitrary code from the file;
    # only use them on files written by this notebook.
    with open(last_train_info_path, 'rb') as f:
        last_train_info = pickle.load(f)
    lr = last_train_info['lr']
    # The stored epoch is the last one that completed, so training continues
    # with the following epoch rather than repeating it (which would also
    # append a duplicate entry to the loss histories).
    last_epoch = last_train_info['epoch'] + 1
    min_val_loss = last_train_info['min val loss']
    # map_location='cpu' lets a checkpoint written on a GPU be read on any
    # machine; load_state_dict then copies into the model's own parameters.
    model.load_state_dict(
        torch.load(f'{run_prefix}_checkpoint_LastTrainModel.pt', map_location='cpu')
    )
    with open(f'{run_prefix}_TrainValHistLoss.pk', 'rb') as f:
        loss_train_val = pickle.load(f)
    loss_train = loss_train_val['train']
    loss_valid = loss_train_val['valid']
else:
    # Fresh start: keep the configured lr and begin at epoch 0.
    print("Init model")
    last_epoch = 0
    min_val_loss = 10000.0  # sentinel larger than any expected validation loss
    loss_train = []
    loss_valid = []

# `last_epoch` is the epoch index the training loop starts from.
print(f'Last train epoch: {last_epoch}  '
        f'Last train lr: {lr}   '
        f'Min val loss: {min_val_loss}')

# Loss function, optimiser and learning-rate schedule.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
# StepLR counts scheduler.step() calls; step_size is one epoch's worth of
# batches, so the lr is multiplied by 0.9 after that many scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=len(trainloader), gamma=0.9)

# Hand the training objects to Accelerate when that path is enabled.
if fp16_training:
    print('Accelerate Prepare')
    prepared = accelerator.prepare(model, optimizer, trainloader, validloader, scheduler)
    model, optimizer, trainloader, validloader, scheduler = prepared

# Check device: report where the first model parameter lives (if there is one).
first_param = next(iter(model.named_parameters()), None)
if first_param is not None:
    name, param = first_param
    print(f"Parameter '{name}' is on device: {param.device}")

Accelerating
Init model
Last train epoch: 0  Last train lr: 0.001   Min val loss: 10000.0
Accelerate Prepare
Parameter 'embedding.weight' is on device: cuda:0


## Train

In [4]:
# Train / validate loop with per-epoch checkpointing.
#
# Tensor layout, as recorded by the notebook author (TODO confirm against data()):
#   as loaded      batch_x: (batch_size, d_model, seq_len)
#                  src:     (total_length, d_model, seq_len)
#   model input    batch_x: (batch_size, seq_len, d_model) -> batch_x.permute(0, 2, 1)
#
# NOTE(review): `src` is reassigned from itself and is not used inside the loop
# below; re-running this cell reshapes it again, so re-run the data cell first.
src = src.squeeze(2).unsqueeze(0).to(device)

# Create the output folders up front so the first checkpoint write cannot fail
# on a fresh checkout.
os.makedirs('Temp', exist_ok=True)
os.makedirs('Model_Result', exist_ok=True)

for epoch in range(last_epoch, num_epochs):
    # ---- Training phase ----
    model.train()
    loss_train_e = 0
    for batch_x, batch_y in tqdm(trainloader):
        # Without Accelerate the batches are moved to the device by hand;
        # presumably the prepared loaders already handle placement (verify).
        if not fp16_training:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

        batch_x = batch_x.permute(0, 2, 1)
        optimizer.zero_grad()

        outputs = model(batch_x)

        # Loss
        loss = criterion(outputs, batch_y)
        if fp16_training:
            accelerator.backward(loss)
        else:
            loss.backward()
        optimizer.step()
        # The lr schedule only starts advancing after epoch 50; it is stepped
        # once per batch from then on.
        if epoch > 50:
            scheduler.step()
        loss_train_e += loss.item()

    # Mean batch loss over the epoch.
    loss_train_e /= len(trainloader)
    loss_train.append(loss_train_e)

    # ---- Validation phase ----
    model.eval()
    loss_valid_e = 0
    with torch.no_grad():
        for batch_x_val, batch_y_val in tqdm(validloader):
            if not fp16_training:
                batch_x_val = batch_x_val.to(device)
                batch_y_val = batch_y_val.to(device)
            batch_x_val = batch_x_val.permute(0, 2, 1)

            outputs_val = model(batch_x_val)
            loss = criterion(outputs_val, batch_y_val)
            loss_valid_e += loss.item()
    loss_valid_e /= len(validloader)
    loss_valid.append(loss_valid_e)

    # ---- Checkpointing ----
    # Always keep the latest weights so an interrupted run can be resumed.
    torch.save(model.state_dict(), f'Temp/{Model}_class{num_class}_{stock_symbol}_checkpoint_LastTrainModel.pt')
    # Additionally keep the weights with the best validation loss seen so far.
    if loss_valid_e < min_val_loss:
        min_val_loss = loss_valid_e
        print(f'New best model found in epoch {epoch} with val loss: {min_val_loss}')
        torch.save(model.state_dict(), f'Model_Result/{Model}_class{num_class}_{stock_symbol}_best_model.pt')

    # Persist the loss histories and the bookkeeping needed to resume:
    # best validation loss, index of the epoch just completed, current lr.
    with open(f'Temp/{Model}_class{num_class}_{stock_symbol}_TrainValHistLoss.pk', 'wb') as f:
        pickle.dump({'train': loss_train, 'valid': loss_valid}, f)
    with open(f'Temp/{Model}_class{num_class}_{stock_symbol}_LastTrainInfo.pk', 'wb') as f:
        pickle.dump({'min val loss': min_val_loss, 'epoch': epoch, 'lr': optimizer.param_groups[0]['lr']}, f)

    # Print statistics
    print(f'Epoch [{epoch}/{num_epochs}]',
        f'Training Loss: {loss_train_e:.10f}',
        f'Valid Loss: {loss_valid_e:.10f}')

100%|██████████| 65/65 [00:27<00:00,  2.38it/s]
100%|██████████| 17/17 [00:02<00:00,  7.27it/s]


New best model found in epoch 0 with val loss: 4.802058163811179
Epoch [0/500] Training Loss: 6.7612539897 Valid Loss: 4.8020581638


100%|██████████| 65/65 [00:27<00:00,  2.39it/s]
100%|██████████| 17/17 [00:02<00:00,  7.45it/s]


New best model found in epoch 1 with val loss: 3.9283670870696796
Epoch [1/500] Training Loss: 5.3817783374 Valid Loss: 3.9283670871


100%|██████████| 65/65 [00:25<00:00,  2.58it/s]
100%|██████████| 17/17 [00:02<00:00,  6.79it/s]


Epoch [2/500] Training Loss: 4.4216169532 Valid Loss: 3.9749437360


100%|██████████| 65/65 [00:28<00:00,  2.28it/s]
100%|██████████| 17/17 [00:02<00:00,  7.58it/s]


New best model found in epoch 3 with val loss: 3.6457891814848957
Epoch [3/500] Training Loss: 3.9063395739 Valid Loss: 3.6457891815


100%|██████████| 65/65 [00:28<00:00,  2.28it/s]
100%|██████████| 17/17 [00:02<00:00,  7.75it/s]


New best model found in epoch 4 with val loss: 3.44402439103407
Epoch [4/500] Training Loss: 3.3885348403 Valid Loss: 3.4440243910


100%|██████████| 65/65 [00:29<00:00,  2.19it/s]
100%|██████████| 17/17 [00:02<00:00,  8.12it/s]


Epoch [5/500] Training Loss: 3.1015004048 Valid Loss: 3.4481418063


100%|██████████| 65/65 [00:26<00:00,  2.43it/s]
100%|██████████| 17/17 [00:02<00:00,  8.18it/s]


Epoch [6/500] Training Loss: 2.8381008662 Valid Loss: 3.5358751802


100%|██████████| 65/65 [00:26<00:00,  2.41it/s]
100%|██████████| 17/17 [00:02<00:00,  7.36it/s]


New best model found in epoch 7 with val loss: 3.438989288666669
Epoch [7/500] Training Loss: 2.7208757932 Valid Loss: 3.4389892887


100%|██████████| 65/65 [00:29<00:00,  2.17it/s]
100%|██████████| 17/17 [00:02<00:00,  7.18it/s]


New best model found in epoch 8 with val loss: 3.4004140601438633
Epoch [8/500] Training Loss: 2.6073050343 Valid Loss: 3.4004140601


100%|██████████| 65/65 [00:26<00:00,  2.46it/s]
100%|██████████| 17/17 [00:02<00:00,  7.24it/s]


New best model found in epoch 9 with val loss: 3.3696217046064487
Epoch [9/500] Training Loss: 2.5382995505 Valid Loss: 3.3696217046


100%|██████████| 65/65 [00:27<00:00,  2.40it/s]
100%|██████████| 17/17 [00:02<00:00,  6.19it/s]


Epoch [10/500] Training Loss: 2.3800497211 Valid Loss: 3.5346867968


100%|██████████| 65/65 [00:33<00:00,  1.97it/s]
100%|██████████| 17/17 [00:02<00:00,  6.61it/s]


Epoch [11/500] Training Loss: 2.3963333199 Valid Loss: 3.4133140760


100%|██████████| 65/65 [00:30<00:00,  2.12it/s]
100%|██████████| 17/17 [00:03<00:00,  4.26it/s]


New best model found in epoch 12 with val loss: 3.3571436825920555
Epoch [12/500] Training Loss: 2.2193017905 Valid Loss: 3.3571436826


100%|██████████| 65/65 [00:28<00:00,  2.31it/s]
100%|██████████| 17/17 [00:02<00:00,  6.96it/s]


Epoch [13/500] Training Loss: 2.2081146020 Valid Loss: 3.3640461459


100%|██████████| 65/65 [00:31<00:00,  2.09it/s]
100%|██████████| 17/17 [00:03<00:00,  5.34it/s]


New best model found in epoch 14 with val loss: 3.313370199764476
Epoch [14/500] Training Loss: 2.2039817095 Valid Loss: 3.3133701998


100%|██████████| 65/65 [00:34<00:00,  1.86it/s]
100%|██████████| 17/17 [00:03<00:00,  5.58it/s]


Epoch [15/500] Training Loss: 2.2043683162 Valid Loss: 3.3479923431


100%|██████████| 65/65 [00:35<00:00,  1.83it/s]
100%|██████████| 17/17 [00:02<00:00,  6.86it/s]


Epoch [16/500] Training Loss: 2.1747766476 Valid Loss: 3.3624612794


100%|██████████| 65/65 [00:32<00:00,  2.01it/s]
100%|██████████| 17/17 [00:02<00:00,  7.67it/s]


Epoch [17/500] Training Loss: 2.2151739515 Valid Loss: 3.4696638584


100%|██████████| 65/65 [00:33<00:00,  1.96it/s]
100%|██████████| 17/17 [00:02<00:00,  5.71it/s]


Epoch [18/500] Training Loss: 2.2020364908 Valid Loss: 3.4768973519


100%|██████████| 65/65 [00:31<00:00,  2.08it/s]
100%|██████████| 17/17 [00:02<00:00,  5.84it/s]


Epoch [19/500] Training Loss: 2.1996129118 Valid Loss: 3.4123303469


100%|██████████| 65/65 [00:31<00:00,  2.07it/s]
100%|██████████| 17/17 [00:02<00:00,  5.78it/s]


Epoch [20/500] Training Loss: 2.2081970948 Valid Loss: 3.4076033901


100%|██████████| 65/65 [00:28<00:00,  2.26it/s]
100%|██████████| 17/17 [00:02<00:00,  7.03it/s]


Epoch [21/500] Training Loss: 2.1244901318 Valid Loss: 3.4354408068


100%|██████████| 65/65 [00:27<00:00,  2.33it/s]
100%|██████████| 17/17 [00:02<00:00,  7.72it/s]


Epoch [22/500] Training Loss: 2.1624132528 Valid Loss: 3.4048803133


100%|██████████| 65/65 [00:29<00:00,  2.17it/s]
100%|██████████| 17/17 [00:02<00:00,  7.12it/s]


Epoch [23/500] Training Loss: 2.1153532257 Valid Loss: 3.4347504518


100%|██████████| 65/65 [00:26<00:00,  2.50it/s]
100%|██████████| 17/17 [00:02<00:00,  7.85it/s]


Epoch [24/500] Training Loss: 2.1499823602 Valid Loss: 3.3912314247


100%|██████████| 65/65 [00:25<00:00,  2.51it/s]
100%|██████████| 17/17 [00:02<00:00,  7.62it/s]


Epoch [25/500] Training Loss: 2.1130768927 Valid Loss: 3.4474867021


100%|██████████| 65/65 [00:25<00:00,  2.57it/s]
100%|██████████| 17/17 [00:02<00:00,  7.47it/s]


Epoch [26/500] Training Loss: 2.1585642673 Valid Loss: 3.5158348224


100%|██████████| 65/65 [00:25<00:00,  2.53it/s]
100%|██████████| 17/17 [00:02<00:00,  6.71it/s]


Epoch [27/500] Training Loss: 2.1174029905 Valid Loss: 3.4587985137


100%|██████████| 65/65 [00:26<00:00,  2.45it/s]
100%|██████████| 17/17 [00:02<00:00,  7.92it/s]


Epoch [28/500] Training Loss: 2.1027837309 Valid Loss: 3.4400942185


100%|██████████| 65/65 [00:33<00:00,  1.92it/s]
100%|██████████| 17/17 [00:03<00:00,  5.05it/s]


Epoch [29/500] Training Loss: 2.0972872995 Valid Loss: 3.4603089445


 80%|████████  | 52/65 [00:32<00:07,  1.77it/s]