In [None]:
import sys
sys.path.append('../')
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import yfinance as yf
import numpy as np
import os
import pickle
from torchaudio.models import Conformer
import math
from torch import nn, Tensor
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import Normalizer, StandardScaler
from einops.layers.torch import Rearrange, Reduce
from utils import *

# --- Run configuration ---
# Ticker symbol; it is embedded in every checkpoint / history file name below.
stock_symbol = '5871.TW'
# Not referenced by any other visible cell.
end_date = '2024-12-31'
# Passed to the model constructor as the width of its final linear layer.
num_class = 2
# True  -> ignore any saved training state and start from scratch.
# False -> resume from the files under Temp/ when they exist.
init = False
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Init

In [None]:
# Load the pre-built DataLoaders from disk.
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only open dataloader pickles that were produced by this project.
with open('../DataLoader/dataloader.pk', 'rb') as f:
    data = pickle.load(f)
dataloader_train = data['trainloader']
dataloader_valid = data['validloader']
# The evaluation cell reads `dataloader_test`; define it here so that cell does
# not fail with a NameError. It is None when the pickle has no 'testloader' entry.
dataloader_test = data.get('testloader')

# Define model
### Question
- Does the Conformer include a decoder?

In [None]:
# https://pytorch.org/tutorials/beginner/transformer_tutorial.html
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence, then apply dropout.

    Even embedding columns hold ``sin(position * div_term)`` and odd columns
    hold ``cos(position * div_term)``. The table is stored in the
    non-trainable buffer ``pe`` of shape ``[max_len, 1, d_model]``.

    Args:
        d_model: Embedding dimension of the input. Odd values are supported.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Number of positions pre-computed in the table.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim div_term to the number of odd columns. For an even d_model
        # the slice is the whole tensor and nothing changes.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``

        Returns:
            Tensor of the same shape as ``x``: the input plus the first
            ``seq_len`` rows of the encoding table, passed through dropout.
        """
        # pe[:seq_len] is [seq_len, 1, d_model] and broadcasts over the batch axis.
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# https://zhuanlan.zhihu.com/p/348849092
class PatchEmbedding(nn.Module):
    """Cut an image batch into square patches and linearly project each patch.

    Args:
        in_channels: Number of channels in the input image.
        patch_size: Side length of each square patch. The rearrange pattern
            splits height and width into ``patch_size``-sized tiles.
        emb_size: Output feature size produced for every patch.
    """

    def __init__(self, in_channels: int = 3, patch_size: int = 16, emb_size: int = 768):
        # Initialise nn.Module before assigning any attribute on the instance.
        super().__init__()
        self.patch_size = patch_size
        self.projection = nn.Sequential(
            # Split the image into s1 x s2 tiles and flatten each tile.
            Rearrange('b c (h s1) (w s2) -> b (h w) (s1 s2 c)', s1=patch_size, s2=patch_size),
            nn.Linear(patch_size * patch_size * in_channels, emb_size)
        )

    def forward(self, x: Tensor) -> Tensor:
        """Map ``[batch, channels, height, width]`` to ``[batch, num_patches, emb_size]``."""
        return self.projection(x)

# Resnet from Pytorch: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py
# Resnet pretrain pytorch: https://pytorch.org/hub/pytorch_vision_resnet/
# https://medium.com/ching-i/%E5%8D%B7%E7%A9%8D%E7%A5%9E%E7%B6%93%E7%B6%B2%E7%B5%A1-cnn-%E7%B6%93%E5%85%B8%E6%A8%A1%E5%9E%8B-googlelenet-resnet-densenet-with-pytorch-code-1688015808d9
class bottleneck_block(nn.Module):
    """Bottleneck residual unit: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a shortcut.

    The block outputs ``out_channels * expansion`` channels. ``downsample`` is
    an optional module applied to the input so the shortcut matches the main
    branch; pass ``None`` to use the input unchanged.
    """

    # Factor by which the last 1x1 convolution widens ``out_channels``.
    expansion = 4

    def __init__(self, in_channels, out_channels, stride, downsample):
        super().__init__()
        expanded_channels = out_channels * self.expansion

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Only the 3x3 convolution carries the stride of the block.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, expanded_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channels)

        # Adapts the shortcut when its dimensions differ from the main branch.
        self.downsample = downsample

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut, and apply the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
    
class Res_Conformer_Unet(nn.Module):
    """Conformer front-end with a skip connection, followed by a ResNet-style head.

    ``forward`` patch-embeds the input, runs the patch sequence through a
    Conformer, reshapes the result back to the input's shape, layer-normalises
    it and adds the raw input. The sum then goes through a ResNet trunk (stem
    convolution, four stages built from ``net_block``, average pooling) and two
    linear layers.

    Several sizes are hard-coded and must agree with the data:
    5 input channels, ``LayerNorm((5, 100, 100))``, 10x10 patches embedded to
    500 features, and ``fc1`` expecting ``2048*2*2`` flattened features.

    Args:
        net_block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_channels, out_channels, stride, downsample)``.
        layers: Four block counts, one per residual stage.
        num_class: Output width of the final linear layer.
        conformer: Currently unused; kept so existing calls keep working.
        res: Currently unused; kept so existing calls keep working.
    """

    def __init__(self, net_block, layers, num_class, conformer = False, res = True):
        super(Res_Conformer_Unet, self).__init__()

        # =======
        # ResNet trunk
        # Running channel count; net_block_layer updates it after every stage.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(in_channels=5, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # The original assigned an in-place ReLU here and later overwrote it
        # with a plain nn.ReLU(); the plain one is what actually ran, so it is
        # created once, in the same registration slot.
        self.relu = nn.ReLU()
        self.maxpooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.net_block_layer(net_block, 64, layers[0])
        self.layer2 = self.net_block_layer(net_block, 128, layers[1], stride=2)
        self.layer3 = self.net_block_layer(net_block, 256, layers[2], stride=2)
        self.layer4 = self.net_block_layer(net_block, 512, layers[3], stride=2)

        self.avgpooling = nn.AvgPool2d(3, stride=1)
        # Not used by forward(); kept so the module layout stays unchanged.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)

        self.fc1 = nn.Linear(2048*2*2, 128)
        self.fc2 = nn.Linear(128, num_class)
        self.ln1 = nn.LayerNorm((5, 100, 100))

        # =======
        # Conformer
        # Not used by forward(), but its `pe` buffer is part of the state_dict,
        # so removing it would break loading of existing checkpoints.
        self.positional_encode = PositionalEncoding(100)
        self.patch_embedding = PatchEmbedding(in_channels=5, patch_size=10, emb_size=500)
        self.conformer = Conformer(
            input_dim=500,
            num_heads=5,
            ffn_dim=128,
            num_layers=6,
            depthwise_conv_kernel_size=31)

    def net_block_layer(self, net_block, out_channels, num_blocks, stride=1):
        """Build one residual stage of ``num_blocks`` blocks and update ``self.in_channels``.

        Only the first block receives ``stride`` and, when needed, a
        downsample shortcut; the remaining blocks use stride 1 and no downsample.
        """
        stage_channels = out_channels * net_block.expansion

        # The shortcut needs a projection when its shape differs from the main branch.
        downsample = None
        if stride != 1 or self.in_channels != stage_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, stage_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_channels))

        blocks = [net_block(self.in_channels, out_channels, stride, downsample)]
        # Also correct for expansion == 1, where this equals out_channels.
        self.in_channels = stage_channels

        for _ in range(1, num_blocks):
            blocks.append(net_block(self.in_channels, out_channels, 1, None))

        return nn.Sequential(*blocks)

    def forward(self, x):
        """Run the Conformer branch with a skip connection, then the ResNet head.

        The original author's note gives the input and output scale as
        (0, 255); nothing in this method enforces or checks that.

        Args:
            x: Input batch. The hard-coded layers presumably expect
                ``[batch, 5, 100, 100]`` -- TODO confirm against the dataloader.

        Returns:
            Tensor of shape ``[batch, num_class]``.
        """
        # x is only rebound below, never modified in place, so no copy is needed.
        skip = x
        input_shape = x.size()

        # =======
        # Conformer
        tokens = self.patch_embedding(x)
        # Every sequence in the batch has the full length. Build the lengths on
        # the input's own device instead of relying on a notebook-level global.
        lengths = torch.full((tokens.size(0),), tokens.size(1),
                             dtype=torch.long, device=tokens.device)
        tokens, _ = self.conformer(tokens, lengths)
        x = tokens.permute(0, 2, 1).view(input_shape)

        x = self.ln1(x)
        x = x + skip

        # =======
        # Res
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpooling(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpooling(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        return x



# Train

In [None]:
# !pip install accelerate==0.2.0
fp16_training = False

if not fp16_training:
    # Full-precision path: build the network and move it to `device` right away.
    model = Res_Conformer_Unet(bottleneck_block, [3, 4, 23, 3], num_class).to(device)
else:
    print('Accelerating')
    from accelerate import Accelerator
    accelerator = Accelerator(fp16=True)
    # From here on the notebook-wide `device` is the one chosen by Accelerate.
    device = accelerator.device
    # No explicit .to(device): the training cell passes the model to accelerator.prepare().
    model = Res_Conformer_Unet(bottleneck_block, [3, 4, 23, 3], num_class)

"""
x = 123
def global_var():
    global x
    x = "awesome"
    print(x)
print(x)
global_var()
"""

'\nx = 123\ndef global_var():\n    global x\n    x = "awesome"\n    print(x)\nprint(x)\nglobal_var()\n'

In [None]:
# Resume from the previous run unless `init` is set or no saved state exists.
if not init and os.path.exists(f'Temp//Conformer_{stock_symbol}_LastTrainInfo.pk'):
    print('Load from last train epoch')
    # NOTE(security): pickle.load can execute code stored in the file; only
    # load training-state files written by this notebook.
    with open(f'Temp//Conformer_{stock_symbol}_LastTrainInfo.pk', 'rb') as f:
        last_train_info = pickle.load(f)
    lr = last_train_info['lr']
    last_epoch = last_train_info['epoch']
    min_val_loss = last_train_info['min val loss']
    # map_location lets a checkpoint written on a GPU be restored on a
    # CPU-only machine (and the other way round).
    model.load_state_dict(
        torch.load(f'Temp//Conformer_{stock_symbol}_checkpoint_LastTrainModel.pt',
                   map_location=device))
    with open(f'Temp//Conformer_{stock_symbol}_TrainValHistLoss.pk', 'rb') as f:
        loss_train_val = pickle.load(f)
    loss_train = loss_train_val['train']
    loss_valid = loss_train_val['valid']
else:
    # Fresh start: default learning rate, empty loss history, sentinel best loss.
    print("Init model")
    lr = 0.001
    last_epoch = 0
    min_val_loss = 10000.0
    loss_train = []
    loss_valid = []

print(f'Last train epoch: {last_epoch}  '
        f'Last train lr: {lr}   '
        f'Min val loss: {min_val_loss}')

Load from last train epoch
Last train epoch: 313  Last train lr: 5.233476330273609e-05   Min val loss: 0.00047581135945887686


In [None]:
import torch.optim as optim
import pickle

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.00001)
# scheduler.step() runs once per batch, so this step_size multiplies the
# learning rate by 0.9 every 10 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=len(dataloader_train)*10, gamma=0.9)

if fp16_training:
    print('Accelerate Prepare')
    # The validation loader is not passed to prepare(); its batches are moved
    # to `device` explicitly inside the validation loop below.
    model, optimizer, dataloader_train, scheduler = accelerator.prepare(
            model, optimizer, dataloader_train, scheduler
        )

# Make sure the output folders exist before the first checkpoint is written.
os.makedirs('Temp', exist_ok=True)
os.makedirs('ConformerResult', exist_ok=True)

num_epochs = 1500
# NOTE(review): the state file stores the index of the last *completed* epoch
# and this loop restarts at `last_epoch`, so that epoch is trained once more
# (and its losses appended again) after every resume.
for epoch in range(last_epoch, num_epochs):
    # Training phase
    model.train()
    loss_train_e = 0
    for batch_x, batch_y in tqdm(dataloader_train):
        # Moving to `device` is a no-op when the batch is already there.
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)

        # Loss
        loss = criterion(outputs, batch_y)
        if fp16_training:
            accelerator.backward(loss)
        else:
            loss.backward()
        optimizer.step()
        scheduler.step()
        loss_train_e += loss.item()

    loss_train_e /= len(dataloader_train)
    loss_train.append(loss_train_e)

    # Validation phase
    loss_valid_e = 0
    model.eval()
    with torch.no_grad():
        for batch_x_val, batch_y_val in tqdm(dataloader_valid):
            # Always move validation batches: this loader is never prepared by
            # Accelerate, so skipping the move in fp16 mode left them on the CPU.
            batch_x_val = batch_x_val.to(device)
            batch_y_val = batch_y_val.to(device)
            outputs_val = model(batch_x_val)
            loss = criterion(outputs_val, batch_y_val)
            loss_valid_e += loss.item()
    loss_valid_e /= len(dataloader_valid)
    loss_valid.append(loss_valid_e)

    # Always keep the latest weights so training can be resumed.
    torch.save(model.state_dict(), f'Temp/Conformer_{stock_symbol}_checkpoint_LastTrainModel.pt')
    # Additionally keep the weights with the lowest validation loss so far.
    if loss_valid_e < min_val_loss:
        min_val_loss = loss_valid_e
        print(f'New best model found in epoch {epoch} with val loss: {min_val_loss}')
        torch.save(model.state_dict(), f'ConformerResult/Conformer_{stock_symbol}_best_model.pt')

    # Persist the loss history and resume information after every epoch.
    with open(f'Temp/Conformer_{stock_symbol}_TrainValHistLoss.pk', 'wb') as f:
        pickle.dump({'train': loss_train, 'valid': loss_valid}, f)
    with open(f'Temp/Conformer_{stock_symbol}_LastTrainInfo.pk', 'wb') as f:
        pickle.dump({'min val loss': min_val_loss, 'epoch': epoch, 'lr': optimizer.param_groups[0]['lr']}, f)

    # Print statistics
    print(f'Epoch [{epoch}/{num_epochs}]',
        f'Training Loss: {loss_train_e:.10f}',
        f'Valid Loss: {loss_valid_e:.10f}')

100%|██████████| 65/65 [00:52<00:00,  1.25it/s]
100%|██████████| 17/17 [00:03<00:00,  4.37it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [338/1500] Training Loss: 0.0000400423 Valid Loss: 0.0005468423


100%|██████████| 65/65 [00:52<00:00,  1.23it/s]
100%|██████████| 17/17 [00:03<00:00,  4.54it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [339/1500] Training Loss: 0.0000600113 Valid Loss: 0.0041233692


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.34it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [340/1500] Training Loss: 0.0000487394 Valid Loss: 0.0006000285


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.47it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [341/1500] Training Loss: 0.0000408137 Valid Loss: 0.0005676172


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.52it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [342/1500] Training Loss: 0.0000414890 Valid Loss: 0.0005707909


100%|██████████| 65/65 [00:52<00:00,  1.25it/s]
100%|██████████| 17/17 [00:03<00:00,  4.85it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [343/1500] Training Loss: 0.0000346739 Valid Loss: 0.0005721856


100%|██████████| 65/65 [00:52<00:00,  1.23it/s]
100%|██████████| 17/17 [00:03<00:00,  4.29it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [344/1500] Training Loss: 0.0000384841 Valid Loss: 0.0005602012


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.50it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [345/1500] Training Loss: 0.0000413994 Valid Loss: 0.0005465189


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.45it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [346/1500] Training Loss: 0.0000443283 Valid Loss: 0.0005301988


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.64it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [347/1500] Training Loss: 0.0000369518 Valid Loss: 0.0005443810


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.43it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [348/1500] Training Loss: 0.0000312582 Valid Loss: 0.0005713375


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.51it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [349/1500] Training Loss: 0.0000265447 Valid Loss: 0.0005611502


100%|██████████| 65/65 [00:51<00:00,  1.27it/s]
100%|██████████| 17/17 [00:03<00:00,  4.74it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [350/1500] Training Loss: 0.0000252316 Valid Loss: 0.0005667273


100%|██████████| 65/65 [00:51<00:00,  1.27it/s]
100%|██████████| 17/17 [00:03<00:00,  4.41it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [351/1500] Training Loss: 0.0000251860 Valid Loss: 0.0005546615


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.72it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [352/1500] Training Loss: 0.0000258081 Valid Loss: 0.0005465441


100%|██████████| 65/65 [00:51<00:00,  1.27it/s]
100%|██████████| 17/17 [00:03<00:00,  4.39it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [353/1500] Training Loss: 0.0000219461 Valid Loss: 0.0005306587


100%|██████████| 65/65 [00:51<00:00,  1.27it/s]
100%|██████████| 17/17 [00:03<00:00,  4.77it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [354/1500] Training Loss: 0.0000278400 Valid Loss: 0.0005320241


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.63it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [355/1500] Training Loss: 0.0000292094 Valid Loss: 0.0005381836


100%|██████████| 65/65 [00:51<00:00,  1.26it/s]
100%|██████████| 17/17 [00:03<00:00,  4.83it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [356/1500] Training Loss: 0.0000205073 Valid Loss: 0.0005410817


100%|██████████| 65/65 [00:52<00:00,  1.25it/s]
100%|██████████| 17/17 [00:03<00:00,  4.67it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [357/1500] Training Loss: 0.0000164997 Valid Loss: 0.0005400705


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.46it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [358/1500] Training Loss: 0.0000193876 Valid Loss: 0.0005470220


100%|██████████| 65/65 [00:52<00:00,  1.24it/s]
100%|██████████| 17/17 [00:03<00:00,  4.73it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [359/1500] Training Loss: 0.0000172970 Valid Loss: 0.0005457980


100%|██████████| 65/65 [00:51<00:00,  1.27it/s]
100%|██████████| 17/17 [00:03<00:00,  4.52it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [360/1500] Training Loss: 0.0000210812 Valid Loss: 0.0005470608


100%|██████████| 65/65 [00:50<00:00,  1.28it/s]
100%|██████████| 17/17 [00:03<00:00,  4.85it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [361/1500] Training Loss: 0.0000170415 Valid Loss: 0.0005405662


100%|██████████| 65/65 [00:44<00:00,  1.46it/s]
100%|██████████| 17/17 [00:01<00:00, 10.25it/s]
  0%|          | 0/65 [00:00<?, ?it/s]

Epoch [362/1500] Training Loss: 0.0000181677 Valid Loss: 0.0005243238


 94%|█████████▍| 61/65 [00:40<00:03,  1.20it/s]

KeyboardInterrupt: 

# Validate Model

In [None]:
def load_model():
    """Return the network restored from the best-model checkpoint.

    The training loop writes ``model.state_dict()`` to this file, so the
    checkpoint holds weights rather than a module. The network is rebuilt with
    the same constructor arguments used for training and the weights are
    loaded into it. A file that contains a whole pickled module is returned
    as-is after being moved to ``device``.

    Returns:
        The restored ``nn.Module``, placed on ``device``.
    """
    import torch
    checkpoint = torch.load(f'ConformerResult/Conformer_{stock_symbol}_best_model.pt',
                            map_location=device)
    if isinstance(checkpoint, torch.nn.Module):
        # Covers files written with torch.save(model, ...) instead of a state_dict.
        return checkpoint.to(device)

    # Must match the architecture built in the training cell.
    net = Res_Conformer_Unet(bottleneck_block, [3, 4, 23, 3], num_class)
    net.load_state_dict(checkpoint)
    return net.to(device)
model = load_model()

In [None]:

import gc
def test(dataloader=None):
    """Predict over a dataloader and measure how often the predicted sign is right.

    Args:
        dataloader: Iterable of ``(x, y)`` batches. Defaults to the
            notebook-level ``dataloader_test``.

    Returns:
        ``(y_pred_tensor, accuracy)``: the concatenated predictions and a
        0-dim tensor with the fraction of elements whose predicted sign equals
        the sign of the target.
    """
    if dataloader is None:
        dataloader = dataloader_test

    model.eval()
    # Run on whatever device the model's weights live on.
    model_device = next(model.parameters()).device
    s_pred = []
    s_true = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for x, y in tqdm(dataloader):
            y_pred = model(x.to(model_device))
            s_pred.append(y_pred)
            # Keep targets on the same device so they can be compared below.
            s_true.append(y.to(model_device))
    y_pred_tensor = torch.concat(s_pred)
    y_test_tensor = torch.concat(s_true)
    # Average over every element. Dividing the match count by the number of
    # rows, as before, can exceed 1.0 when targets have more than one column.
    accuracy = (torch.sign(y_pred_tensor) == torch.sign(y_test_tensor)).float().mean()
    return y_pred_tensor, accuracy

y_pred, acc = test()
print(acc)

NameError: name 'dataloader_test' is not defined

In [None]:
# Derive y_pred and y_train_pred of shape(N, 2) and numpy type

y_pred_numpy = y_pred.cpu().numpy()

# predict with train set
# NOTE(review): `X` is not defined in any visible cell -- presumably it comes
# from `from utils import *` or an earlier session; confirm before running.
with torch.no_grad():
    # Inference only, on the device that holds the model's weights.
    train_inputs = torch.tensor(X[-100:], dtype = torch.float32).to(next(model.parameters()).device)
    y_train_pred = model(train_inputs)
y_train_numpy = y_train_pred.cpu().numpy()


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Scaling: standardise the test predictions with statistics fitted on
# y_train_numpy (the model's predictions for the last 100 training samples).
scaler = StandardScaler()
scaler.fit(y_train_numpy)
prediction = pd.DataFrame(scaler.transform(y_pred_numpy))

# Get the predicted price of O and C and Prediction merge with complete data
# NOTE(review): `date` and `df` are not defined in any visible cell; confirm
# where they come from before running this cell.
prediction.columns = ['pred_do_1', 'pred_dc_1']
prediction['Date'] = date

true_and_pred = pd.merge(df.reset_index(), prediction, on = 'Date', how = 'left')
# shift(1) moves each row's projected price down onto the following row.
true_and_pred['pred_o'] = (true_and_pred['Open'] * (1 + true_and_pred['pred_do_1'])).shift(1)
true_and_pred['pred_c'] = (true_and_pred['Close'] * (1 + true_and_pred['pred_dc_1'])).shift(1)
true_and_pred['pred_oc'] = true_and_pred['pred_c'] - true_and_pred['pred_o']
true_and_pred['true_oc'] = true_and_pred['Close'] - true_and_pred['Open']

# Backtest: on rows where the predicted open-to-close move is positive, earn
# that row's real open-to-close return; otherwise stay flat (growth factor 1).
df_backtest = true_and_pred[['Open', 'Close', 'true_oc', 'pred_oc']].dropna()
go_long = df_backtest['pred_oc'] > 0
daily_growth = (1 + df_backtest['true_oc'] / df_backtest['Open']).where(go_long, 1.0)
# Running product of the growth factors, starting from an asset value of 1.
asset_list = daily_growth.cumprod().tolist()
asset = asset_list[-1] if asset_list else 1

print(asset)
plt.plot(asset_list, label = 'resnet')
plt.plot(df_backtest.reset_index()['Close']/df_backtest['Close'].iloc[0], label = 'buy hold')
plt.legend()
# Save next to the model checkpoints. The previous leading '/' pointed at the
# filesystem root instead of the project's ConformerResult folder.
os.makedirs('ConformerResult', exist_ok=True)
plt.savefig('ConformerResult/test_backtest.jpg')
# plt.show()