In [1]:
import glob, torch
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
device = 'cuda' if torch.cuda.is_available() else 'cpu'

leverage = 200
resolution = 100
block_size = 40
batch_size = 64
max_iters  = 1
eval_interval  = 500     
learning_rate  = 0.0003
eval_iters  = 300
n_embd  = 512                  
n_head = 8
n_layer  = 6
dropout = 0.2

## Load Dataset

In [2]:
def Data_Loader(path):
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'])
    df['lag'] = np.where(df['date'].diff(1).dt.total_seconds()> 4800, 1, 0)
    df['ticks'] = ((df['close'] - np.min(df['close'])) * leverage / np.min(df['close']) * resolution).astype(int)
    df = df.set_index('date')
    return df

text = []
for path in glob.glob("./data/*")[: 1]:
    text.append(-1)
    df, code = Data_Loader(path), path.replace('.csv', '').replace('\\', '/').split('/')[-1]
    text += list(df['ticks'])
    #plt.figure(figsize=[15, 3]); plt.scatter(df['ticks'].index, df['ticks'], s = 0.1); plt.xlabel(code); plt.ylabel('%'); plt.show()
len(text), max(text)

(2744, 190)

## Preprocessing

In [3]:
class Sampler():
    def __init__(self):
        pass
        
    def shift(self, x, vocab_size):
        if np.random.rand() > 0.5:
            x = np.array(x) + int(np.random.rand() * (vocab_size - np.max(x)))
        else:
            x = np.array(x) - int(np.random.rand() * np.min(x))
        return list(x)
        
    def fit(self, text, train_rate=0.9):
        the_chars = sorted(list(set(text)))
        vocab_size = len(the_chars)
        stoi, itos = {ch: i for i, ch in enumerate(the_chars)}, {i: ch for i, ch in enumerate(the_chars)}
        self.encode, self.decode = lambda s: [stoi[c] for c in s], lambda l: [itos[i] for i in l]  
        
        data = torch.tensor(self.encode(text), dtype=torch.long)
        n = int(train_rate * len(data))
        self.train_data, self.val_data = data[:n], data[n:]
        #population = []
        #for _ in range(1):
        #    for i in range(len(text) - block_size):
        #        x = text[i: i+block_size]
        #        if -1 not in x:
        #            population += self.shift(x, vocab_size)
        #sns.displot(population); plt.show()
        return the_chars, vocab_size

    def get_batch(self, split):
        data = self.train_data if split == "train" else self.val_data
        x, y = [], []
        while len(x) < batch_size:
            i = torch.randint(len(data) - block_size - 1, (1, ))[0]
            sample = data[i : i + 1 + block_size]
            if -1 not in sample:
                sample = self.shift(sample.detach().numpy(), vocab_size)
                x.append(sample[: -1]); y.append(sample[1: ])
        return torch.tensor(x).to(device), torch.tensor(y).to(device)

processor = Sampler()
the_chars, vocab_size = processor.fit(text, 0.9)
print("vocab_size:", vocab_size)

vocab_size: 184


In [4]:
x, y = processor.get_batch("train")
x.shape, y.shape

(torch.Size([64, 40]), torch.Size([64, 40]))

## Training

In [5]:
import torch.nn as nn
from torch.nn import functional as F

class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        self.key   = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        tril_def = torch.tril( torch.ones(block_size, block_size) )
        self.register_buffer('tril', tril_def)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, E = x.shape
        k = self.key(x)
        q = self.query(x)
        E2 = 64
        wei = q @ k.transpose(-2, -1) * E2 ** -0.5
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))   
        wei = F.softmax(wei, dim= -1)
        wei = self.dropout(wei)
        v   = self.value(x)
        out = wei @ v
        return out

class FeedForward(nn.Module):
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )
        
    def forward(self, x):
        return self.net(x)

class MultiHeadAttention(nn.Module):
    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads ], dim = -1)
        out = self.proj(out)
        out = self.dropout(out)
        return out

class Block(nn.Module):
    def __init__(self, n_embd, n_head):
        super().__init__()
        head_size = n_embd // n_head
        self.sa   = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedForward( n_embd)
        self.ln1  = nn.LayerNorm(n_embd)
        self.ln2  = nn.LayerNorm(n_embd)
        
    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

class GPT(nn.Module):
    def __init__(self):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.pos_emb_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)        
        self.lm_ffw_head = nn.Linear(n_embd, vocab_size)
        
    def forward(self, idx, targets=None):
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx)      
        pos_emb = self.pos_emb_table(torch.arange(T, device=device))  
        x = tok_emb + pos_emb
        x = self.blocks(x)    
        x = self.ln_f(x)
        logits = self.lm_ffw_head(x)
        if targets is None:
            loss = None
        else:
            B, T, E  = logits.shape
            logits  = logits.view( B*T, E)
            targets = targets.view(B*T)
            loss    = F.cross_entropy(logits, targets)
        return logits, loss
        
    def generate(self, idx, max_new_tokens):
        input = len(idx[0])
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]
            logits, loss = self(idx_cond)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim= -1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, idx_next), dim=1)
        return idx[0][input:]

In [6]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = processor.get_batch('split')
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

model = GPT()
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
for iter in range(max_iters):
    if iter % eval_interval == 0:
        losses = estimate_loss()
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")
    x, y = processor.get_batch('train')
    logits, loss = model(x, y)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

step 0: train loss 5.3231, val loss 5.3203


In [8]:
prompt = torch.tensor(processor.val_data[: block_size], dtype=torch.long, device=device)[None, ]
output = model.generate(prompt, max_new_tokens=5).tolist()
print('Ground True:', processor.val_data[block_size: block_size + 5].detach().numpy())
print('Prediction: ', np.array(processor.decode(output)))

  prompt = torch.tensor(processor.val_data[: block_size], dtype=torch.long, device=device)[None, ]


Input:  [163 164 165 166 168 168 168 168 171 170 168 169 167 171 170 167 168 170
 171 170 173 173 172 172 174 174 175 176 178 178 179 179 180 182 183 182
 181 179 181 181]
40
tensor([[163, 164, 165, 166, 168, 168, 168, 168, 171, 170, 168, 169, 167, 171,
         170, 167, 168, 170, 171, 170, 173, 173, 172, 172, 174, 174, 175, 176,
         178, 178, 179, 179, 180, 182, 183, 182, 181, 179, 181, 181,  24,  70,
         176,  45, 141]])
Ground True: [178 174 171 167 165]
Prediction:  [ 23  73 182  44 144]
