In [20]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [3]:
# Interaction log; the cells below read its "uid", "sid" and "date" columns.
train = pd.read_csv("prepared_data/train.csv")

In [4]:
# Build one chronologically ordered list of item ids ("sid") per user ("uid").
# The frame is sorted by date once (stable sort, so rows sharing a date keep their
# file order) instead of sorting inside a Python lambda for every group; selecting
# the "sid" column before apply also avoids handing the grouping column to apply.
grouped_and_sorted = (
    train.sort_values(by="date", kind="stable")
    .groupby("uid")["sid"]
    .apply(list)
)

In [5]:
# Shuffle the users. A fixed seed keeps the order, and everything derived from it
# in later cells, identical across kernel restarts.
shuffled = grouped_and_sorted.sample(frac=1, random_state=42)

In [11]:
# Underlying object array: one Python list of item ids per user.
sequences = shuffled.to_numpy()

In [17]:
# Constant reward of 1 for every item, mirroring the lengths of `sequences`.
rewards = [[1] * len(seq) for seq in sequences]

In [29]:
def rolling_window(a, window):
    """Return every run of ``window`` consecutive elements along the last axis.

    Parameters
    ----------
    a : array_like
        Input data. It is copied with ``np.array``, so the result never
        aliases the caller's object.
    window : int
        Number of consecutive elements in each window.

    Returns
    -------
    numpy.ndarray
        Read-only strided view of shape
        ``a.shape[:-1] + (a.shape[-1] - window + 1, window)``.
        Neighbouring windows share memory, so the view is not writable;
        call ``.copy()`` on the result if it has to be modified.

    Raises
    ------
    ValueError
        If ``window`` is negative or so large that the number of windows
        would be negative.
    """
    a = np.array(a)
    n_windows = a.shape[-1] - window + 1
    if window < 0 or n_windows < 0:
        raise ValueError(
            f"window={window} is not valid for a last axis of length {a.shape[-1]}"
        )
    shape = a.shape[:-1] + (n_windows, window)
    # Stepping to the next window and stepping inside a window both advance by
    # one element of the last axis, hence the repeated stride.
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(
        a, shape=shape, strides=strides, writeable=False
    )

In [40]:
# Number of items in each user's sequence.
sizes = list(map(len, sequences))

In [30]:
# The sliding windows built below hold frame_size + 1 items each; later cells use
# the first frame_size items as the state and the last item as the action.
frame_size = 5

In [36]:
# Stack every user's windows of frame_size + 1 consecutive items into one 2-D array.
sequences_t = np.concatenate(
    [rolling_window(seq, frame_size + 1) for seq in sequences],
    axis=0,
)

In [37]:
# (total number of windows, frame_size + 1)
sequences_t.shape

(45122519, 6)

In [38]:
# Same windowing applied to the rewards, so rows line up with sequences_t.
rewards_t = np.concatenate(
    [rolling_window(seq_rewards, frame_size + 1) for seq_rewards in rewards],
    axis=0,
)

In [39]:
# Should equal sequences_t.shape: one reward per item of every window.
rewards_t.shape

(45122519, 6)

In [45]:
# Flag the last window of every user sequence as terminal.
sizes_t = torch.tensor(sizes)
b_size = len(rewards_t)
# A sequence of length L contributes L - frame_size windows, so the running total
# minus one is the row index of that sequence's final window.
last_window_idx = (sizes_t - frame_size).cumsum(dim=0) - 1
done = torch.zeros(b_size)
done[last_window_idx] = 1

In [57]:
# Peek at the first window followed by its rewards, joined into one 1-D tensor.
torch.cat([torch.tensor(sequences_t[0]), torch.tensor(rewards_t[0])], 0)

tensor([ 1305, 17155, 11281,  6970,  6690, 13649,     1,     1,     1,     1,
            1,     1])

In [52]:
# Length of the first user's sequence, used to sanity-check `done` in the next cell.
len(sequences[0])

35

In [53]:
# A sequence of length L yields L - frame_size windows, so the first terminal flag
# is expected at index L - frame_size - 1.
done[:35]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.])

In [82]:
class DQN(nn.Module):
    """MLP Q-network over a fixed-length window of action ids.

    Each id in the input window is embedded, the embeddings are concatenated,
    and a three-layer MLP produces one Q-value per possible action.
    """

    def __init__(
        self,
        action_n: int,
        embedding_dim: int,
        seq_size: int,
        hidden_dim: int = 128,
        ) -> None:
        """Build the embedding table and the MLP head.

        Args:
            action_n: Number of distinct actions; sizes both the embedding
                table and the output layer.
            embedding_dim: Size of each action embedding.
            seq_size: Number of action ids in every input state.
            hidden_dim: Width of the two hidden layers.
        """
        super().__init__()

        self.action_embedding = nn.Embedding(action_n, embedding_dim)
        self.linears = nn.Sequential(
            nn.Linear(seq_size * embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_n),
        )

    def forward(self, state):
        """Map integer states of shape [B, S] to Q-values of shape [B, action_n]."""
        x = self.action_embedding(state)  # [B, S] -> [B, S, F]
        x = x.flatten(start_dim=1)  # [B, S, F] -> [B, S * F]
        return self.linears(x)

In [94]:
# Smallest item id; item ids are fed directly to nn.Embedding as indices in DQN.
train["sid"].min()

0

In [83]:
# Item ids index the embedding table directly, so the table needs max id + 1 rows;
# counting unique ids is only enough when the ids are contiguous from 0.
# seq_size reuses frame_size so the network input always matches the window length.
model = DQN(action_n=int(train["sid"].max()) + 1, embedding_dim=32, seq_size=frame_size)

In [87]:
# Split the first window into a batch of one: all items but the last form the
# state, and the last item is the action.
first_window = torch.tensor(sequences_t[0])
state = first_window[:-1].unsqueeze(0)
action = first_window[-1].unsqueeze(0)

In [88]:
# Q-values for every action for the single-state batch; shape [1, action_n].
q_values = model(state)

In [91]:
# Direct lookup of the Q-value for item id 13649, to compare with the
# gather-based lookup in the next cell.
q_values[0][13649]

tensor(0.0278, grad_fn=<SelectBackward0>)

In [89]:
# Pick each row's Q-value for its taken action: gather along dim 1 with indices
# of shape [B, 1], then drop that dimension again.
q_values.gather(1, action.unsqueeze(1)).squeeze(1)

tensor([0.0278], grad_fn=<SqueezeBackward1>)

In [5]:
import pickle
import numpy as np

# Load the pickled sequence dataset.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load files from a trusted source.
with open("prepared_data/seq_dataset.pkl", "rb") as f:
    data = pickle.load(f)

In [6]:
# Top-level keys of the loaded object.
list(data.keys())

['train', 'validation_tr', 'validation_te', 'test_tr', 'test_te']

In [25]:
# Per-entry lengths for the three splits inspected below.
train_len, validation_te_len, test_te_len = (
    np.array([len(seq) for seq in data[split]])
    for split in ("train", "validation_te", "test_te")
)

In [32]:
# Median length, 99th-percentile length, and fraction of entries with length <= 512.
np.median(train_len), np.quantile(train_len, 0.99), (train_len <= 512).sum() / len(train_len)

(61.0, 769.0, 0.9662341726759425)

In [33]:
# Median length, 99th-percentile length, and fraction of entries with length <= 128.
np.median(validation_te_len), np.quantile(validation_te_len, 0.99), (validation_te_len <= 128).sum() / len(validation_te_len)

(12.0, 152.0, 0.982)

In [34]:
# Median length, 99th-percentile length, and fraction of entries with length <= 128.
np.median(test_te_len), np.quantile(test_te_len, 0.99), (test_te_len <= 128).sum() / len(test_te_len)

(13.0, 155.0, 0.981475)