In [2]:
import math
import numpy as np
import torch
import torch.nn as nn
import torchvision
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [3]:
learning_rate = 0.001
hidden_size1 = 50
hidden_size2 = 20
censor_weight = 0.1
time_weight = 1
num_epochs = 10
input_size = 102

In [4]:
sc_x = StandardScaler()
sc_time = StandardScaler()
class Standardized:
    def __call__(self, sample):
        inputs, time_label, censor_label = sample
        sample = sc_x.transform(inputs), sc_time.transform(time_label), censor_label
        return sample

class ToTensor:
    def __call__(self,sample):
        inputs, time_label, censor_label = sample
        return torch.from_numpy(inputs), torch.from_numpy(time_label), torch.from_numpy(censor_label)
        # return torch.Tensor(inputs), torch.Tensor(time_label), torch.Tensor(censor_label)


class OrderBookDataset(Dataset):
    def __init__(self, transform=None, path1=f'D:\Postgraduate_Course_Information/Director Peng/ML_Time_to_Execution/execution_torch_code/test_data/buy_x_train.csv',
                           path2=f'D:/Postgraduate_Course_Information/Director Peng/ML_Time_to_Execution/execution_torch_code/test_data/buy_y_train.csv'):
        # data loading
        x = np.loadtxt(path1, dtype=np.float32, delimiter=',')
        y = np.loadtxt(path2, dtype=np.float32, delimiter=',')
        
        # self.x = torch.from_numpy(x)
        # self.y_time = torch.unsqueeze(torch.from_numpy(y[:, 0]),dim=1)
        # self.y_censor = torch.unsqueeze(torch.from_numpy(y[:, 1]),dim=1)
        self.x = x
        self.y_time = np.log(y[:, 0]).reshape(-1,1)
        self.y_censor = y[:, 1].reshape(-1,1)      
        self.n_samples = y.shape[0]
        self.transform = transform
    def __getitem__(self, index, ):
        sample = self.x[index].reshape(-1,102), self.y_time[index].reshape(-1,1), self.y_censor[index].reshape(-1,1)
        if self.transform:
            sc_x.fit(self.x)
            sc_time.fit(self.y_time)
            sample = self.transform(sample)
        return sample
        # dataset

    def __len__(self):
        return self.n_samples

In [5]:
composed = torchvision.transforms.Compose([Standardized(),ToTensor()])
# composed = torchvision.transforms.Compose([ToTensor()])
dataset = OrderBookDataset(transform=composed)
dataloader = DataLoader(dataset=dataset, batch_size=50, shuffle=True, num_workers=0)

total_samples = len(dataset)
n_iterations = math.ceil(total_samples / 4)

In [8]:
inputs, time_label, censor_label = dataset[2]
inputs.size()

torch.Size([1, 102])

In [6]:
# model building
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_size=2):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size1)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size1, hidden_size2)
        self.l3 = nn.Linear(hidden_size2, num_size)
        self.norm1 = nn.BatchNorm1d(input_size)
        self.norm2 = nn.BatchNorm1d(hidden_size1)
        self.norm3 = nn.BatchNorm1d(hidden_size2)
        
        nn.init.xavier_uniform_(self.l1.weight)
        nn.init.xavier_uniform_(self.l2.weight)
        nn.init.xavier_uniform_(self.l3.weight)

    def forward(self, x):
        out = self.norm1(x)
        out = self.l1(out)
        out = self.norm2(out)
        out = self.relu(out)
        out = self.l2(out)
        out = self.norm3(out)
        out = self.relu(out)
        out = self.l3(out)
        return out


In [7]:
# loss function
model = NeuralNet(input_size, hidden_size1, hidden_size2)
criterion1 = nn.MSELoss()
criterion2 = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [11]:
a = list(enumerate(dataloader))
a[0].size()

In [9]:
# loop iteration
for epochs in range(num_epochs):
    for i, (inputs, time_label, censor_label) in enumerate(dataloader):
        # forward backward update
        inputs = torch.squeeze(inputs)
        # print(inputs.size())
        time_label = torch.squeeze(time_label,1)
        # print(time_label.size())
        censor_label = torch.squeeze(censor_label,1)
        # forward
        y_predicted = model(inputs)
        # print(y_predicted.size())
        out1 = y_predicted[:, 0] 
        out2 = y_predicted[:, 1]

        # print(censor_label.size())
        # print(torch.unsqueeze(torch.sigmoid(out1), dim=1).size())
        # print(torch.unsqueeze(torch.sigmoid(out1), dim=1))
        # print(torch.unsqueeze(censor_label, dim=1))


        loss1 = criterion1(out1, time_label)
        loss2 = criterion2(out2, censor_label)

        # print(loss1.size())
        # print(loss2.size())

        #loss = criterion1(y_predicted, time_label)
        loss = (time_weight * loss1 + censor_weight * loss2)
        # print(loss.size())
        # backward pass
        loss.backward()

        # update
        optimizer.step()
        optimizer.zero_grad()

        if (i + 1) % 30 == 0:
            print(f'epoch:{epochs + 1}, loss = {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


ValueError: Target size (torch.Size([30, 1])) must be the same as input size (torch.Size([30]))

In [8]:
# loop iteration
for epochs in range(num_epochs):
    for i, (inputs, time_label, censor_label) in enumerate(dataloader):
        # forward backward update
        inputs = torch.squeeze(inputs)
        # print(inputs.size())
        time_label = torch.squeeze(time_label,1)
        # print(time_label.size())
        censor_label = torch.squeeze(censor_label,1)
        # forward
        y_predicted = model(inputs)
        # print(y_predicted.size())
        out1 = torch.unsqueeze(y_predicted[:, 0],1)
        # print(out1.size())
        out2 = torch.unsqueeze(y_predicted[:, 1],1)

        # print(censor_label.size())
        # print(torch.unsqueeze(torch.sigmoid(out1), dim=1).size())
        # print(torch.unsqueeze(torch.sigmoid(out1), dim=1))
        # print(torch.unsqueeze(censor_label, dim=1))


        loss1 = criterion1(out1, time_label)
        loss2 = criterion2(out2, censor_label)

        # print(loss1.size())
        # print(loss2.size())

        # loss = criterion1(y_predicted, time_label)
        loss = (time_weight * loss1 + censor_weight * loss2)
        # print(loss.size())
        # backward pass
        loss.backward()

        # update
        optimizer.step()
        optimizer.zero_grad()

        if (i + 1) % 30 == 0:
            print(f'epoch:{epochs + 1}, loss = {loss.item():.4f}')

epoch:1, loss = 1.0909
epoch:1, loss = 1.0757
epoch:1, loss = 1.1086
epoch:1, loss = 1.1195
epoch:1, loss = 1.2189
epoch:1, loss = 1.0248
epoch:1, loss = 1.0543
epoch:1, loss = 0.8942
epoch:1, loss = 0.9429
epoch:2, loss = 0.7653
epoch:2, loss = 0.8082
epoch:2, loss = 0.7275
epoch:2, loss = 0.8951
epoch:2, loss = 1.3327
epoch:2, loss = 0.8477
epoch:2, loss = 0.8035
epoch:2, loss = 0.8568
epoch:2, loss = 0.8837
epoch:3, loss = 1.1029
epoch:3, loss = 1.1755
epoch:3, loss = 0.7664
epoch:3, loss = 0.9559
epoch:3, loss = 0.9884
epoch:3, loss = 1.0379
epoch:3, loss = 0.6356
epoch:3, loss = 0.9902
epoch:3, loss = 0.8851
epoch:4, loss = 0.9355
epoch:4, loss = 0.6988
epoch:4, loss = 1.2170
epoch:4, loss = 1.0475
epoch:4, loss = 0.8060
epoch:4, loss = 0.8609
epoch:4, loss = 1.0854
epoch:4, loss = 0.7107
epoch:4, loss = 0.8318
epoch:5, loss = 0.6062
epoch:5, loss = 1.0860
epoch:5, loss = 0.9039
epoch:5, loss = 1.1394
epoch:5, loss = 0.9231
epoch:5, loss = 0.6422
epoch:5, loss = 1.0528
epoch:5, lo

In [6]:
# test result
y_test = np.loadtxt(f'test_data/buy_y_test.csv', dtype=np.float32, delimiter=',')
x_test = np.loadtxt(f'test_data/buy_x_test.csv', dtype=np.float32, delimiter=',')
x_test = torch.from_numpy(sc_x.transform(x_test.reshape(-1,102)))
y_time_test = torch.from_numpy(sc_time.transform(np.log(y_test[:, 0].reshape(-1,1))))
y_censor_test = torch.from_numpy(y_test[:, 1].reshape(-1,1))


with torch.no_grad():
    y_predicted = model(x_test)
    y_time_predicted = y_predicted[:,0].reshape(-1,1)
    y_censor_predicted = y_predicted[:,1]
    y_predicted_cls = torch.sigmoid(y_censor_predicted).round()
    censor_acc = (y_predicted_cls == y_censor_test).sum()/float(y_censor_test.shape[0])
    # print(y_predicted_cls.eq(y_censor_test).sum())
    print(y_time_predicted)
    print(y_time_test)
    print(y_predicted_cls.size())
    print(y_censor_test.shape[0])
    time_loss = criterion2(y_time_predicted, y_time_test)
    print(y_predicted_cls)
    print(f'censor accuracy = {censor_acc:4f}, time loss = {time_loss:4f}')

OSError: test_data/buy_y_test.csv not found.

In [46]:
y_censor_predicted


tensor([0.5424, 0.6318, 0.5679,  ..., 0.5142, 0.5483, 0.5526])