In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import torch
import copy
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

In [2]:
### config
RANDOM_SEED = 14
SEED = 14
BATCH_SIZE = 64
NUM_WORKERS = 4
LR = 1e-4
EPOCHS = 10
WEIGHT_DECAY = 1e-2
num_epoch = 200
device = 'cuda:0'
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [3]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.sequence = [s[0] for s in data]
        self.targets = [s[1] for s in data]
        self.mask = [(s[2]) for s in data]

    def __len__(self):
        return len(self.sequence)

    def __getitem__(self, idx):
        return self.sequence[idx], self.targets[idx], self.mask[idx]

In [4]:
class MyModel(torch.nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()

        self.encoder = torch.nn.TransformerEncoder(
            encoder_layer=torch.nn.TransformerEncoderLayer(d_model=8, nhead=4, dim_feedforward=128, dropout=0.1, activation='relu'),
            num_layers=6)


    def forward(self, x):
        x = self.encoder(x)
        return x

In [5]:
def collate_function(data):
    tmp_data = []
    tmp_target = []
    tmp_mask = []
    for i in range(len(data)):
        for itr2 in range(len(data[i][0])):
            for itr3 in range(len(data[i][0][itr2])):
                data[i][0][itr2][itr3] /= 16
                data[i][1][itr2][itr3] /= 16
        tmp_data.append(data[i][0])
        tmp_target.append(data[i][1])
        tmp_mask.append(data[i][2])
    return [torch.Tensor(tmp_data), torch.Tensor(tmp_target), tmp_mask]

In [6]:
f = open('./dataset.pickle', 'rb')
data = pickle.load(f)
f.close()
f = open('./dataset_mask.pickle', 'rb')
data_mask = pickle.load(f)
f.close()
f = open('./dataset_target.pickle', 'rb')
data_target = pickle.load(f)
f.close()

In [7]:
data_list = []
for itr1 in data_target.keys():
    data_list.append((data[str(itr1)], data_target[str(itr1)], data_mask[str(itr1)]))

dataset = MyDataset(data_list)
dataloader = DataLoader(dataset, batch_size = 32, shuffle = True, num_workers = NUM_WORKERS, collate_fn = collate_function)

In [8]:
model = MyModel()
model = model.to(device)

In [11]:
# print(len(dataloader))
for data in iter(dataloader):
    # print(inputs)
    # print(labels)
    # print(mask)
    print(len(data))
    break

3


In [12]:
encoder = torch.nn.Linear(16,8)
encoder = encoder.to(device)
decoder = torch.nn.Linear(8,16)
decoder = decoder.to(device)
softmax_fn = torch.nn.Softmax(dim=0)

In [13]:
for test in dataloader:
    print(test)
    break

[tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
         [-0.0625, -0.0625, -0.0625,  ..., -0.0625, -0.0625, -0.0625]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [-0.0625, -0.0625, -0.0625,  ..., -0.0625, -0.0625, -0.0625]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
         [-0.0625, -0.0625, -0.0625,  ..., -

In [14]:
loss_fn = torch.nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
print(target)
print(input)
loss_fn(input, target)

tensor([[0.4994, 0.1406, 0.1566, 0.0599, 0.1436],
        [0.0839, 0.3648, 0.2633, 0.1661, 0.1219],
        [0.3424, 0.1231, 0.2603, 0.1792, 0.0950]])
tensor([[-0.8328,  1.8499,  0.5260,  0.5182,  0.3945],
        [ 0.6742,  0.5450,  1.3759, -0.1513, -0.3500],
        [-0.0338, -1.3396, -0.4590,  2.7691,  0.7530]], requires_grad=True)


tensor(2.2772, grad_fn=<DivBackward1>)

In [15]:
writer = SummaryWriter('./exp_cross')

In [16]:
# val_acc_history = []
# loss_history = []
# best_model_wts = copy.deepcopy(model.state_dict())
# best_finetune_model = copy.deepcopy(finetune.state_dict())
train_steps = 1
# This loss is for
criterion = torch.nn.CrossEntropyLoss()
criterion_2 = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, betas=(0.9, 0.999), eps=1e-08, weight_decay=WEIGHT_DECAY)
for epoch in range(num_epoch):
    # tqdm.set_description('\nEpoch {}/{}'.format(epoch + 1, num_epochs))

    model.train()  # Set model to training mode
    loss = 0.0
        # Iterate over data.
    data_set = tqdm(iter(dataloader), desc = f'Epoch :{epoch}')
    for data in data_set:
        ## the label has been normalize in the collate_fn
        # for itr1 in range(len(data[0])):
        #     data[0][itr1] = torch.nn.functional.normalize(data[0][itr1])
        inputs = data[0].to('cuda:0')
        # finetune = finetune.to('cuda:0')
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # Get model outputs and calculate loss
        inputs = encoder(inputs)
        outputs = model(inputs)
        outputs = decoder(outputs)
        outputs = outputs.to('cpu')
        # print(len(outputs))
        for itr1 in range(len(outputs)):
            # target1 = softmax_fn(data[1][itr1][data[2][itr1][0]])
            # target2 = softmax_fn(data[1][itr1][data[2][itr1][1]])
            # target3 = softmax_fn(data[1][itr1][data[2][itr1][2]])
            ## the target has been normalize in the collate_fn
            target1 = data[1][itr1][data[2][itr1][0]]
            target2 = data[1][itr1][data[2][itr1][1]]
            target3 = data[1][itr1][data[2][itr1][2]]
            loss = criterion(torch.unsqueeze(outputs[itr1][data[2][itr1][0]], dim = 0), torch.unsqueeze(target1, dim = 0))
            loss += criterion(torch.unsqueeze(outputs[itr1][data[2][itr1][1]], dim = 0), torch.unsqueeze(target2, dim = 0))
            loss += criterion(torch.unsqueeze(outputs[itr1][data[2][itr1][2]], dim = 0), torch.unsqueeze(target3, dim = 0))

        loss.backward()
        optimizer.step()
        train_steps += 1

        if(train_steps % 100) == 0:
            data_set.set_description(f'Epoch :{epoch}' + f' loss :{round(loss.item(), 3)}')
            writer.add_scalar('training_loss', loss.item(), train_steps)
        if(train_steps % 10000) == 0:
            torch.save(model.state_dict(), './exp_cross/pretrain_' + str(train_steps) + '.model')
            torch.save(encoder.state_dict(), './exp_cross/encoder_' + str(train_steps) + '.model')
            torch.save(decoder.state_dict(), './exp_cross/decoder_' + str(train_steps) + '.model')
            torch.save(optimizer.state_dict(), './exp_cross/optimizer_' + str(train_steps) + '.model')
# load best model weights
# model.load_state_dict(best_model_wts)
# finetune.load_state_dict(best_finetune_model)



Epoch :0:   0%|          | 0/15564 [00:00<?, ?it/s]


NameError: name 'target1' is not defined