# Import tools

In [1]:
# git add ./
import json
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils

# import argparse

In [2]:
import argparse


def _str2bool(value):
    """Convert a command-line string such as "true"/"False"/"0" to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


# Notebook-wide configuration; every later cell reads its settings from `args`.
parser = argparse.ArgumentParser()
parser.add_argument("--d", default="Glove", type=str)           # data sub-directory name
parser.add_argument("--t", default=True, type=_str2bool)        # test mode flag
parser.add_argument("--maxLen", default=20, type=int)           # max time steps kept per sample
parser.add_argument("--BatchSize", default=3, type=int)
parser.add_argument("--embedLen", default=300, type=int)        # features per time step
parser.add_argument("--EPOCH", default=2, type=int)
parser.add_argument("--DEVICE", default="cuda:0", type=str)
parser.add_argument("--LR", default=0.0001, type=float)
# An explicit empty list is parsed instead of sys.argv, so only the defaults
# above take effect here.
args = parser.parse_args(args=[])

In [3]:
# Expand the data-set name into its directory path. The guard keeps the cell
# idempotent: re-running it must not prepend the root directory a second time.
_data_root = "./數值資料"
if not args.d.startswith(_data_root + "/"):
    args.d = "%s/%s" % (_data_root, args.d)

# read data in

In [4]:
def read_data(str_):
    """Load one JSON file from ``args.d`` and bind it to a module-level variable.

    The variable is named after the file with its ``.json`` suffix removed
    (``"normal_testing.json"`` -> ``normal_testing``). When ``args.t`` is set,
    only the first 20 records are kept.

    Args:
        str_: File name (not a path) inside the directory ``args.d``.

    Side effects:
        Assigns into ``globals()`` and prints the name that was assigned.
    """
    path = '%s/%s' % (args.d, str_)
    # Pin the encoding so decoding does not depend on the platform default.
    with open(path, encoding="utf-8") as json_file:
        buf = json.load(json_file)

    if args.t:
        # Test mode: keep a small prefix so the rest of the notebook runs quickly.
        test_mode_limit = 20
        buf = buf[:test_mode_limit]

    # Strip the extension only when it is really there, rather than blindly
    # cutting the last five characters.
    suffix = ".json"
    var_name = str_[:-len(suffix)] if str_.endswith(suffix) else str_

    globals()[var_name] = buf
    print("assign %s" % var_name)

In [5]:
# Test mode loads a single file; otherwise every JSON file in the data directory
# is loaded, each one becoming a module-level variable via read_data().
if args.t:
    read_data("normal_testing.json")
else:
    # A plain loop replaces the side-effect-only list comprehension. Entries that
    # are not JSON files (e.g. checkpoint folders) are skipped, and the sort makes
    # the load order deterministic.
    json_files = sorted(f for f in os.listdir(args.d) if f.endswith(".json"))
    for file_name in tqdm(json_files):
        read_data(file_name)

assign normal_testing


# metadata 

In [6]:
# buf = [len(i) for i in normal_training]
# sns.distplot(buf, kde=False)

# prepare data Loader

In [7]:
def prepare_data_loader(data_, label_, shuffle_):
    """Build a ``DataLoader`` of padded sequences that all share one label.

    Each sample is truncated to ``args.maxLen`` time steps, then all samples are
    zero-padded by ``pad_sequence`` to a common length and paired with
    ``label_``.

    Args:
        data_: Non-empty list of samples; each sample is a sequence of
            per-time-step feature vectors.
        label_: Integer class label assigned to every sample.
        shuffle_: Whether the loader reshuffles the data every epoch.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(data, label)`` batches of
        size ``args.BatchSize``.

    Raises:
        AssertionError: if an argument has the wrong type or ``data_`` is empty.
    """
    assert isinstance(data_, list), "data is not list"
    assert type(label_) is int, "label_ is not int"
    assert isinstance(shuffle_, bool), "shuffle_ is not bool"
    # pad_sequence fails with an obscure error on an empty list; fail clearly instead.
    assert len(data_) > 0, "data is empty"

    sequences = [torch.tensor(sample[:args.maxLen], dtype=torch.float32)
                 for sample in tqdm(data_)]
    # Already a float32 tensor, so no extra FloatTensor copy is needed afterwards.
    padded = rnn_utils.pad_sequence(sequences, batch_first=True)
    print("共 %s 筆 \n每筆長度: %s \n每個時間點有 %s 維" % (len(padded), len(padded[0]), len(padded[0][0])))

    label = torch.LongTensor([label_] * len(padded))

    data_set = Data.TensorDataset(padded, label)
    data_loader = Data.DataLoader(dataset=data_set,
                                  batch_size=args.BatchSize,
                                  shuffle=shuffle_)

    return data_loader

In [8]:
# Build the shuffled loader used by the training loop; every sample gets label 0.
# NOTE(review): the loader is named "train" but is fed `normal_testing` — confirm
# this is only intended for the test-mode run.
train_loader = prepare_data_loader(data_ = normal_testing, 
                                   label_ = 0, 
                                   shuffle_ = True)

100%|██████████| 20/20 [00:00<00:00, 11503.85it/s]

共 20 筆 
每筆長度: 15 
每個時間點有 300 維





In [9]:
# Pull one batch from the loader for shape checks; the labels are discarded.
X, _ = next(iter(train_loader))


In [10]:
# Clamp the configured maximum length to the sequence length of the sampled
# batch; Net sizes its pooling windows from args.maxLen.
args.maxLen = min(len(X[0]), args.maxLen)

In [11]:
# Display the effective maximum sequence length.
args.maxLen

15

# Define model

In [12]:
class Net(nn.Module):
    """Parallel 1-D convolutions (kernel sizes 2, 3, 4) with max pooling.

    The input is permuted so that its last axis becomes the channel axis, fed
    through three bias-free ``Conv1d`` branches with two output channels each,
    max-pooled per branch, and the pooled features are concatenated.

    Args:
        embed_len: Number of input channels, i.e. features per time step.
            Defaults to ``args.embedLen`` when omitted.
        max_len: Sequence length the pooling windows are sized for.
            Defaults to ``args.maxLen`` when omitted.

    Raises:
        ValueError: if ``max_len`` is smaller than the largest kernel size (4).
    """

    def __init__(self, embed_len=None, max_len=None):
        super(Net, self).__init__()

        # Fall back to the notebook-wide configuration only when the caller did
        # not pass explicit sizes, so `Net()` keeps working as before.
        if embed_len is None:
            embed_len = args.embedLen
        if max_len is None:
            max_len = args.maxLen
        if max_len < 4:
            raise ValueError("max_len must be at least 4, got %s" % max_len)

        # Test setting: only two filters for each kernel size.
        self.conv2 = nn.Conv1d(in_channels=embed_len,
                               out_channels=2,
                               kernel_size=2,
                               bias=False)

        self.conv3 = nn.Conv1d(in_channels=embed_len,
                               out_channels=2,
                               kernel_size=3,
                               bias=False)

        self.conv4 = nn.Conv1d(in_channels=embed_len,
                               out_channels=2,
                               kernel_size=4,
                               bias=False)

        # A kernel of size k shortens a length-max_len sequence to
        # max_len - k + 1, so each window below spans the whole conv output.
        self.pool2 = nn.MaxPool1d(max_len - 1, stride=1)
        self.pool3 = nn.MaxPool1d(max_len - 2, stride=1)
        self.pool4 = nn.MaxPool1d(max_len - 3, stride=1)

    def forward(self, x):
        """Return the concatenated pooled features of the three branches.

        Args:
            x: Tensor laid out as (batch, time, features); it is permuted to
                (batch, features, time) before the convolutions.

        Returns:
            Tensor of shape (batch, 6) when the time axis equals ``max_len``.
        """
        x = x.permute(0, 2, 1)

        p_2 = self.pool2(self.conv2(x))
        p_3 = self.pool3(self.conv3(x))
        p_4 = self.pool4(self.conv4(x))

        # Squeeze ONLY the pooled time axis: [batch, channels, 1] -> [batch, channels].
        # An argument-less squeeze() would also drop the batch axis when the
        # batch holds a single sample, and the concatenation below would fail.
        p2, p3, p4 = p_2.squeeze(2), p_3.squeeze(2), p_4.squeeze(2)

        # Concatenate p2, p3, p4 -> [batch, 3 * channels].
        concat = torch.cat(tensors=(p2, p3, p4), dim=1)

        return concat

In [13]:
# Instantiate the model and move its parameters to the configured device.
net = Net()
net = net.to(args.DEVICE)

In [14]:
# Smoke test: run the sampled batch through the untrained network.
# NOTE(review): `buf` is also used as a scratch name inside read_data and
# prepare_data_loader; a more specific name would be clearer here.
buf = net(X.to(args.DEVICE))

In [15]:
# Display the network output for the sampled batch.
buf

tensor([[0.2985, 0.3594, 0.3153, 0.3410, 0.2875, 0.2085],
        [0.2724, 0.2409, 0.5157, 0.2613, 0.2653, 0.1507],
        [0.3775, 0.3940, 0.1020, 0.1369, 0.3361, 0.0486]], device='cuda:0',
       grad_fn=<CatBackward>)

# Optimizer setup

In [16]:
# Adam over all network parameters, with the learning rate taken from the config.
optimizer = torch.optim.Adam(net.parameters(), lr = args.LR)

# Customize Loss

In [46]:
class customLoss(nn.Module):
    """Label-free loss: distance to the batch centroid plus a weighted L2 norm.

    For a 2-D batch ``X`` (one row per sample) the loss is the batch mean of
    ``||x_i - mean(X)|| + alpha * ||x_i||``.

    Args:
        alpha: Multiplier applied to the per-sample L2 norm term.
    """

    def __init__(self, alpha):
        super().__init__()
        self.weight = alpha

    def forward(self, X):
        """Return the scalar loss for a batch of row vectors ``X``."""
        # Column-wise mean, kept as a single row so it broadcasts against X.
        centroid = X.mean(dim=0, keepdim=True)

        # Euclidean distance of every row to the centroid.
        spread = (X - centroid).norm(dim=1)

        # Euclidean length of every row.
        magnitude = X.norm(dim=1)

        per_sample = spread + self.weight * magnitude
        return per_sample.mean()

In [47]:
# Loss instance with the norm term weighted by 100.
criterion = customLoss(100)

In [48]:
# Sanity check: evaluate the loss on the smoke-test output computed earlier.
criterion(buf)

tensor(72.2525, device='cuda:0', grad_fn=<MeanBackward0>)

# Training

In [49]:
# Training loop: one optimizer step per batch, for args.EPOCH passes over train_loader.
for epoch in range(args.EPOCH):
    # Epoch banner with a wall-clock timestamp.
    print("\n=== EPOCH %d/%d === %s ==="% (epoch+1, args.EPOCH, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    # Sum of the per-batch losses for this epoch (a total, not an average).
    running_loss = 0.0
    
    for data in train_loader:
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        
        # NOTE(review): this rebinds the notebook-level `X` from the earlier
        # shape-check cell; `y` is unpacked but never used, since the loss
        # depends on the outputs only.
        X, y = data
        outputs = net(X.to(args.DEVICE))
        loss = criterion(outputs)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        #break
    #break
    # Reports the summed batch losses, so the value scales with the number of batches.
    print( "loss: %.3f" % running_loss)


=== EPOCH 1/2 === 2021-01-21 21:28:48 ===
loss: 452.845

=== EPOCH 2/2 === 2021-01-21 21:28:48 ===
loss: 404.221
