In [1]:
import argparse
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns 
import torch
import torch.nn as nn
import torch.nn.utils as torch_utils
import torch.optim as optim
from torch.autograd import Variable 
from torch.utils.data import Dataset, DataLoader

#from collections import Counter

In [2]:
# --- Run configuration -------------------------------------------------------
n_epochs = 1000
num_layers = 1

# Event log to train on; alternatives that have been used: 'helpdsk', 'Credit', 'BPI17'.
fName = 'BPI12'

# Checkpoint file name is built as savePath + modelName + fName + '_' + modelVer + '.pth'.
savePath = './Result/'
modelName = 'RTPrediction_'
modelVer = 'v1'

# (batch_size, lr) per log. BPI logs are matched on their first five characters,
# the other logs on their full name; anything else gets the fallback pair.
batch_size, lr = {
    'BPI12': (1000, 0.0001),
    'BPI15': (200, 0.0001),
    'BPI17': (1000, 0.0001),
}.get(
    fName[:5],
    {
        'helpdsk': (200, 0.0001),
        'Credit': (1000, 0.001),
    }.get(fName, (1000, 0.0005)),
)



In [3]:
class LSTM(nn.Module):
    """Two LSTM stages with a shared batch-norm layer and a linear output head.

    The network reads a batch-first sequence tensor and returns the head's
    output at the last time step of every sequence.

    Args:
        input_dim: size of one event vector.
        hidden_dim: hidden size used by both LSTM stages.
        batch_size: nominal batch size; only used by ``init_hidden``.
        output_dim: size of the linear head's output.
        num_layers: number of stacked layers inside each ``nn.LSTM``.
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        # Recurrent stages (attribute names are kept so saved state dicts still load).
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
        self.lstm2 = nn.LSTM(self.hidden_dim, self.hidden_dim, self.num_layers, batch_first=True)
        # One BatchNorm1d instance is applied after each LSTM stage, then the output layer.
        self.bn = nn.BatchNorm1d(hidden_dim)
        self.fc = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self):
        """Return zero ``(h0, c0)`` tensors of shape (num_layers, batch_size, hidden_dim).

        The tensors take the dtype and device of the model parameters, so the
        method no longer depends on a module-level ``dtype`` variable.
        """
        ref = next(self.parameters())
        shape = (self.num_layers, self.batch_size, self.hidden_dim)
        return (torch.zeros(shape, dtype=ref.dtype, device=ref.device),
                torch.zeros(shape, dtype=ref.dtype, device=ref.device))

    def forward(self, input):
        """Run a batch through the network.

        Args:
            input: tensor of shape (batch, steps, input_dim). It is cast to the
                parameters' dtype and device before use.

        Returns:
            ``(y_last, hidden)``: ``y_last`` is the last column of the head
            output reshaped to (batch, -1); ``hidden`` is the ``(h, c)`` pair
            returned by the second LSTM stage (also stored on ``self.hidden``).
        """
        ref = next(self.parameters())
        input = input.to(device=ref.device, dtype=ref.dtype)
        # Take the batch/sequence sizes from the tensor itself instead of the
        # notebook-level batch_size, so batches of any size are reshaped correctly.
        n_batch, n_steps = input.shape[0], input.shape[1]

        lstm_out, self.hidden = self.lstm(input)
        # BatchNorm1d expects (N, C): flatten batch and time, normalise, restore.
        lstm_out = self.bn(lstm_out.reshape(-1, lstm_out.shape[2]))
        lstm_out = lstm_out.reshape(n_batch, n_steps, lstm_out.shape[1])

        lstm_out, self.hidden = self.lstm2(lstm_out)
        lstm_out = self.bn(lstm_out.reshape(-1, lstm_out.shape[2]))
        y_pred = self.fc(lstm_out)

        y_pred = y_pred.reshape(n_batch, -1)
        return y_pred[:, -1], self.hidden
        
        

In [4]:
def buildOHE(index, n):
    """Return a fresh one-hot list of length ``n`` with a 1 at ``index``."""
    encoded = [0 for _ in range(n)]
    encoded[index] = 1
    return encoded

def buildOHE2(index, arr):
    """Set position ``index`` of ``arr`` to 1 and return ``arr``.

    This is the cumulative ("aggregated pattern") encoding: earlier 1s are
    kept, so the list records every index set so far. ``arr`` is modified in
    place and the same object is returned.
    """
    arr[index] = 1
    return arr

In [5]:
def load_dataset(name):
    """Load and encode the event log registered under ``name``.

    Args:
        name: one of 'BPI12', 'BPI15', 'BPI17', 'helpdsk', 'Credit', 'Invoice'.

    Returns:
        Whatever ``_load_dataset_name`` returns for the matching CSV file.

    Raises:
        ValueError: if ``name`` is not a registered log. Previously the
            function fell through and returned ``None``, which only surfaced
            later as an unpacking error in the caller.
    """
    csv_by_name = {
        "BPI12": "FinalData/bpi_12_w.csv",
        "BPI15": "FinalData/BPI15_1_Final_Nava.csv",
        "BPI17": "FinalData/BPI2017_M3000_Final_Nava.csv",
        "helpdsk": "FinalData/helpdesk_N.csv",
        "Credit": "FinalData/Credit_Final_Nava.csv",
        "Invoice": "FinalData/Invoice_Final_Nava.csv",
    }
    if name not in csv_by_name:
        raise ValueError(
            "Unknown dataset name {!r}; expected one of {}".format(name, sorted(csv_by_name))
        )
    return _load_dataset_name(csv_by_name[name])

    
def _load_dataset_name(fName):
    """Read an event-log CSV, split it 80/20 by case id and encode both parts.

    Column 0 is compared numerically against a threshold derived from the
    number of distinct values in that column: rows below ``4 * (n_cases // 5)``
    go to the training part, the rest to the test part.

    Args:
        fName: path of the CSV file (first row is the header).

    Returns:
        ``(generate_set(train_rows, ...), generate_set(test_rows, ...))``.
    """
    dataframe = pd.read_csv(fName, header=0)
    # NOTE(review): r's+' matches runs of the letter "s", not whitespace. It may have
    # been meant as r'\s+', but that pattern would also rewrite the space inside the
    # timestamps, so the expression is left exactly as it was - confirm the intent.
    dataframe = dataframe.replace(r's+', 'empty', regex=True).fillna(0)
    dataset = dataframe.values

    print(dataframe[:20])

    # Number of distinct values in every column.
    values = [len(np.unique(dataset[:, col])) for col in range(dataset.shape[1])]
    print(values)

    elems_per_fold = int(values[0] / 5)
    split_at = 4 * elems_per_fold
    datasetTR = dataset[dataset[:, 0] < split_at]
    datasetTS = dataset[dataset[:, 0] >= split_at]
    # The last number printed is the row count of the training part.
    print('Train : 0~', split_at, " Test : ", split_at, '~', len(datasetTR))

    column_types = dataframe.dtypes
    return generate_set(datasetTR, values, column_types), generate_set(datasetTS, values, column_types)
    
    
def _parse_timestamp(text):
    """Parse a '%Y-%m-%d %H:%M:%S' string into a naive local-time datetime."""
    return datetime.fromtimestamp(time.mktime(time.strptime(text, "%Y-%m-%d %H:%M:%S")))


def _seconds_since_midnight(moment):
    """Seconds elapsed between 00:00:00 of ``moment``'s day and ``moment``."""
    midnight = moment.replace(hour=0, minute=0, second=0, microsecond=0)
    return (moment - midnight).total_seconds()


def _encode_extra_columns(row, values, dfDtype):
    """Split columns 3.. of ``row`` into raw numeric values and one-hot codes."""
    # Positional lookup: a pandas Series indexed by column name must go through
    # .iloc (plain integer keys on such a Series are deprecated); lists work as is.
    dtype_at = dfDtype.iloc if hasattr(dfDtype, "iloc") else dfDtype
    numeric, categorical = [], []
    for field, cell in enumerate(row[3:], start=3):
        if np.issubdtype(dtype_at[field], np.number):
            numeric.append(cell)
        else:
            # Categorical cells look like '<letter><number>'; the number is the slot.
            one_hot = [0] * (values[field] + 1)
            one_hot[int(cell[1:])] = 1
            categorical.extend(one_hot)
    return numeric, categorical


def _close_case(case_id, events, stamps, data, targets, cases):
    """Emit all prefixes (length >= 2) of a finished case with their targets."""
    for end in range(2, len(events) + 1):
        data.append(events[:end])
        cases.append(case_id)
    finish = stamps[-1]
    # One target per event except the last one (whose remaining time is 0).
    targets.extend((finish - stamp).total_seconds() for stamp in stamps[:-1])


def generate_set(dataset, values, dfDtype):
    """Turn event rows into prefix sequences and remaining-time targets.

    Rows are ``[case_id, activity, timestamp, *attributes]``; consecutive rows
    with the same case id form one case. Every event becomes the vector::

        [since case start, since previous event, since midnight, weekday,
         since loop start] + numeric attributes
        + cumulative activity pattern + one-hot categorical attributes

    with all durations in seconds. A case with L events yields the L-1
    prefixes of length 2..L.

    NOTE(review): the targets of a case are ``finish - t_k`` for its events
    k = 0..L-2, emitted in that order. The prefix that ends at event k is
    therefore paired with the remaining time measured from event k-1. This
    pairing is kept exactly as it was - confirm that it is intended.

    Fixes relative to the previous version:
      * the loop-start marker is reset at every case boundary, so the
        "since loop start" feature can no longer be measured from an event of
        the previous case;
      * a single-event first case no longer hits an unbound ``finishtime``;
      * column dtypes are looked up positionally.

    Args:
        dataset: 2-D array of event rows, grouped by case.
        values: number of distinct values per column (``values[1]`` sizes the
            activity pattern).
        dfDtype: per-column dtypes, indexable by column position.

    Returns:
        ``(data, temptarget, vectorPoint, caseArr)``: the prefixes, their
        targets in seconds, ``[n_scaled, n_pattern, n_onehot]`` measured on the
        first event, and the case id of every prefix.

    Raises:
        ValueError: if ``dataset`` has no rows.
    """
    if len(dataset) == 0:
        raise ValueError("generate_set() needs at least one event row")

    n_activities = values[1] + 1
    data, temptarget, caseArr = [], [], []
    vectorPoint = None

    case_id = None
    case_events, case_stamps = [], []

    for row in dataset:
        now = _parse_timestamp(row[2])
        act = int(row[1][1:])

        if case_events and row[0] == case_id:
            # Another event of the case currently being read.
            head = [(now - start).total_seconds(),
                    (now - previous).total_seconds(),
                    _seconds_since_midnight(now),
                    now.weekday()]
            pattern[act] = 1

            occurrences[act] += 1
            if occurrences[act] == 1:
                first_seen[act] = now
            if occurrences[act] > 1:
                # Repeated activity: measure from where the current loop began.
                if loop_start is None:
                    loop_start = first_seen[act]
                head.append((now - loop_start).total_seconds())
            else:
                loop_start = None
                head.append(0)
        else:
            # First row overall, or first row of the next case.
            if case_events:
                _close_case(case_id, case_events, case_stamps, data, temptarget, caseArr)
            case_id = row[0]
            case_events, case_stamps = [], []
            start = now
            occurrences = [0] * n_activities
            first_seen = [0] * n_activities
            loop_start = None  # never carry a loop over from the previous case
            pattern = [0] * n_activities
            pattern[act] = 1
            occurrences[act] += 1
            first_seen[act] = now
            # The first event of a case has zero elapsed times.
            head = [0, 0, _seconds_since_midnight(now), now.weekday(), 0]

        numeric, categorical = _encode_extra_columns(row, values, dfDtype)
        if vectorPoint is None:
            # Lengths of the scaled part, the activity pattern and the one-hot part.
            vectorPoint = [len(head) + len(numeric), len(pattern), len(categorical)]

        case_events.append(head + numeric + list(pattern) + categorical)
        case_stamps.append(now)
        previous = now

    # The last case is still open when the rows run out.
    _close_case(case_id, case_events, case_stamps, data, temptarget, caseArr)

    print(len(data))
    print("Generated dataset with n_samples:", len(temptarget))
    assert(len(temptarget)== len(data))
    print(vectorPoint)
    return data, temptarget, vectorPoint, caseArr

In [6]:
# Load the selected log and unpack the encoded train and test sets.
# vectorP is assigned by both tuples, so it ends up holding the test set's value.
(train_x_origin, train_y_origin, vectorP, train_case),(test_x_origin, test_y_origin, vectorP, test_case)= load_dataset(fName)
# vectorP = [length of the scaled part, length of the activity-pattern part, length of the one-hot attribute part]

    CaseID Activity    CompleteTimestamp
0        1       a1  2011-10-01 19:45:00
1        1       a2  2011-10-01 20:17:00
2        1       a2   2011-10-09 0:32:00
3        1       a2  2011-10-10 19:33:00
4        1       a3  2011-10-13 18:37:00
5        2       a1  2011-10-01 19:43:00
6        2       a1  2011-10-01 22:35:00
7        2       a2  2011-10-01 22:36:00
8        2       a2   2011-10-04 0:56:00
9        2       a2   2011-10-04 0:57:00
10       2       a2  2011-10-10 18:14:00
11       2       a3  2011-10-10 19:30:00
12       2       a3  2011-10-10 22:14:00
13       2       a3  2011-10-10 22:17:00
14       3       a1  2011-10-01 19:35:00
15       3       a1  2011-10-03 19:21:00
16       3       a1  2011-10-03 21:32:00
17       3       a1  2011-10-03 21:40:00
18       3       a2  2011-10-03 21:44:00
19       3       a2   2011-10-11 0:47:00
[9658, 6, 48282]
Train : 0~ 7724  Test :  7724 ~ 59126
51403
Generated dataset with n_samples: 51403
[5, 7, 0]
11352
Generated dataset with

In [7]:
train_x_origin[9]
# Event vector layout: time since case start, time since previous event, time since midnight, weekday,
# time since loop start, [activity pattern], other attributes

[[0, 0, 70980.0, 5, 0, 0, 1, 0, 0, 0, 0, 0],
 [10320.0, 10320.0, 81300.0, 5, 10320.0, 0, 1, 0, 0, 0, 0, 0],
 [10380.0, 60.0, 81360.0, 5, 0, 0, 1, 1, 0, 0, 0, 0],
 [191580.0, 181200.0, 3360.0, 1, 181200.0, 0, 1, 1, 0, 0, 0, 0],
 [191640.0, 60.0, 3420.0, 1, 181260.0, 0, 1, 1, 0, 0, 0, 0],
 [772260.0, 580620.0, 65640.0, 0, 761880.0, 0, 1, 1, 0, 0, 0, 0],
 [776820.0, 4560.0, 70200.0, 0, 0, 0, 1, 1, 1, 0, 0, 0]]

In [8]:
def vecToStr(vec):
    """Concatenate ``str()`` of every element of ``vec`` into one string."""
    return ''.join(map(str, vec))

In [9]:
# Mean remaining time per aggregated activity pattern.
#
# CRTdic maps a pattern string to [summed target, number of samples];
# CRTvalue maps the same key to the min-max-scaled mean; totalRT collects
# the target of every prefix.

CRTdic = {}
CRTvalue = {}
totalRT = []

pattern_lo = vectorP[0]
pattern_hi = vectorP[0] + vectorP[1]

previous_case = None
for x, y, case in zip(train_x_origin, train_y_origin, train_case):
    # Every prefix is credited to the pattern of its last event.
    credited_events = [x[-1]]
    # The first prefix of a case is additionally credited to the pattern of the
    # case's first event. The case id is used to detect that prefix; comparing
    # prefix lengths missed a case that directly follows a two-event case.
    if case != previous_case:
        credited_events.insert(0, x[0])

    for event in credited_events:
        cid = vecToStr(event[pattern_lo:pattern_hi])
        bucket = CRTdic.setdefault(cid, [0, 0])  # [summed target, count]
        bucket[0] += y
        bucket[1] += 1

    previous_case = case
    totalRT.append(y)

# Min, quartiles, max and mean of all targets.
totalQ = [np.quantile(totalRT, 0.0), np.quantile(totalRT, 0.25), np.quantile(totalRT, 0.50),
          np.quantile(totalRT, 0.75), np.quantile(totalRT, 1.0), np.mean(totalRT)]

# Min-max scaling of the per-pattern mean (inverted: the maximum maps to 0).
target_span = totalQ[4] - totalQ[0]
for x in CRTdic:
    if target_span == 0:
        # All targets are identical; avoid dividing by zero.
        CRTvalue[x] = 0.0
    else:
        CRTvalue[x] = (totalQ[4] - int(CRTdic[x][0] / CRTdic[x][1])) / target_span
#print(CRTvalue)

In [10]:
# Per-activity min-max normalisation of the scaled part of every event vector.
# Activities are the basic unit of work and have different attribute ranges,
# so each activity bucket gets its own min/max, computed on the training prefixes only.
# NOTE(review): the bucket is found with .index(1) on the pattern slice, i.e. the first
# position that holds a 1. generate_set fills that slice cumulatively, so this is the
# lowest-numbered activity seen so far in the case, not necessarily the current one - confirm.
idSpoint=vectorP[0]
idEpoint=vectorP[1]+vectorP[0]

# tmpArr[ep] / tmpArrY[ep]: normalised values of slot ep for every (prefix, event)
# pair of the train / test set, in iteration order.
tmpArr = [[] for i in range(vectorP[0])]
tmpArrY = [[] for i in range(vectorP[0])]

for ep in range(vectorP[0]):# handle one slot of the scaled part at a time
    saveArr=[[] for i in range(vectorP[1])]
    for x in train_x_origin:  # collect the last event's value per bucket; bucket 0 collects every value
        saveArr[x[-1][idSpoint:idEpoint].index(1)].append(x[-1][ep])
        saveArr[0].append(x[-1][ep])
            
    # pointArr[k] = [min, q25, median, q75, max, mean, std] of bucket k; index 0 is an empty placeholder.
    pointArr=[[]]
    for x in saveArr[1:]: # min/max and quartiles per bucket
        try:
            pointArr.append([np.quantile(x,0.0),np.quantile(x,0.25),np.quantile(x,0.50),np.quantile(x,0.75),np.quantile(x,1.0),np.mean(x), np.std(x)])
        except: # the statistics failed for this bucket (e.g. no samples): fall back to the statistics over all values
            pointArr.append([np.quantile(saveArr[0],0.0),np.quantile(saveArr[0],0.25),np.quantile(saveArr[0],0.50),np.quantile(saveArr[0],0.75),np.quantile(saveArr[0],1.0),np.mean(saveArr[0]), np.std(saveArr[0])])        
        

    for xi, x in enumerate(train_x_origin):
        for xxi ,xx in enumerate(x): # one event at a time
            idNum=xx[idSpoint:idEpoint].index(1)
            if not pointArr[idNum][4]==pointArr[idNum][0]: # skip the division when min equals max (all values identical)
                tmpArr[ep].append((xx[ep]-pointArr[idNum][0])/(pointArr[idNum][4]-pointArr[idNum][0]))
            else:
                tmpArr[ep].append(0)
                
    for xi, x in enumerate(test_x_origin):
        for xxi ,xx in enumerate(x): # one event at a time; the training statistics are reused
            idNum=xx[idSpoint:idEpoint].index(1)
            if not pointArr[idNum][4]==pointArr[idNum][0]: # skip the division when min equals max (all values identical)
                tmpArrY[ep].append((xx[ep]-pointArr[idNum][0])/(pointArr[idNum][4]-pointArr[idNum][0]))
            else:
                tmpArrY[ep].append(0)
                
# Write the normalised values back only after every slot has been computed:
# the event lists are shared between the prefixes of a case, so writing earlier
# would feed already-normalised values into later reads.
i=0
for xi, x in enumerate(train_x_origin):
    for xxi, xx in enumerate(x):
        tmp=[]
        for j in range(len(tmpArr)):
            tmp.append(tmpArr[j][i])
        train_x_origin[xi][xxi][:vectorP[0]]=tmp
        i+=1
i=0       
for xi, x in enumerate(test_x_origin):
    for xxi, xx in enumerate(x):
        tmp=[]
        for j in range(len(tmpArrY)):
            tmp.append(tmpArrY[j][i])
        test_x_origin[xi][xxi][:vectorP[0]]=tmp
        i+=1

In [11]:
# Convert the targets from seconds to days so that the error is reported in the
# same unit as in the papers this model is compared against.
# Run this cell only once per kernel: it overwrites its own inputs.
train_y_origin = [seconds / (3600 * 24) for seconds in train_y_origin]
test_y_origin = [seconds / (3600 * 24) for seconds in test_y_origin]

In [12]:
# Front-pad every prefix with all-zero event vectors up to the longest training prefix.
maxTraceLen = len(max(train_x_origin, key=len))
dicLen = len(train_x_origin[0][0])
dumyPadding = [0] * dicLen


def _front_pad(sequences, target_len, filler):
    """Pad/trim ``sequences`` in place and return each one's unpadded length.

    Shorter sequences get ``filler`` inserted at the front; longer ones are cut
    to their first ``target_len`` events. The returned lengths are capped at
    ``target_len``.
    """
    lengths = []
    for pos, seq in enumerate(sequences):
        lengths.append(min(len(seq), target_len))
        missing = target_len - len(seq)
        if missing > 0:
            seq[:0] = [filler] * missing
        elif missing < 0:
            # NOTE(review): this keeps the earliest events and drops the most recent
            # ones - confirm that this is the intended side to truncate.
            sequences[pos] = seq[:target_len]
    return lengths


# Masks hold the number of real (non-padding) events of every prefix.
train_mask = _front_pad(train_x_origin, maxTraceLen, dumyPadding)
test_mask = _front_pad(test_x_origin, maxTraceLen, dumyPadding)

In [13]:
# Working names for the padded inputs and the targets.
train_x, train_y = train_x_origin, train_y_origin
test_x, test_y = test_x_origin, test_y_origin

In [14]:
# Hold out the tail of the training prefixes as the validation set.
# The cut is at int(len/10*8)+1, i.e. roughly the last 20% of the training
# prefixes are used for validation.
# Run this cell only once per kernel: it overwrites train_x / train_y.
Valkey = int(len(train_x) / 10 * 8) + 1
train_x, valid_x = train_x[:Valkey], train_x[Valkey:]
train_y, valid_y = train_y[:Valkey], train_y[Valkey:]

In [15]:
# Model hyper-parameters, model instance, loss and optimiser.

is_cuda = torch.cuda.is_available()
# Tensor type handed to Tensor.type(): CUDA float when a GPU is present, CPU float otherwise.
dtype = torch.cuda.FloatTensor if is_cuda else torch.float

dict_size = len(train_x[0][0])   # length of one event vector
hidden_dim = vectorP[1] * 2      # twice the length of the activity-pattern part

model = LSTM(
    dict_size,
    hidden_dim,
    batch_size=batch_size,
    output_dim=1,
    num_layers=num_layers,
)
if is_cuda:
    model = model.cuda()

criterion = nn.L1Loss()  # mean absolute error
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
# Training loop: one pass over the training batches, then one pass over the
# validation batches per epoch. The weights are saved whenever the validation
# MAE improves.
minLoss=999
max_grad_norm=5

best_realV,best_predV=[],[]
best_epoch=0

train_loss=[]
valid_loss=[]

bestMAE=[999,0]  # [best validation MAE, epoch at which it was reached]

# Only full batches are used, so both splits need at least one of them;
# otherwise the epoch averages below would divide by zero.
if len(train_x) < batch_size or len(valid_x) < batch_size:
    raise ValueError(
        'batch_size={} is larger than the training ({}) or validation ({}) set'.format(
            batch_size, len(train_x), len(valid_x)))

# torch.save does not create missing directories.
os.makedirs(savePath, exist_ok=True)
checkpoint_path = savePath+modelName+fName+'_'+modelVer+".pth"

for epoch in range(1, n_epochs + 1):
    model.train()
    lossSum =0
    lossCnt =0
    # The trailing partial batch is skipped: only full batches are fed to the model.
    for i in range(0, len(train_x) - batch_size + 1, batch_size):
        x = torch.tensor(train_x[i:i+batch_size])
        y = torch.tensor(train_y[i:i+batch_size])
        if is_cuda: x=x.cuda(); y=y.cuda()

        optimizer.zero_grad() # clear the gradients of the previous step
        output, hidden = model(x.float())

        loss = criterion(output, y.view(-1).float())
        # A fresh graph is built on every forward pass, so it need not be retained.
        loss.backward()
        torch_utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        lossSum += loss.item()
        lossCnt += 1

    train_loss.append(lossSum/lossCnt)     
    print('[[{}/{}]]\ttraining : '.format(epoch,n_epochs),round(lossSum/lossCnt,4), end='\t')
    
    model.eval()
    lossSum =0
    lossCnt =0
    realV=[]
    predV=[]
    # No gradients are needed for validation.
    with torch.no_grad():
        for i in range(0, len(valid_x) - batch_size + 1, batch_size):
            x = torch.tensor(valid_x[i:i+batch_size])
            y = torch.tensor(valid_y[i:i+batch_size])
            if is_cuda: x=x.cuda(); y=y.cuda()

            output, hidden = model(x.float())   
            loss = criterion(output, y.view(-1).float())
            
            realV+=y.view(-1).tolist()
            predV+=output.view(-1).tolist()
           
            lossSum += loss.item()
            lossCnt += 1

    if bestMAE[0]>lossSum/lossCnt:
        bestMAE[0]=lossSum/lossCnt
        bestMAE[1]=epoch
        
    print(' // validation : ',round(lossSum/lossCnt,4), end='\t')    
    valid_loss.append(lossSum/lossCnt)
    
    if minLoss>(lossSum/lossCnt):
        # New best validation MAE: keep the weights and this epoch's predictions.
        print('****',str(epoch),'****')
        torch.save(model.state_dict(), checkpoint_path)
        best_realV=realV
        best_predV=predV
        best_epoch=epoch
        minLoss=lossSum/lossCnt
    else:
        print('')

[[1/1000]]	training :  12.207	 // validation :  11.4577	**** 1 ****
[[2/1000]]	training :  11.8428	 // validation :  11.5041	
[[3/1000]]	training :  11.5115	 // validation :  11.435	**** 3 ****
[[4/1000]]	training :  11.2076	 // validation :  11.2463	**** 4 ****
[[5/1000]]	training :  10.8719	 // validation :  10.9478	**** 5 ****
[[6/1000]]	training :  10.5279	 // validation :  10.6189	**** 6 ****
[[7/1000]]	training :  10.2758	 // validation :  10.3496	**** 7 ****
[[8/1000]]	training :  10.0841	 // validation :  10.1207	**** 8 ****
[[9/1000]]	training :  9.924	 // validation :  9.9303	**** 9 ****
[[10/1000]]	training :  9.7777	 // validation :  9.7655	**** 10 ****
[[11/1000]]	training :  9.6322	 // validation :  9.6142	**** 11 ****
[[12/1000]]	training :  9.4769	 // validation :  9.4923	**** 12 ****
[[13/1000]]	training :  9.3415	 // validation :  9.4038	**** 13 ****
[[14/1000]]	training :  9.2376	 // validation :  9.3251	**** 14 ****
[[15/1000]]	training :  9.1478	 // validation :  9

[[139/1000]]	training :  7.7121	 // validation :  9.3237	
[[140/1000]]	training :  7.7107	 // validation :  9.3218	
[[141/1000]]	training :  7.7093	 // validation :  9.3201	
[[142/1000]]	training :  7.7079	 // validation :  9.3175	
[[143/1000]]	training :  7.7062	 // validation :  9.316	
[[144/1000]]	training :  7.7049	 // validation :  9.3189	
[[145/1000]]	training :  7.7035	 // validation :  9.3048	
[[146/1000]]	training :  7.702	 // validation :  9.3079	
[[147/1000]]	training :  7.7006	 // validation :  9.307	
[[148/1000]]	training :  7.6991	 // validation :  9.307	
[[149/1000]]	training :  7.6975	 // validation :  9.2959	
[[150/1000]]	training :  7.6962	 // validation :  9.2958	
[[151/1000]]	training :  7.6948	 // validation :  9.2941	
[[152/1000]]	training :  7.6933	 // validation :  9.2955	
[[153/1000]]	training :  7.6919	 // validation :  9.2898	
[[154/1000]]	training :  7.6905	 // validation :  9.2854	
[[155/1000]]	training :  7.6893	 // validation :  9.2854	
[[156/1000]]	train

[[281/1000]]	training :  7.5601	 // validation :  9.1745	
[[282/1000]]	training :  7.5596	 // validation :  9.1661	
[[283/1000]]	training :  7.5585	 // validation :  9.1611	
[[284/1000]]	training :  7.5581	 // validation :  9.1504	
[[285/1000]]	training :  7.5579	 // validation :  9.1561	
[[286/1000]]	training :  7.5569	 // validation :  9.1571	
[[287/1000]]	training :  7.5556	 // validation :  9.163	
[[288/1000]]	training :  7.5549	 // validation :  9.1609	
[[289/1000]]	training :  7.554	 // validation :  9.1566	
[[290/1000]]	training :  7.5533	 // validation :  9.1721	
[[291/1000]]	training :  7.5526	 // validation :  9.1674	
[[292/1000]]	training :  7.5513	 // validation :  9.1664	
[[293/1000]]	training :  7.5507	 // validation :  9.1761	
[[294/1000]]	training :  7.5499	 // validation :  9.1832	
[[295/1000]]	training :  7.5489	 // validation :  9.187	
[[296/1000]]	training :  7.5483	 // validation :  9.1858	
[[297/1000]]	training :  7.5479	 // validation :  9.1842	
[[298/1000]]	trai

[[423/1000]]	training :  7.4594	 // validation :  9.2461	
[[424/1000]]	training :  7.4586	 // validation :  9.2483	
[[425/1000]]	training :  7.4577	 // validation :  9.2484	
[[426/1000]]	training :  7.4567	 // validation :  9.2501	
[[427/1000]]	training :  7.4559	 // validation :  9.2462	
[[428/1000]]	training :  7.4548	 // validation :  9.2444	
[[429/1000]]	training :  7.4543	 // validation :  9.244	
[[430/1000]]	training :  7.4538	 // validation :  9.2418	
[[431/1000]]	training :  7.4529	 // validation :  9.2374	
[[432/1000]]	training :  7.4527	 // validation :  9.2436	
[[433/1000]]	training :  7.4517	 // validation :  9.2379	
[[434/1000]]	training :  7.4513	 // validation :  9.2421	
[[435/1000]]	training :  7.4507	 // validation :  9.2468	
[[436/1000]]	training :  7.4504	 // validation :  9.251	
[[437/1000]]	training :  7.4494	 // validation :  9.2534	
[[438/1000]]	training :  7.4489	 // validation :  9.2601	
[[439/1000]]	training :  7.4485	 // validation :  9.2636	
[[440/1000]]	tra

[[564/1000]]	training :  7.3767	 // validation :  10.2917	
[[565/1000]]	training :  7.3764	 // validation :  10.3282	
[[566/1000]]	training :  7.3764	 // validation :  10.4292	
[[567/1000]]	training :  7.3754	 // validation :  10.519	
[[568/1000]]	training :  7.3753	 // validation :  10.6299	
[[569/1000]]	training :  7.3758	 // validation :  10.6887	
[[570/1000]]	training :  7.3754	 // validation :  10.7279	
[[571/1000]]	training :  7.3749	 // validation :  11.0006	
[[572/1000]]	training :  7.3737	 // validation :  10.866	
[[573/1000]]	training :  7.3731	 // validation :  10.9614	
[[574/1000]]	training :  7.3722	 // validation :  10.9858	
[[575/1000]]	training :  7.3718	 // validation :  11.0027	
[[576/1000]]	training :  7.371	 // validation :  11.1966	
[[577/1000]]	training :  7.3704	 // validation :  11.3023	
[[578/1000]]	training :  7.3708	 // validation :  12.1426	
[[579/1000]]	training :  7.3707	 // validation :  12.143	
[[580/1000]]	training :  7.3702	 // validation :  11.9479	
[

In [None]:
# Validation result: run summary and real-vs-predicted scatter of the best epoch.
print(fName)
print('learning Rate : ',lr)
print('layer : ',num_layers)
print(bestMAE)

print('hidden_dim : ',hidden_dim)
plt.scatter(best_realV, best_predV,s=0.3)
plt.xlabel('real remaining time (days)')
plt.ylabel('predicted remaining time (days)')
plt.title('Validation set, best epoch')
# plt.show was referenced without being called, so the figure was never shown explicitly.
plt.show()

In [None]:
# Training and validation error per epoch, drawn as two lines on one axes.
for loss_history in (train_loss, valid_loss):
    plt.plot(loss_history)
plt.show()

In [None]:
# Evaluate the best checkpoint on the test set.
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one.
model.load_state_dict(torch.load(savePath+modelName+fName+'_'+modelVer+".pth",
                                 map_location='cuda' if is_cuda else 'cpu'))

minLoss=999
model.eval()
lossSum, lossCnt = 0, 0
realV, predV, maskV, caseV=[],[],[],[]

# Only full batches are evaluated, so the trailing partial batch of the test set
# is not part of the reported error.
with torch.no_grad():
    for i in range(0, len(test_x) - batch_size + 1, batch_size):
        x = torch.tensor(test_x[i:i+batch_size])
        y = torch.tensor(test_y[i:i+batch_size])
        m = test_mask[i:i+batch_size]    # unpadded prefix lengths
        cid = test_case[i:i+batch_size]  # case id of every prefix

        if is_cuda: x=x.cuda(); y=y.cuda()
        output, hidden = model(x.float())   
        loss = criterion(output, y.view(-1).float())
            
        realV+=y.view(-1).tolist()
        predV+=output.view(-1).tolist()
        maskV+=m
        caseV+=cid
           
        lossSum += loss.item()
        lossCnt += 1

if lossCnt == 0:
    raise ValueError('test set ({} samples) is smaller than batch_size={}'.format(len(test_x), batch_size))
print('test : ',round(lossSum/lossCnt,4))

In [None]:
# Test result: run summary and real-vs-predicted scatter.
print(fName)
print('learning Rate : ',lr)
print('layer : ',num_layers)
print(bestMAE[0])
print('hidden_dim : ',hidden_dim)

plt.scatter(realV, predV,s=0.3)
plt.xlabel('real remaining time (days)')
plt.ylabel('predicted remaining time (days)')
plt.title('Test set')
# plt.show was referenced without being called, so the figure was never shown explicitly.
plt.show()

In [None]:
# Mean absolute error per prefix length (index = number of real events in the prefix).
# The lists have maxTraceLen + 1 slots so that length maxTraceLen gets its own
# bucket; with maxTraceLen slots it was merged into the bucket of maxTraceLen - 1.
prefixResult=[0]*(maxTraceLen+1)
prefixCount=[0]*(maxTraceLen+1)
for rv,pv,mv in zip(realV,predV,maskV):
    bucket = min(mv, maxTraceLen)  # anything longer is counted with the longest length
    prefixResult[bucket]+=abs(rv-pv)
    prefixCount[bucket]+=1

sampleCount = sum(prefixCount)
# Avoid dividing by zero when no prediction was collected.
totalMae = sum(prefixResult)/sampleCount if sampleCount else float('nan')
print('totalMAE : ',totalMae,' cnt : ',sum(prefixCount))
    
for i in range(maxTraceLen+1):
    if prefixCount[i]==0:
        print(str(i), '\t : ' ,prefixResult[i])
    else:     
        print(str(i), '\t : ' ,prefixResult[i]/prefixCount[i],' cnt : ',prefixCount[i])
        # From here on the slot holds the mean instead of the summed error.
        prefixResult[i]=prefixResult[i]/prefixCount[i]
