# Set up

In [1]:
import requests
import json, os

nn_id = 'nn992798'  # put some key value you want to test

url = "{0}:{1}".format(os.environ['HOSTNAME'] , "8000")
print("done")

done


# Net & Version Create

In [2]:
####(1) 네트워크 생성 ####
resp = requests.post('http://' + url + '/api/v1/type/common/target/nninfo/nnid/' + nn_id + '/',
                     json={
                         "biz_cate": "MES",
                         "biz_sub_cate": "M60",
                         "nn_title" : "test",
                         "nn_desc": "test desc",
                         "use_flag" : "Y",
                         "dir": "purpose?",
                         "config": "N"
                     })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

####(2) 버전 생성 ####
resp = requests.post('http://' + url + '/api/v1/type/common/target/nninfo/nnid/' + nn_id + '/version/',
                 json={
                     "nn_def_list_info_nn_id": "",
                     "nn_wf_ver_info": "test version info",
                     "condition": "1",
                     "active_flag": "Y"
                 })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))


evaluation result : nn992798
evaluation result : nn992798


# Graph Flow 정의
CSV 데이터를 AutoEncoder 에 훈련시키는 Graph Flow 정의, Eval Node 는 정의하지 않음. 

In [3]:
# Work Flow 틀을 구성하도로고 지시한다. (정해진 틀을 강제로 생성)
resp = requests.post('http://' + url + '/api/v1/type/wf/target/init/mode/simple/' + nn_id +'/wfver/1/',
                     json={
                         "type": 'autoencoder_csv'
                     })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : autoencoder_csv


# CSV Data Upload 
한글 CSV 데이터를 업로드 한다. 

In [4]:
return_dict = {}
return_dict['test'] = open('../../data/seq2seq_mansearch_3.csv', 'rb')

resp = requests.post('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/source/nnid/'+nn_id+'/ver/1/node/datasrc/',
                     files = return_dict)

data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : ['1 file upload success']


# Data Node 정의
데이터 처리에 관한 정의 CSV 파일을 Pandas 를 통해서 읽고 HDF5 로 변환, 별도의 전처리는 지정하지 않음 

In [5]:
# (1) Train Data Node 의 속성을 정의
# 어디서 Source 를 어떻게 가지고 올것인지 파라메터 정의 
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/source/nnid/'+nn_id+'/ver/1/node/datasrc/',
                     json={
                         "type": "csv",
                         "source_server": "local",
                         "source_sql": "all",
                     })

# 전처리는 어떤 것을 할지 정의
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/pre/nnid/'+nn_id+'/ver/1/node/datasrc/',
                      json={
                          "preprocess":  "none",
                      })
# 전처리가 완료된 데이터는 어디에 저장을 할지 
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/store/nnid/'+nn_id+'/ver/1/node/datasrc/',)

data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : /hoya_str_root/nn992798/1/datasrc


# Data Feeder 
Network 에서 훈련을 위해 원하는 형태로 데이터를 편집하여 제공하는 기능 수행 

In [6]:
# (2) Network 에 데이터를 Feed하는 Node 의 속성을 정의 
resp = requests.post('http://' + url + '/api/v1/type/wf/state/pre/detail/feed/src/frame/net/autoencoder/nnid/'+nn_id+'/ver/1/node/feed_train/',
                     json={
                         "encode_column" : "encode",
                         "encode_len" : 10,
                         "vocab_size" : 100,
                         "preprocess": "mecab",
                         "embed_type" : 'onehot'
                     })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : {'encode_len': 10, 'vocab_size': 100, 'embed_type': 'onehot', 'encode_column': 'encode', 'preprocess': 'mecab'}


# AutoEncoder 정의 
n_hidden 의 경우 Encoder 부만 정의하면 Decoder부는 동일한 Vector 를 Revsere로 활용하도록 되어 있다. 

In [7]:
resp = requests.put('http://' + url + '/api/v1/type/wf/state/netconf/detail/autoencoder/nnid/' + nn_id + '/ver/1/node/netconf_node/',
                     json={
                        "learning_rate" : 0.01,
                        "iter" : 10,
                        "batch_size" : 10,
                        "examples_to_show" : 10,
                        "n_hidden" : [200, 100] 
                     })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : {'n_hidden': [200, 100], 'model_path': '/hoya_model_root/nn992798/1/netconf_node', 'examples_to_show': 10, 'batch_size': 10, 'iter': 10, 'learning_rate': 0.01}


# Evaluation Process 

In [10]:
return_dict = {}
return_dict['test'] = open('../../data/seq2seq_mansearch_3.csv', 'rb')

resp = requests.post('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/source/nnid/'+nn_id+'/ver/1/node/evaldata/',
                     files = return_dict)

data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

# 데이터 - 소스 정의
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/source/nnid/'+nn_id+'/ver/1/node/evaldata/',
                     json={
                         "type": "csv",
                         "source_server": "local",
                         "source_sql": "all",
                     })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

# 데이터 - 전처리 정의
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/pre/nnid/'+nn_id+'/ver/1/node/evaldata/',
                      json={
                          "preprocess":  "none",
                      })
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

# 데이터 - 저장 정의
resp = requests.put('http://' + url + '/api/v1/type/wf/state/framedata/src/local/form/raw/prg/store/nnid/'+nn_id+'/ver/1/node/evaldata/',)

data = json.loads(resp.json())
print("evaluation result : {0}".format(data))


# Feeder 정의
resp = requests.post('http://' + url + '/api/v1/type/wf/state/pre/detail/feed/src/frame/net/autoencoder/nnid/'+nn_id+'/ver/1/node/feed_test/',
                     json={
                         "encode_column" : "encode",
                         "encode_len" : 10,
                         "vocab_size" : 100,
                         "preprocess": "mecab",
                         "embed_type" : 'onehot'
                     })
print("evaluation result : {0}".format(data))

node_name = 'eval_node'
resp = requests.put('http://' + url + '/api/v1/type/wf/state/eval/nnid/'+nn_id+'/ver/1/node/eval_node/',
                    json={
                        "type": "regression",
                    })

data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : ['1 file upload success']
evaluation result : {'preprocess': 'none', 'source_type': 'local', 'type': 'csv', 'store_path': '/hoya_str_root/nn992798/1/evaldata', 'source_server': 'local', 'source_parse_type': 'raw', 'source_sql': 'all', 'source_path': '/hoya_src_root/nn992798/1/evaldata', 'max_sentence_len': 0, 'multi_node_flag': None}
evaluation result : none
evaluation result : /hoya_str_root/nn992798/1/evaldata
evaluation result : /hoya_str_root/nn992798/1/evaldata
evaluation result : {'type': 'regression'}


# Train 을 시작한다. 

In [11]:
# Run All Workflow
resp = requests.post('http://' + url + '/api/v1/type/runmanager/state/train/nnid/'+nn_id+'/ver/1/')
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : [None, None, 'nn992798_1_netconf_node', None, None, {'predicts': ['0.0758592523229', '0.0674713528844', '0.0718941205278', '0.0375144234205', '0.0492059903653', '0.0689190733116', '0.0676905085549', '0.0505031946516', '0.083598860566', '0.0792136743804', '0.0696840902304', '0.0693802716635', '0.050440701492', '0.0711381661197', '0.0698202273938', '0.0618019619526', '0.059608402151', '0.062082051313', '0.0738962940855', '0.0628718306067', '0.0689535031781', '0.0644796456354', '0.0644796456354', '0.0644796456354', '0.0644796456354', '0.0644796456354', '0.0764495248837', '0.0712957792263', '0.0462818626609', '0.0601843485101', '0.0660665875256', '0.0660665875256', '0.0771977370102', '0.0601337292964', '0.0570079267025', '0.0674450546397', '0.0723711110883', '0.069552457978', '0.0720435184209', '0.0720435184209', '0.0455785944256', '0.0723257975653', '0.0493712663849', '0.0471565560845', '0.0518585704431', '0.0364147820397', '0.0364147820397', '0.0366457388232', '0.0575

# Compressed Vector 를 구해보자 
해당 Vector 의 Size 는 "n_hidden" : [200, 100, 10]에서 정의한 마지막 Layer 의 사이즈가 될 것이다. 

In [12]:
resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/autoencoder/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "드래곤은 무엇인가?" ,
                           "type" : "encoder"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data[0][0:10]))

evaluation result : [0.9999985694885254, 0.16574741899967194, 0.9018165469169617, 6.857906555524096e-06, 0.918679416179657, 2.399779759798548e-06, 1.9542167137842625e-05, 0.9833639860153198, 0.9979786276817322, 2.8478970483941657e-12]


# 복원된 Vector 추출 
복원된 Vector는 사전 Filter 개념으로 이해하고 다른 네트워크의 인풋으로도 사용할 수 있다. 

In [13]:
resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/autoencoder/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "[이름]의 [직책] 조회" ,
                           "type" : "decoder"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data[0][0:10]))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/autoencoder/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "전혀 알 수 없는 내용" ,
                           "type" : "decoder"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data[0][0:10]))

evaluation result : [0.999984622001648, 4.1382523363608925e-07, 0.3294834494590759, 0.5718907713890076, 0.00045332525041885674, 0.01805294305086136, 0.016850726678967476, 0.012671600095927715, 0.9968369007110596, 0.19661033153533936]
evaluation result : [0.9999961853027344, 1.936152511916589e-05, 0.0025989681016653776, 0.7812536358833313, 0.009264248423278332, 0.9823628067970276, 0.5281972885131836, 0.08224803954362869, 0.9643268585205078, 0.0009392643696628511]


# 복원된 Vector 와  입력된 Vector 유사성 테스트 
입력된 Vector 와 복원된 Vector 의 유사성을 근거로 Anomaly Detection 을 수행한다 .<br>
위의 두개가 훈련에 사용한 데이터이고 나머지는 훈련에 사용하지 않은 데이터이다. 지기 복원율을 구하는 방식으로 테스트한 결과 현재로써는 챗봇에 어느정도 사용할 수 있을 것이라고 생각된다 .

In [14]:
resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "[이름]의 [직책] 조회" }
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data)) 

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "[부서] [이름] [직급]"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "[이름]의 [직책] 찾아줘"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "가나다라마바사"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "굉장히 이상한 내용 !!!!"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "히키다타하아하아다가"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

resp = requests.post('http://' + url + '/api/v1/type/service/state/predict/type/anomaly/nnid/'+nn_id+'/ver/active/',
                     json={"input_data" : "하하하 너는 바보니?"}
                     )
data = json.loads(resp.json())
print("evaluation result : {0}".format(data))

evaluation result : 0.062082051313018805
evaluation result : 0.06013372929643224
evaluation result : 0.08085691921372795
evaluation result : 0.0844293961417415
evaluation result : 0.08431725381039012
evaluation result : 0.09268495685537848
evaluation result : 0.08681194806149906
