In [1]:
import torch
import random
import numpy as np
import pandas as pd
import main_data_alignment as mda

  _dtype_to_storage = {data_type(0).dtype: data_type for data_type in _storages}


In [2]:
# Fix every random number generator used in this notebook to a single seed.
random_seed = 42

# Python's built-in RNG and NumPy's global RNG.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch RNGs (CPU default generator and the current CUDA device).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: disable the benchmark auto-tuner and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Case 1. model = up
# Upsampling: for two datasets with different collection periods, every NaN
# value is interpolated with the selected method, using the collection times
# of the dataset with the shorter collection period as the reference.
config1 = dict(
    model="up",
    parameter=dict(
        # Alignment method used for upsampling; choose one of ['mean', 'knn'].
        method="knn",
        # Number of KNN neighbors, int (recommended range: 1-20;
        # set to None when method='mean' is selected).
        n_neighbors=5,
    ),
)

# Case 2. model = down
# Downsampling: for the data at the collection times shared by two datasets
# with different collection periods, every time point at which one or more
# variables are NaN is removed.
config2 = dict(
    model="down",
    parameter=None,  # this model takes no parameters
)

# Case 3. model = RNN_AE
# RNN_AE: for two datasets with different collection periods, each time point
# is transformed into new variables, using the collection times of the dataset
# with the shorter collection period as the reference.
config3 = dict(
    model="RNN_AE",
    parameter=dict(
        # Input sequence length of the model, int (default: 10;
        # range: >= 0 and <= the sequence length of the original data).
        window_size=10,
        # Dimension of the transformed data, int (range: 16-256).
        emb_dim=32,
        # Number of training epochs, int (range: >= 1; set appropriately
        # after checking convergence).
        num_epochs=50,
        # Batch size, int (range: >= 1; set to suit the machine's specs).
        batch_size=128,
        # Learning rate, float (default: 0.0001; range: <= 0.1).
        learning_rate=0.0001,
        # Training environment; choose one of ["cuda", "cpu"].
        device="cpu",
    ),
)

In [4]:
# Each entry is [path to the CSV file, name of the column used as the index].
dataset = dict(
    data1=["./data/sample_toy_1.csv", "index"],
    data2=["./data/sample_toy_2.csv", "index"],
)

In [5]:
def getXfromDataInfo(dataset):
    """
    Read the two CSV files described by ``dataset`` into prefixed DataFrames.

    Args:
        dataset (dict): mapping with the keys "data1" and "data2". Each value
            is a sequence whose element 0 is the CSV path and whose element 1
            is the name of the column to use as the index of that file.

    Returns:
        tuple: ``(x1, x2)`` — the frame read from the "data1" entry with every
        column name prefixed by ``data1_``, and the frame read from the
        "data2" entry with every column name prefixed by ``data2_``.
    """
    data1_info = dataset["data1"]
    data2_info = dataset["data2"]

    x1 = pd.read_csv(data1_info[0], index_col=data1_info[1]).add_prefix('data1_')
    # Each file is indexed by its own configured column. The previous version
    # passed data1's index column here, so data2's setting was silently ignored
    # (and the read failed when data2 had no column with data1's index name).
    x2 = pd.read_csv(data2_info[0], index_col=data2_info[1]).add_prefix('data2_')
    return x1, x2

In [6]:
# Load the two CSV files listed in `dataset` as the frames x1 and x2.
x1, x2 = getXfromDataInfo(dataset)

In [7]:
# Display x1, the frame loaded from dataset["data1"].
x1

Unnamed: 0_level_0,data1_col1,data1_col2,data1_col3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.030990,0.027961,0.059248
1,0.037388,0.057986,0.094965
2,0.051436,0.086420,0.134013
3,0.079113,0.068896,0.117764
4,0.135780,0.090790,0.162909
...,...,...,...
995,-0.674095,0.955143,0.999681
996,-0.714728,0.969691,1.006262
997,-0.720228,0.994804,1.059640
998,-0.757245,0.946264,1.002828


In [8]:
# Display x2, the frame loaded from dataset["data2"].
x2

Unnamed: 0_level_0,data2_col1,data2_col2,data2_col3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.049045,0.071127,0.020919
2,0.081164,0.132095,0.015630
4,0.262603,0.297847,0.046794
6,0.114814,0.316567,0.086089
8,0.172130,0.457244,0.067370
...,...,...,...
1490,-0.293417,-0.765439,0.938238
1492,-0.190411,-0.673193,1.021288
1494,-0.157759,-0.617560,0.980530
1496,-0.179489,-0.472574,0.951495


In [9]:
# Case 1. model = up
# Build an Alignment of x1 and x2 from config1 (the 'up' settings) and
# display what getResult() returns.
config = config1
data_alignment = mda.Alignment(config, x1, x2)
data_alignment.getResult()

Unnamed: 0,data1_col1,data1_col2,data1_col3,data2_col1,data2_col2,data2_col3
0,0.030990,0.027961,0.059248,0.049045,0.071127,0.020919
1,0.037388,0.057986,0.094965,0.126382,0.084156,0.002725
2,0.051436,0.086420,0.134013,0.081164,0.132095,0.015630
3,0.079113,0.068896,0.117764,0.172458,0.160821,0.309475
4,0.135780,0.090790,0.162909,0.262603,0.297847,0.046794
...,...,...,...,...,...,...
995,-0.674095,0.955143,0.999681,-0.711819,-0.445741,-0.135003
996,-0.714728,0.969691,1.006262,-0.752254,-0.385803,-0.871798
997,-0.720228,0.994804,1.059640,-0.678312,-0.469590,-0.139389
998,-0.757245,0.946264,1.002828,-0.608111,-0.306886,-0.895500


In [10]:
# Case 2. model = down
# Build an Alignment of x1 and x2 from config2 (the 'down' settings) and
# display what getResult() returns.
config = config2
data_alignment = mda.Alignment(config, x1, x2)
data_alignment.getResult()

Unnamed: 0_level_0,data1_col1,data1_col2,data1_col3,data2_col1,data2_col2,data2_col3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.030990,0.027961,0.059248,0.049045,0.071127,0.020919
2,0.051436,0.086420,0.134013,0.081164,0.132095,0.015630
4,0.135780,0.090790,0.162909,0.262603,0.297847,0.046794
6,0.152309,0.147922,0.280848,0.114814,0.316567,0.086089
8,0.204555,0.089192,0.290696,0.172130,0.457244,0.067370
...,...,...,...,...,...,...
990,-0.630880,0.925540,1.008081,-0.562762,-0.663760,-0.877121
992,-0.704630,1.015151,1.080496,-0.615416,-0.541146,-0.882403
994,-0.690072,0.956616,1.051644,-0.738065,-0.437069,-0.890971
996,-0.714728,0.969691,1.006262,-0.752254,-0.385803,-0.871798


In [11]:
# Case 3. model = RNN_AE
# Build an Alignment of x1 and x2 from config3 (the 'RNN_AE' settings) and
# display what getResult() returns.
config = config3
data_alignment = mda.Alignment(config, x1, x2)
data_alignment.getResult()

  result = np.concatenate(np.array(result), 0)


Unnamed: 0_level_0,concat_emb1,concat_emb2,concat_emb3,concat_emb4,concat_emb5,concat_emb6,concat_emb7,concat_emb8,concat_emb9,concat_emb10,...,concat_emb23,concat_emb24,concat_emb25,concat_emb26,concat_emb27,concat_emb28,concat_emb29,concat_emb30,concat_emb31,concat_emb32
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.372583,0.477909,0.279004,0.157935,0.024772,0.529429,0.262537,0.095857,0.021283,-0.354591,...,-0.504315,-0.245826,-0.079322,0.581392,0.094970,-0.658338,0.377336,-0.272510,-0.555270,0.133426
11,0.382675,0.503570,0.283309,0.264315,-0.036538,0.602020,0.294696,0.206554,-0.020221,-0.384878,...,-0.557141,-0.271574,-0.133123,0.534835,0.149672,-0.662964,0.390029,-0.291008,-0.597048,0.047528
12,0.391243,0.528044,0.283358,0.337439,-0.085860,0.649695,0.321018,0.296375,-0.051884,-0.409783,...,-0.598337,-0.291094,-0.179961,0.499827,0.193797,-0.665526,0.400987,-0.306803,-0.628132,-0.026101
13,0.401149,0.544089,0.288363,0.398155,-0.121529,0.681237,0.341059,0.376472,-0.083667,-0.425412,...,-0.629384,-0.311430,-0.225476,0.435356,0.240372,-0.663436,0.408149,-0.316158,-0.655469,-0.121315
14,0.409130,0.557129,0.290825,0.443536,-0.150957,0.706096,0.359223,0.439598,-0.105379,-0.446722,...,-0.654838,-0.329394,-0.266780,0.367535,0.278769,-0.663076,0.413266,-0.329186,-0.675471,-0.198599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-0.333709,-0.288640,0.678434,0.616148,0.461931,0.795553,0.233779,0.767479,-0.216947,-0.946713,...,0.077133,-0.806155,-0.464669,-0.927393,-0.884849,-0.792021,-0.745815,-0.466470,0.660031,-0.779868
996,-0.337179,-0.289217,0.681455,0.615581,0.501197,0.793848,0.233623,0.774142,-0.215923,-0.946345,...,0.087213,-0.809386,-0.458990,-0.926618,-0.886511,-0.794504,-0.746075,-0.459784,0.665035,-0.779911
997,-0.336871,-0.286248,0.677242,0.615445,0.503007,0.792722,0.229041,0.763014,-0.210499,-0.945347,...,0.090842,-0.802887,-0.455763,-0.925955,-0.886055,-0.788798,-0.738823,-0.468972,0.665964,-0.772241
998,-0.337709,-0.285806,0.681670,0.614588,0.539819,0.790901,0.229464,0.770726,-0.209661,-0.945240,...,0.099777,-0.806958,-0.448183,-0.925086,-0.887273,-0.791932,-0.739734,-0.462278,0.670169,-0.771407
