In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def load_data(file_path: str) -> tuple:
    """Read a CSV dataset and return standardized train/test tensors.

    The ``'G(mg/dL)'`` column is taken as the target and every other column
    as a feature. Rows are split 80/20 with a fixed seed (42). Features are
    standardized with a ``StandardScaler`` fitted on the training split only
    and then applied to the test split.

    Args:
        file_path (str): Path to the CSV data file.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)``, each a
        ``torch.float32`` tensor.
    """
    target_column = 'G(mg/dL)'
    frame = pd.read_csv(file_path)
    features = frame.drop(target_column, axis=1).values
    targets = frame[target_column].values

    train_features, test_features, train_targets, test_targets = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training features only so that no test-set
    # statistics leak into the transformation.
    standardizer = StandardScaler()
    train_features = standardizer.fit_transform(train_features)
    test_features = standardizer.transform(test_features)

    ordered_splits = (train_features, train_targets, test_features, test_targets)
    return tuple(torch.tensor(split, dtype=torch.float32) for split in ordered_splits)

In [2]:
# Load the raw per-source CSV datasets.
# NOTE(review): data_L3 is loaded but not used in the cells visible here.
data_L1 = pd.read_csv('data/data_L1.csv')
data_L2 = pd.read_csv('data/data_L2.csv')
data_L3 = pd.read_csv('data/data_L3.csv')

# Column-name list for a mixed frame.
# NOTE(review): these names (e.g. 'T-rPD_L1dS') differ from the labels passed to
# mix_data in later cells (e.g. 'T-rPD1_dS') — confirm which spelling is intended.
mix_target = ['LD Temp', 'mPD1_dS', 'mPD2_dS', 'T-rPD_L1dS', 'T-rPD_L2dS', 'R-rPD_L1dS', 'R-rPD_L2dS']

In [3]:
def _class_rows(frame, class_number, columns, labels, shuffle, rng):
    """Select one class from ``frame``, optionally shuffle it, and relabel the columns."""
    rows = frame.loc[frame['G(mg/dL)'] == class_number, columns]
    if shuffle:
        rows = rows.sample(frac=1, random_state=rng)
    rows = rows.reset_index(drop=True)
    rows.columns = list(labels)
    return rows


def mix_data(data_frame1, data_frame2, target_label1, target_label2, front_shuffle=False, back_shuffle=False,
             random_state=None, class_list=None):
    """Pair rows of two frames class by class into one relabelled frame.

    For every value in ``class_list``, the rows of each input frame whose
    ``'G(mg/dL)'`` equals that value are selected, optionally shuffled, renamed
    with the given labels and placed side by side. The per-class results are
    stacked in ``class_list`` order with a fresh 0..n-1 index.

    The side whose label list has 5 entries contributes
    ``'G(mg/dL)', 'LD Temp', 'mPD_dS', 'T-rPD_dS', 'R-rPD_dS'``; the other
    side contributes only ``'mPD_dS', 'T-rPD_dS', 'R-rPD_dS'``. If
    ``target_label1`` has 5 entries ``data_frame1`` is the 5-column side,
    otherwise ``data_frame2`` is.

    Rows are paired by position and aligned to ``data_frame1``: within a
    class, surplus rows of ``data_frame2`` are dropped and missing ones are
    filled with NaN.

    Args:
        data_frame1 (pd.DataFrame): Source of the first ("front") columns.
        data_frame2 (pd.DataFrame): Source of the second ("back") columns.
        target_label1 (list[str]): Output column names for ``data_frame1``.
        target_label2 (list[str]): Output column names for ``data_frame2``.
        front_shuffle (bool): Shuffle ``data_frame1`` rows within each class.
        back_shuffle (bool): Shuffle ``data_frame2`` rows within each class.
        random_state (int | np.random.RandomState | np.random.Generator | None):
            Seed or generator for the shuffles. ``None`` (default) keeps the
            previous behaviour of drawing from NumPy's global random state.
        class_list (iterable | None): ``'G(mg/dL)'`` values to process, in
            order. Defaults to 0, 50, ..., 400.

    Returns:
        pd.DataFrame: Columns ``target_label1 + target_label2``; empty (with
        those columns) when no class has any row in ``data_frame1``.

    Raises:
        KeyError: If an input frame lacks one of the expected source columns.
        ValueError: If a label list length does not match the number of
            columns selected for its side.
    """
    full_columns = ['G(mg/dL)', 'LD Temp', 'mPD_dS', 'T-rPD_dS', 'R-rPD_dS']
    signal_columns = ['mPD_dS', 'T-rPD_dS', 'R-rPD_dS']
    if class_list is None:
        class_list = [0, 50, 100, 150, 200, 250, 300, 350, 400]

    # Which side carries 'G(mg/dL)' and 'LD Temp' depends only on the label
    # lists, so decide it once rather than on every loop iteration.
    if len(target_label1) == 5:
        columns1, columns2 = full_columns, signal_columns
    else:
        columns1, columns2 = signal_columns, full_columns

    # A single generator shared by both shuffles: seeding each shuffle with the
    # same integer would apply the same permutation to equally sized sides and
    # leave their relative pairing unshuffled.
    if random_state is None or isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    class_frames = []
    for class_number in class_list:
        front = _class_rows(data_frame1, class_number, columns1, target_label1, front_shuffle, rng)
        back = _class_rows(data_frame2, class_number, columns2, target_label2, back_shuffle, rng)

        # Align the back side to the front side's positional index.
        env_data = pd.concat([front, back.reindex(front.index)], axis=1)
        if not env_data.empty:
            class_frames.append(env_data)

    if not class_frames:
        return pd.DataFrame(columns=list(target_label1) + list(target_label2))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.
    return pd.concat(class_frames, ignore_index=True)

In [4]:
# Output labels: data_L1 provides 'G(mg/dL)', 'LD Temp' and the '*1_dS' signal
# columns; data_L2 provides the '*2_dS' signal columns.
target_label1 = ['G(mg/dL)', 'LD Temp', 'mPD1_dS', 'T-rPD1_dS', 'R-rPD1_dS']
target_label2 = ['mPD2_dS', 'T-rPD2_dS', 'R-rPD2_dS']

# Shuffle both sides within each class before pairing their rows.
mix_data12 = mix_data(
    data_L1,
    data_L2,
    target_label1,
    target_label2,
    front_shuffle=True,
    back_shuffle=True,
)
mix_data12

Unnamed: 0,G(mg/dL),LD Temp,mPD1_dS,T-rPD1_dS,R-rPD1_dS,mPD2_dS,T-rPD2_dS,R-rPD2_dS
0,0,25.302,550.597957,1789.730591,445.258270,559.514954,603.758173,995.485883
1,0,25.302,557.884143,1796.382576,446.655437,558.198837,592.240795,902.065750
2,0,25.249,556.358253,1788.033051,435.082413,558.778981,610.133547,927.157242
3,0,25.302,551.864782,1783.755882,445.805059,558.017635,596.032351,978.321211
4,0,25.302,549.511715,1787.314053,437.499993,561.589215,593.609307,931.425026
...,...,...,...,...,...,...,...,...
675,400,25.249,553.013974,1797.889549,473.740177,556.835075,611.128639,1038.452705
676,400,25.302,553.163393,1798.914762,456.836029,558.031933,596.971038,1005.722221
677,400,25.249,554.021705,1801.189293,457.784950,556.548973,611.228775,1038.648246
678,400,25.302,554.659744,1797.648166,462.687896,558.724997,593.963747,949.096881


In [5]:
# Save the L1/L2 mixed frame as a UTF-8 CSV without the index column.
mix_data12.to_csv('data/MixData_L12.csv', index=False, encoding='utf-8')

In [6]:
# Output labels for the L2/L1 mix.
target_label1 = ['G(mg/dL)', 'LD Temp', 'mPD2_dS', 'T-rPD2_dS', 'R-rPD2_dS']
target_label2 = ['mPD1_dS', 'T-rPD1_dS', 'R-rPD1_dS']

# NOTE(review): the label lists are passed in swapped order (target_label2
# first). mix_data therefore labels the data_L2 signals as
# 'mPD1_dS'/'T-rPD1_dS'/'R-rPD1_dS' and takes 'G(mg/dL)', 'LD Temp' and the
# '*2_dS' columns from data_L1 — confirm this relabelling is intended.
mix_data21 = mix_data(
    data_L2,
    data_L1,
    target_label2,
    target_label1,
    front_shuffle=True,
    back_shuffle=True,
)
mix_data21

Unnamed: 0,mPD1_dS,T-rPD1_dS,R-rPD1_dS,G(mg/dL),LD Temp,mPD2_dS,T-rPD2_dS,R-rPD2_dS
0,558.195663,592.272584,902.142034,0,25.302,550.648820,1789.927816,445.404500
1,558.484959,594.801399,957.851776,0,25.302,549.572750,1787.718399,437.519066
2,558.270355,594.486687,883.754932,0,25.302,551.775772,1783.774925,445.773269
3,558.222671,594.286415,884.622761,0,25.249,551.747162,1782.935784,450.594154
4,558.198837,592.240795,902.065750,0,25.302,546.176047,1779.613635,460.123107
...,...,...,...,...,...,...,...,...
675,558.432526,595.049389,984.497826,400,25.249,554.040779,1801.360924,457.575142
676,562.209087,605.310127,923.307207,400,25.249,548.504650,1794.041420,447.394501
677,562.183656,605.363533,923.007133,400,25.302,549.496439,1776.507751,452.334667
678,557.745857,595.964913,970.464234,400,25.302,545.672196,1787.050690,452.487198


In [7]:
# Save the L2/L1 mixed frame as a UTF-8 CSV without the index column.
mix_data21.to_csv('data/MixData_L21.csv', index=False, encoding='utf-8')