In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def load_data(file_path: str) -> tuple:
    """Load a CSV file and return standardized train/test tensors.

    The 'G(mg/dL)' column is the regression target; every other column is a
    feature. Rows are split 80/20 (fixed ``random_state=42``), and the
    features are standardized with a scaler fitted on the training split only.

    Args:
        file_path (str): Path to the CSV data file.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)`` as float32 tensors.
    """
    target_column = 'G(mg/dL)'

    frame = pd.read_csv(file_path)
    features = frame.drop(target_column, axis=1).values
    targets = frame[target_column].values

    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Fit on the training split only so no test statistics leak into scaling.
    standardizer = StandardScaler()
    features_train = standardizer.fit_transform(features_train)
    features_test = standardizer.transform(features_test)

    def to_float_tensor(array):
        return torch.tensor(array, dtype=torch.float32)

    return (
        to_float_tensor(features_train),
        to_float_tensor(targets_train),
        to_float_tensor(features_test),
        to_float_tensor(targets_test),
    )

In [2]:
def _select_class_rows(data_frame, class_number, source_columns, output_columns, shuffle, rng):
    """Return one class's rows with the selected columns renamed.

    Rows of ``data_frame`` whose 'G(mg/dL)' equals ``class_number`` are
    restricted to ``source_columns``, optionally shuffled, re-indexed from 0,
    and renamed positionally to ``output_columns``.
    """
    rows = data_frame.loc[data_frame['G(mg/dL)'] == class_number, source_columns]
    if shuffle:
        rows = rows.sample(frac=1, random_state=rng)
    rows = rows.reset_index(drop=True)
    rows.columns = list(output_columns)
    return rows


def mix_data(data_frame1, data_frame2, mix_target, target_label1, target_label2,
             front_shuffle=False, back_shuffle=False, class_list=None, random_state=None):
    """Pair rows of two frames class by class into one wide frame.

    For every value in ``class_list`` the rows of each input frame with that
    'G(mg/dL)' value are selected and placed side by side by row position.
    The side whose target label list has five names contributes
    'G(mg/dL)', 'LD Temp', 'mPD_dS', 'T-rPD_dS', 'R-rPD_dS'; the other side
    contributes only 'mPD_dS', 'T-rPD_dS', 'R-rPD_dS'. The selected columns
    are renamed positionally to the given target labels.

    Within a class the number of output rows follows ``data_frame1``: surplus
    rows of ``data_frame2`` are dropped, and missing ones become NaN.

    Args:
        data_frame1: First source frame (left-hand columns of the result).
        data_frame2: Second source frame (right-hand columns of the result).
        mix_target: Kept for backward compatibility; it does not influence the
            result, whose columns are always ``target_label1 + target_label2``.
        target_label1: Output column names for the ``data_frame1`` columns
            (five names, or three when ``data_frame2`` carries the label).
        target_label2: Output column names for the ``data_frame2`` columns.
        front_shuffle: Shuffle each class's ``data_frame1`` rows before pairing.
        back_shuffle: Shuffle each class's ``data_frame2`` rows before pairing.
        class_list: 'G(mg/dL)' values to process, in output order. Defaults to
            0, 50, ..., 400.
        random_state: Optional integer seed (or NumPy random state object) that
            makes the shuffles reproducible. ``None`` keeps the previous
            behaviour of drawing from NumPy's global random state.

    Returns:
        pandas.DataFrame: Paired rows of all classes, with a fresh 0..n-1 index.

    Raises:
        ValueError: If the target label lists do not match the number of
            selected source columns.
    """
    full_columns = ['G(mg/dL)', 'LD Temp', 'mPD_dS', 'T-rPD_dS', 'R-rPD_dS']
    signal_columns = ['mPD_dS', 'T-rPD_dS', 'R-rPD_dS']

    if class_list is None:
        class_list = [0, 50, 100, 150, 200, 250, 300, 350, 400]

    # The side that was given five output names carries the label and temperature.
    if len(target_label1) == 5:
        columns1, columns2 = full_columns, signal_columns
    else:
        columns1, columns2 = signal_columns, full_columns

    if len(target_label1) != len(columns1) or len(target_label2) != len(columns2):
        raise ValueError(
            'target_label1/target_label2 must have 5 and 3 names (in either order), '
            'got %d and %d' % (len(target_label1), len(target_label2))
        )

    target_list = list(target_label1) + list(target_label2)

    # One shared generator so the front and back shuffles draw different
    # permutations; re-seeding each sample() call with the same integer would
    # permute both sides identically and leave the pairing unshuffled.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    blocks = []
    for class_number in class_list:
        front = _select_class_rows(
            data_frame1, class_number, columns1, target_label1, front_shuffle, rng)
        back = _select_class_rows(
            data_frame2, class_number, columns2, target_label2, back_shuffle, rng)
        if front.empty:
            # Nothing to pair against for this class.
            continue
        blocks.append(pd.concat([front, back.reindex(front.index)], axis=1))

    if not blocks:
        return pd.DataFrame(columns=target_list)

    # DataFrame.append was removed in pandas 2.0; concatenate all blocks once.
    return pd.concat(blocks, ignore_index=True)

In [3]:
# Load the three source tables (L1, L2, L3) from the data directory.
data_L1 = pd.read_csv('data/data_L1.csv')
data_L2 = pd.read_csv('data/data_L2.csv')
data_L3 = pd.read_csv('data/data_L3.csv')

# Column-name lists passed to mix_data() as its `mix_target` argument.
mix_target23 = [
    'LD Temp',
    'mPD2_dS', 'mPD3_dS',
    'T-rPD_L2dS', 'T-rPD_L3dS',
    'R-rPD_L2dS', 'R-rPD_L3dS',
]
mix_target13 = [
    'LD Temp',
    'mPD1_dS', 'mPD3_dS',
    'T-rPD_L1dS', 'T-rPD_L3dS',
    'R-rPD_L1dS', 'R-rPD_L3dS',
]

In [4]:
# Output column names: the L2 side keeps the label and temperature plus its
# three signal columns; the L3 side contributes only its three signal columns.
target_label1 = ['G(mg/dL)', 'LD Temp', 'mPD2_dS', 'T-rPD2_dS', 'R-rPD2_dS']
target_label2 = ['mPD3_dS', 'T-rPD3_dS', 'R-rPD3_dS']

# The shuffles inside mix_data() draw from NumPy's global random state; seed it
# in this cell so re-running the cell reproduces the same pairing (and the same
# CSV written from it).
np.random.seed(42)
mix_data23 = mix_data(
    data_L2, data_L3, mix_target23, target_label1, target_label2,
    front_shuffle=True, back_shuffle=True,
)
mix_data23

Unnamed: 0,G(mg/dL),LD Temp,mPD2_dS,T-rPD2_dS,R-rPD2_dS,mPD3_dS,T-rPD3_dS,R-rPD3_dS
0,0,25.249,558.494499,592.269406,902.256485,325.188824,416.380585,630.278664
1,0,25.302,558.446800,594.162437,885.624119,327.115265,416.409180,576.557366
2,0,25.249,561.589215,593.609307,931.425026,326.687062,407.824066,632.207928
3,0,25.249,559.596016,603.685058,996.604897,322.592067,419.557682,603.795369
4,0,25.249,561.416600,593.709443,931.527058,323.448327,421.842020,598.689293
...,...,...,...,...,...,...,...,...
675,400,25.249,558.699545,595.911779,969.971035,324.993340,419.184418,575.455871
676,400,25.249,555.991066,596.107964,971.208130,323.872741,403.171982,627.989753
677,400,25.249,558.046242,600.957489,1033.037417,322.810727,408.067147,596.460853
678,400,25.302,558.895046,602.078072,978.556267,325.031487,419.203492,575.513061


In [5]:
# Persist the paired L2/L3 table; the row index is not written.
mix_data23.to_csv(
    'data/MixData_L23.csv',
    index=False,
    encoding='utf-8',
)

In [6]:
# Output column names: the L1 side keeps the label and temperature plus its
# three signal columns; the L3 side contributes only its three signal columns.
target_label1 = ['G(mg/dL)', 'LD Temp', 'mPD1_dS', 'T-rPD1_dS', 'R-rPD1_dS']
target_label2 = ['mPD3_dS', 'T-rPD3_dS', 'R-rPD3_dS']

# The shuffles inside mix_data() draw from NumPy's global random state; seed it
# in this cell so re-running the cell reproduces the same pairing (and the same
# CSV written from it).
np.random.seed(42)
mix_data13 = mix_data(
    data_L1, data_L3, mix_target13, target_label1, target_label2,
    front_shuffle=True, back_shuffle=True,
)
mix_data13

Unnamed: 0,G(mg/dL),LD Temp,mPD1_dS,T-rPD1_dS,R-rPD1_dS,mPD3_dS,T-rPD3_dS,R-rPD3_dS
0,0,25.302,558.051990,1795.650203,446.373150,325.192639,416.432083,630.229078
1,0,25.302,551.785309,1782.725823,450.613228,322.926984,409.455167,603.254275
2,0,25.249,551.546890,1783.450584,445.563461,327.220168,425.626579,576.280800
3,0,25.302,546.176047,1779.613635,460.123107,322.831616,409.404304,602.949099
4,0,25.302,546.144258,1779.893379,460.002308,324.209964,418.456883,580.663667
...,...,...,...,...,...,...,...,...
675,400,25.249,554.675003,1797.751144,462.558197,322.799828,408.054885,596.303489
676,400,25.302,554.076201,1801.320048,457.381682,322.211410,420.411827,602.757097
677,400,25.302,553.236968,1794.125129,453.979771,326.228347,424.262764,602.573982
678,400,25.249,554.675003,1798.189865,462.405609,325.637054,414.945256,616.183189


In [7]:
# Persist the paired L1/L3 table; the row index is not written.
mix_data13.to_csv(
    'data/MixData_L13.csv',
    index=False,
    encoding='utf-8',
)