In [1]:
import test_densenet
from base_densenet import DenseNet
from test_densenet import user_setting
from test_densenet import address_book
from test_densenet import property

import json
import os
from collections import OrderedDict

import torch
import torch.nn as nn

### 혈당 예측 용으로 활용할 모델

상단에 1d 형식의 데이터를 3차원으로 바꿔줄 1d convolution layer가 배치되고 그 이후엔 DenseNet121모델을 통과하도록 되어있다.
y값의 해당하는 Class의 갯수는 글루코스 데이터의 값에 따라 0, 150, 200, 250, 300, 350, 400, 450, 500 총 9개의 값이 각각 0 ~ 8의 슷지로 대응되도록 하였다.

In [2]:
class CgmDenseNet(nn.Module):
    def __init__(self, classes=9, device="cuda"):
        super(CgmDenseNet, self).__init__()
        self.device = torch.device(device)
        self.var_layer = nn.Conv1d(in_channels=1, out_channels=3, kernel_size=2, padding=1)
        self.var_layer.to(self.device)
        self.up = nn.Upsample(scale_factor=16, mode='nearest')
        
        self.densenet = DenseNet(num_classes=classes) # 0, 150, 200, 250, 300, 350, 400, 450, 500
        self.densenet.to(self.device)
    
    def forward(self, x):
        x = self.var_layer(x)
        x = x.view(1,3, 4, 4)
        
        x = self.up(x)
        # print("upsacled shape", x.shape)
        x = self.densenet(x)
        return x

### 모델 훈련 코드

손실(비용) 함수 : Cross Entropy Loss
옵티마이저 : Adam(모델 전체 가중치 학습 적용)
적응 학습률 : 스케줄러로 1epoch마다 10%씩 학습률 감소 조정

입력 텐서 형식 : (1, 16) : 결과 값인 Glucose값을 제외한 선별한 16개의 특성을 MinMax Scaling한 값의 텐서
-> (1, 4, 4) : view함수를 활용해 (4,4) 형식으로 변환
-> (1, 64, 64) : Upsample을 이용해(nearest)

In [3]:
import torch.optim as optim

class ModelTrainer():
    def __init__(self, classes=9, device="cuda", target_model=CgmDenseNet):
        self.device = torch.device(device)
        self.model = target_model
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0005)
        
        self.scheduler = optim.lr_scheduler.LambdaLR(optimizer=self.optimizer,
                        lr_lambda=lambda epoch: 0.9 ** epoch,
                        last_epoch=-1,
                        verbose=False)
    
    def train(self, epoch, x_data):
        for ep in range(0, epoch):
            loss_sum = 0
            for n in range(0, len(x_data)): # len(x_data)
                x_tensor = torch.Tensor([[x_data[0][n], x_data[1][n], x_data[2][n],
                                         x_data[3][n], x_data[4][n], x_data[5][n],
                                          x_data[6][n], x_data[7][n], x_data[8][n],
                                          x_data[9][n], x_data[10][n], x_data[11][n],
                                          x_data[12][n], x_data[14][n], x_data[15][n]
                                         ]]).to(self.device)
                y_tensor = torch.Tensor([x_data[16][n]]).to(self.device)
                y_tensor = y_tensor.type(torch.LongTensor)
                
                result = self.model(x_tensor)
                y_tensor = y_tensor.to(self.device)
                
                cost = self.criterion(result, y_tensor)
                # print("loss : ",cost)

                self.optimizer.zero_grad()
                cost.backward()
                self.optimizer.step()
                
                loss_sum += cost
                
                if n != 0 and n % 500 == 0:
                    print(n, " complete in epoch ", ep, " loss :", loss_sum/n)
            print(ep, ' epoch loss : ', loss_sum/len(x_data))
            self.scheduler.step()

In [4]:
dense_net = CgmDenseNet() 

### DataFrame 생성

Glucose set 데이터 중 표준편차 값이 높은 16개의 환경변수와 y값에 해당하는 Glucose값을 pandas framework로 데이터 추출.


In [5]:
import pandas as pd

origin_csv = pd.read_csv('Glucose_Set-2A-1.csv')
# df = df.sample(frac = 1)
usable_data = pd.DataFrame()
usable_data = pd.concat([origin_csv['mPD1_AL-D'], origin_csv['mPD2_AL-D'], origin_csv['mPD3_AL-D'], 
                         origin_csv['T-rPD_AL-D'], origin_csv['R-rPD_AL-D'],
                         origin_csv['LD_C_Av'], origin_csv['T-rPD'], origin_csv['Th3_C'], origin_csv['Th4_C'], origin_csv['Th6_C'],
                         origin_csv['Th3_AvD'], origin_csv['Th4_AvD'], origin_csv['Th6_AvD'],
                         origin_csv['mPD1'], origin_csv['mPD2'], origin_csv['mPD3'], origin_csv['Glucose']], axis = 1, sort = False)

usable_data = usable_data.sample(frac = 1)

In [6]:
usable_data.head()

Unnamed: 0,mPD1_AL-D,mPD2_AL-D,mPD3_AL-D,T-rPD_AL-D,R-rPD_AL-D,LD_C_Av,T-rPD,Th3_C,Th4_C,Th6_C,Th3_AvD,Th4_AvD,Th6_AvD,mPD1,mPD2,mPD3,Glucose
4365,0.072,0.057,309.48,46.810862,0.061036,16.3,66.605629,24.326,24.921,23.781,24.329,24.923,23.784,4.119936,4.234378,314.679169,500
2002,0.008,138.855,0.02,70.61621,0.102998,13.4,90.562294,24.265,24.83,23.646,24.255,24.827,23.645,4.043641,143.129623,5.035477,400
1765,0.011,138.132,0.041,70.22202,0.081381,13.4,90.218971,24.167,24.746,23.542,24.164,24.743,23.549,4.005493,142.328522,5.111773,250
840,262.722,-0.072,0.046,301.434359,0.278477,7.0,321.202423,24.025,24.652,22.867,24.025,24.652,22.867,266.765869,4.043641,5.073625,150
3643,0.053,0.031,164.426,17.248415,0.017166,13.2,37.079422,24.356,24.938,23.767,24.353,24.937,23.766,4.119936,4.158083,169.565887,500


In [7]:
usable_data.head()

Unnamed: 0,mPD1_AL-D,mPD2_AL-D,mPD3_AL-D,T-rPD_AL-D,R-rPD_AL-D,LD_C_Av,T-rPD,Th3_C,Th4_C,Th6_C,Th3_AvD,Th4_AvD,Th6_AvD,mPD1,mPD2,mPD3,Glucose
4365,0.072,0.057,309.48,46.810862,0.061036,16.3,66.605629,24.326,24.921,23.781,24.329,24.923,23.784,4.119936,4.234378,314.679169,500
2002,0.008,138.855,0.02,70.61621,0.102998,13.4,90.562294,24.265,24.83,23.646,24.255,24.827,23.645,4.043641,143.129623,5.035477,400
1765,0.011,138.132,0.041,70.22202,0.081381,13.4,90.218971,24.167,24.746,23.542,24.164,24.743,23.549,4.005493,142.328522,5.111773,250
840,262.722,-0.072,0.046,301.434359,0.278477,7.0,321.202423,24.025,24.652,22.867,24.025,24.652,22.867,266.765869,4.043641,5.073625,150
3643,0.053,0.031,164.426,17.248415,0.017166,13.2,37.079422,24.356,24.938,23.767,24.353,24.937,23.766,4.119936,4.158083,169.565887,500


### 정규화 전처리

MinMaxScaling으로 임시적으로 각 변수들에 대한 Scaling 전처리를 해준다.

In [8]:
from sklearn.preprocessing import MinMaxScaler

mMscaler = MinMaxScaler()
mMscaler.fit(usable_data)

mMscaled_data = mMscaler.transform(usable_data)
x_data = pd.DataFrame(mMscaled_data)

x_data[0:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.000663,0.000352,0.998069,0.104676,0.245283,1.0,0.104623,0.949669,0.951111,0.868744,0.960758,0.961625,0.874166,0.000578,0.000593,0.998524,1.0
1,0.00042,0.359756,0.000335,0.1881,0.349056,0.718447,0.188536,0.868874,0.748889,0.741265,0.860622,0.744921,0.741659,0.000289,0.360162,0.000369,0.8
2,0.000432,0.357884,0.000403,0.186718,0.295597,0.718447,0.187333,0.739073,0.562222,0.643059,0.737483,0.555305,0.650143,0.000144,0.358088,0.000615,0.5
3,0.995003,1.8e-05,0.000419,0.996985,0.783021,0.097087,0.996392,0.550993,0.353333,0.005666,0.549391,0.349887,0.0,0.994658,9.9e-05,0.000492,0.3
4,0.000591,0.000285,0.530399,0.001076,0.136791,0.699029,0.001203,0.989404,0.988889,0.855524,0.993234,0.993228,0.857007,0.000578,0.000395,0.530743,1.0
5,0.000352,0.360966,0.000403,0.18855,0.274934,0.718447,0.189337,0.980132,0.944444,0.884797,0.979702,0.943567,0.887512,0.000289,0.361347,0.000492,1.0
6,0.998789,0.000215,0.00021,0.998817,0.783021,0.097087,0.998664,0.512583,0.368889,0.780925,0.516915,0.365688,0.780744,0.998845,0.000296,0.000246,0.7
7,0.000375,0.000194,0.998633,0.104261,0.235848,1.0,0.104222,0.695364,0.562222,0.602455,0.684709,0.555305,0.596759,0.000289,0.000395,0.998401,0.5
8,0.000553,0.000303,0.530824,0.000722,0.167115,0.699029,0.000534,0.817219,0.78,0.925401,0.810555,0.778781,0.926597,0.000578,0.000395,0.530497,1.0
9,0.00042,0.357962,0.000429,0.186403,0.266845,0.718447,0.186531,0.818543,0.657778,0.542021,0.809202,0.647856,0.534795,0.000144,0.358779,0.000615,0.4


### Class Mapping

0부터 500까지 50, 100을 제외한 500이하 50의 배수들을 각각의 0~8까지의 클래스를 뜻하는 값으로 매핑해준다.

In [10]:
x_data = x_data.replace(('Glucose',1.0), 8)
x_data = x_data.replace(('Glucose',0.3), 1)
x_data = x_data.replace(('Glucose',0.4), 2)
x_data = x_data.replace(('Glucose',0.5), 3)
x_data = x_data.replace(('Glucose',0.6), 4)
x_data = x_data.replace(('Glucose',0.7000000000000001), 5)
x_data = x_data.replace(('Glucose',0.8), 6)
x_data = x_data.replace(('Glucose',0.9), 7)
# x_data = x_data.replace(('Glucose',0.7), 5)

x_data[0:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.000663,0.000352,0.998069,0.104676,0.245283,8.0,0.104623,0.949669,0.951111,0.868744,0.960758,0.961625,0.874166,0.000578,0.000593,0.998524,8.0
1,0.00042,0.359756,0.000335,0.1881,0.349056,0.718447,0.188536,0.868874,0.748889,0.741265,0.860622,0.744921,0.741659,0.000289,0.360162,0.000369,6.0
2,0.000432,0.357884,0.000403,0.186718,0.295597,0.718447,0.187333,0.739073,0.562222,0.643059,0.737483,0.555305,0.650143,0.000144,0.358088,0.000615,3.0
3,0.995003,1.8e-05,0.000419,0.996985,0.783021,0.097087,0.996392,0.550993,0.353333,0.005666,0.549391,0.349887,0.0,0.994658,9.9e-05,0.000492,1.0
4,0.000591,0.000285,0.530399,0.001076,0.136791,0.699029,0.001203,0.989404,0.988889,0.855524,0.993234,0.993228,0.857007,0.000578,0.000395,0.530743,8.0
5,0.000352,0.360966,0.000403,0.18855,0.274934,0.718447,0.189337,0.980132,0.944444,0.884797,0.979702,0.943567,0.887512,0.000289,0.361347,0.000492,8.0
6,0.998789,0.000215,0.00021,0.998817,0.783021,0.097087,0.998664,0.512583,0.368889,0.780925,0.516915,0.365688,0.780744,0.998845,0.000296,0.000246,5.0
7,0.000375,0.000194,0.998633,0.104261,0.235848,8.0,0.104222,0.695364,0.562222,0.602455,0.684709,0.555305,0.596759,0.000289,0.000395,0.998401,3.0
8,0.000553,0.000303,0.530824,0.000722,0.167115,0.699029,0.000534,0.817219,0.78,0.925401,0.810555,0.778781,0.926597,0.000578,0.000395,0.530497,8.0
9,0.00042,0.357962,0.000429,0.186403,0.266845,0.718447,0.186531,0.818543,0.657778,0.542021,0.809202,0.647856,0.534795,0.000144,0.358779,0.000615,2.0


### Trainer 동작

In [11]:
device = torch.device('cuda')
wgt_file = torch.load("result_test.pth", map_location="cuda")
dense_net.load_state_dict(wgt_file)
dense_net.train()

trainer = ModelTrainer(device=device, target_model=dense_net)
trainer.train(4, x_data)
torch.save(dense_net.state_dict(), "test2.pth")

500  complete in epoch  0  loss : tensor(0.5411, device='cuda:0', grad_fn=<DivBackward0>)
1000  complete in epoch  0  loss : tensor(0.5243, device='cuda:0', grad_fn=<DivBackward0>)
1500  complete in epoch  0  loss : tensor(0.5250, device='cuda:0', grad_fn=<DivBackward0>)
2000  complete in epoch  0  loss : tensor(0.5228, device='cuda:0', grad_fn=<DivBackward0>)
2500  complete in epoch  0  loss : tensor(0.5217, device='cuda:0', grad_fn=<DivBackward0>)
3000  complete in epoch  0  loss : tensor(0.5186, device='cuda:0', grad_fn=<DivBackward0>)
3500  complete in epoch  0  loss : tensor(0.5150, device='cuda:0', grad_fn=<DivBackward0>)
4000  complete in epoch  0  loss : tensor(0.5164, device='cuda:0', grad_fn=<DivBackward0>)
0  epoch loss :  tensor(0.5105, device='cuda:0', grad_fn=<DivBackward0>)
500  complete in epoch  1  loss : tensor(0.4313, device='cuda:0', grad_fn=<DivBackward0>)
1000  complete in epoch  1  loss : tensor(0.4091, device='cuda:0', grad_fn=<DivBackward0>)
1500  complete in e