In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from MySQLdb import connect
%matplotlib inline

In [2]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Pull the whole `ijv_ann` table into a DataFrame.
# The password is read from the PASSWD environment variable so that it never
# appears in the notebook; os.getenv returns None when the variable is unset.
conn = connect(
    host="140.112.174.26",
    db="ijv",
    user="md703",
    passwd=os.getenv("PASSWD")
)
try:
    df = pd.read_sql("SELECT * FROM ijv_ann", con=conn)
finally:
    # The table is fully materialised in `df`, so release the connection
    # instead of holding it open for the rest of the (long) training session.
    conn.close()

In [5]:
# Show the column labels of the loaded table. The dataset and model cells
# below select columns by position, so this listing is the reference for
# those positional slices.
df.columns

Index(['id', 'idx', 'skin_mua', 'skin_mus', 'skin_g', 'skin_n', 'fat_mua',
       'fat_mus', 'fat_g', 'fat_n', 'muscle_mua', 'muscle_mus', 'muscle_g',
       'muscle_n', 'ijv_mua', 'ijv_mus', 'ijv_g', 'ijv_n', 'cca_mua',
       'cca_mus', 'cca_g', 'cca_n', 'skin_thickness', 'fat_thickness',
       'ijv_radius', 'ijv_depth', 'cca_radius', 'cca_depth',
       'ijv_cca_distance', 'reflectance_20', 'reflectance_24',
       'reflectance_28'],
      dtype='object')

In [17]:
# Preview columns 2..21 of the second row: the 20 per-tissue optical
# properties (skin_mua through cca_n in the column listing above).
df.iloc[1][2:22]

skin_mua        5.8498
skin_mus       4.05295
skin_g        0.733152
skin_n         1.39427
fat_mua       0.001289
fat_mus        10.3142
fat_g         0.906961
fat_n           1.4371
muscle_mua    0.243906
muscle_mus     9.98201
muscle_g       0.94193
muscle_n       1.43048
ijv_mua       0.651252
ijv_mus        12.4042
ijv_g         0.919755
ijv_n          1.42366
cca_mua       0.477174
cca_mus        17.5248
cca_g         0.938117
cca_n          1.43314
Name: 1, dtype: object

In [19]:
class Model(nn.Module):
    """Two-branch fully connected network with a shared trunk.

    One branch embeds a 20-feature input, the other a 7-feature input; both
    are projected to 64 units, batch-normalised and passed through ReLU. The
    two embeddings are summed element-wise and fed through two hidden layers
    (128 and 256 units, each followed by batch norm and ReLU) and a final
    linear layer producing 3 unbounded outputs.

    In this notebook the model is called as ``model(param, geo)``, i.e. the
    first argument receives the 20 optical parameters and the 3 outputs are
    compared against the three reflectance columns.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are used so that a
        # seeded initialisation and the state_dict layout stay stable.
        embed_width = 64
        self.spec_layer = nn.Linear(20, embed_width)
        self.bn_spec = nn.BatchNorm1d(embed_width)
        self.geo_layer = nn.Linear(7, embed_width)
        self.bn_geo = nn.BatchNorm1d(embed_width)

        hidden_a, hidden_b = 128, 256
        self.fc1 = nn.Linear(embed_width, hidden_a)
        self.bn1 = nn.BatchNorm1d(hidden_a)
        self.fc2 = nn.Linear(hidden_a, hidden_b)
        self.bn2 = nn.BatchNorm1d(hidden_b)
        self.fc3 = nn.Linear(hidden_b, 3)

    def forward(self, spec, geo):
        """Run both branches, merge them by addition and return the raw output.

        Args:
            spec: float tensor whose last dimension is 20.
            geo: float tensor whose last dimension is 7.

        Returns:
            Tensor with last dimension 3; no output activation is applied.
        """
        spec_embedding = torch.relu(self.bn_spec(self.spec_layer(spec)))
        geo_embedding = torch.relu(self.bn_geo(self.geo_layer(geo)))

        # The branches share the same width, so they are fused by summation.
        hidden = torch.relu(self.bn1(self.fc1(spec_embedding + geo_embedding)))
        hidden = torch.relu(self.bn2(self.fc2(hidden)))
        return self.fc3(hidden)

In [32]:
# Preview the last three columns (reflectance_20/24/28) of the second row as
# a float array. `np.float` was only an alias of the builtin float and has
# been removed from NumPy, so the explicit np.float64 dtype is used instead.
df.iloc[1][-3:].astype(np.float64).values

array([1.26006813e-08, 2.90812540e-09, 4.89341706e-10])

In [39]:
class SpecData(Dataset):
    """Dataset yielding ``(spec, geo, param)`` float32 tensors for each row.

    Columns are selected by position:

    * ``spec``  - the last three columns (reflectance at 20 mm, 24 mm, 28 mm)
    * ``geo``   - the seven columns immediately before them
    * ``param`` - columns 2..21 (20 values)

    The three column groups are converted to tensors once at construction,
    so fetching a sample is a plain tensor index instead of a pandas row
    lookup and dtype cast on every access.

    Args:
        df: DataFrame whose selected columns are castable to float.
    """

    def __init__(self, df):
        self.df = df
        self._spec = self._to_float_tensor(df.iloc[:, -3:])
        self._geo = self._to_float_tensor(df.iloc[:, -10:-3])
        self._param = self._to_float_tensor(df.iloc[:, 2:22])

    @staticmethod
    def _to_float_tensor(frame):
        """Cast a column block to float64, then return it as a float32 tensor."""
        # np.float64 replaces the removed `np.float` alias (same dtype).
        values = frame.astype(np.float64).to_numpy()
        return torch.tensor(values).float()

    def __getitem__(self, idx):
        """Return the ``(spec, geo, param)`` tensors of row ``idx``."""
        return self._spec[idx], self._geo[idx], self._param[idx]

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.df)

In [43]:
# Instantiate the network on the selected device together with its training
# objects: a summed (not averaged) squared-error loss, Adam at lr=1e-4 with
# the standard betas, and a scheduler that lowers the learning rate when the
# monitored metric stops improving.
model = Model().to(device)
loss_func = nn.MSELoss(reduction="sum")
optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999))
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

In [40]:
# Shuffle the rows, then split them into a training and a validation set.
SPLIT_SEED = 0          # fixed seed so the train/validation split is reproducible
TRAIN_SIZE = 1000000    # number of shuffled rows used for training
BATCH_SIZE = 32
NUM_WORKERS = 2

# `df` is deliberately rebound to the shuffled frame, as before.
df = df.sample(frac=1, random_state=SPLIT_SEED)
if len(df) <= TRAIN_SIZE:
    # Without this check the validation set would silently be empty.
    raise ValueError(
        "need more than %d rows to leave a validation set, got %d"
        % (TRAIN_SIZE, len(df))
    )

train_set = SpecData(df[:TRAIN_SIZE])
trainloader = DataLoader(train_set, batch_size=BATCH_SIZE,
                         shuffle=True, num_workers=NUM_WORKERS)
valid_set = SpecData(df[TRAIN_SIZE:])
# Validation only measures the loss, so the batch order does not need shuffling.
validloader = DataLoader(valid_set, batch_size=BATCH_SIZE,
                         shuffle=False, num_workers=NUM_WORKERS)

In [41]:
# Per-epoch history filled in by the training loop: learning rate, training
# loss and validation loss. Each name gets its own independent list.
lr_list, train_loss_list, valid_loss_list = [], [], []

In [None]:
# Train for 1000 epochs. Each epoch runs one optimisation pass over
# `trainloader` and one evaluation pass over `validloader`, records the mean
# per-batch loss of each pass and the learning rate, and lets the scheduler
# react to the validation loss.
for epoch in range(1000):
    print("epoch: ", epoch, end="\r")

    # ---- training pass ----
    model.train()
    train_batch_losses = []
    for spec, geo, param in trainloader:
        spec, geo, param = spec.to(device), geo.to(device), param.to(device)
        optimizer.zero_grad()
        # The network maps (optical parameters, geometry) -> reflectance.
        predict = model(param, geo)
        loss = loss_func(predict, spec)
        loss.backward()
        optimizer.step()
        train_batch_losses.append(loss.item())
    train_loss_list.append(np.mean(train_batch_losses))

    # ---- validation pass ----
    # A separate list is used so the validation average is not mixed with the
    # training batches, and autograd is disabled because nothing is optimised.
    model.eval()
    valid_batch_losses = []
    with torch.no_grad():
        for spec, geo, param in validloader:
            spec, geo, param = spec.to(device), geo.to(device), param.to(device)
            predict = model(param, geo)
            valid_batch_losses.append(loss_func(predict, spec).item())
    model.train()
    valid_loss_list.append(np.mean(valid_batch_losses))

    # One learning-rate entry per epoch, aligned with the two loss histories.
    lr_list.append(optimizer.param_groups[0]["lr"])

    if epoch % 10 == 0:
        print("train loss: ", train_loss_list[-1])
        print("test loss: ", valid_loss_list[-1])

    # Drive the plateau scheduler with the epoch's mean validation loss rather
    # than with whichever validation batch happened to come last.
    scheduler.step(valid_loss_list[-1])

epoch:  0