In [46]:
# 라이브러리
from sklearn.datasets import load_diabetes
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import torch.nn.functional as F

In [8]:
diabetes = load_diabetes()
diabetes.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])

In [9]:
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

Note: Each of these 10 feature variables have bee

In [12]:
df = pd.DataFrame(diabetes.data, columns = diabetes.feature_names)
df['target'] = diabetes.target

display(df.head())
print(df.info())


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 442 entries, 0 to 441
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     442 non-null    float64
 1   sex     442 non-null    float64
 2   bmi     442 non-null    float64
 3   bp      442 non-null    float64
 4   s1      442 non-null    float64
 5   s2      442 non-null    float64
 6   s3      442 non-null    float64
 7   s4      442 non-null    float64
 8   s5      442 non-null    float64
 9   s6      442 non-null    float64
 10  target  442 non-null    float64
dtypes: float64(11)
memory usage: 38.1 KB
None


In [28]:
# 이미 정규화 되어있으므로 데이터 분리
X = torch.from_numpy(diabetes.data).float()
y = torch.from_numpy(diabetes.target).float()
y = y.reshape(-1, 1)

In [29]:
# 훈련/학습 데이터 분리
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = 0.2)

In [30]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([353, 10]),
 torch.Size([89, 10]),
 torch.Size([353, 1]),
 torch.Size([89, 1]))

# 기본 선형계층

In [62]:
# 훈련 전 모델 생성 클래스 만들기

class MakeLinear(nn.Module): # 이거
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim

        super().__init__()
        self.linear = nn.Linear(self.input_dim, output_dim)

    def forward(self, x):
        y = self.linear(x)
        return y

# 모델 생성
model_linear = MakeLinear(X.size(-1), y.size(-1))
print(model_linear)



# 손실함수, 옵티마이저 정의
criterion = nn.MSELoss()

learn_rate = 0.001
optimizer = optim.Adam(model_linear.parameters(), lr = learn_rate)

MakeLinear(
  (linear): Linear(in_features=10, out_features=1, bias=True)
)


In [64]:
epochs = 10000
print_val = 1000

for i in tqdm(range(epochs)):
    model_linear.train()

    optimizer.zero_grad()                  # 그래디언트 초기화

    pred_y = model_linear.forward(X_train)    # 순전파
    current_loss = criterion(pred_y, y_train)

    current_loss.backward()                          # 역전파
    optimizer.step()                          # 가중치 업데이트

    if (i+1) % print_val == 0:
        print(f"epochs: {i+1}, loss: {loss:.4f}")

 14%|█▍        | 1383/10000 [00:00<00:04, 1941.26it/s]

epochs: 1000, loss: 29627.3633


 23%|██▎       | 2274/10000 [00:01<00:03, 2187.86it/s]

epochs: 2000, loss: 29627.3633


 34%|███▍      | 3446/10000 [00:01<00:02, 2338.55it/s]

epochs: 3000, loss: 29627.3633


 44%|████▎     | 4361/10000 [00:02<00:02, 2184.33it/s]

epochs: 4000, loss: 29627.3633


 52%|█████▏    | 5167/10000 [00:02<00:02, 1813.50it/s]

epochs: 5000, loss: 29627.3633


 62%|██████▏   | 6234/10000 [00:03<00:02, 1615.75it/s]

epochs: 6000, loss: 29627.3633


 74%|███████▍  | 7380/10000 [00:04<00:01, 1991.19it/s]

epochs: 7000, loss: 29627.3633


 82%|████████▏ | 8220/10000 [00:04<00:01, 1771.04it/s]

epochs: 8000, loss: 29627.3633


 94%|█████████▍| 9391/10000 [00:04<00:00, 2250.26it/s]

epochs: 9000, loss: 29627.3633


100%|██████████| 10000/10000 [00:05<00:00, 1892.89it/s]

epochs: 10000, loss: 29627.3633





# 심층신경망

In [79]:
class MakeDeep(nn.Module):
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim

        super().__init__()
        # 학습 층
        self.linear1 = nn.Linear(self.input_dim, 256)
        self.linear2 = nn.Linear(256, 128)
        self.linear3 = nn.Linear(128, 64)
        self.linear4 = nn.Linear(64, output_dim)

        # 활성화함수
        self.act = nn.ReLU()

    def forward(self, x):
        h = self.act(self.linear1(x))
        h = self.act(self.linear2(h))
        h = self.act(self.linear3(h))
        y = self.linear4(h)

        return y

# 모델 생성
model_deep = MakeDeep(X.size(-1), y.size(-1))
print(model_deep)

# 손실함수, 옵티마이저 정의
loss = nn.MSELoss()
optimizer = optim.Adam(model_deep.parameters(), lr=0.001)

epochs = 20000
print_interval = 2000

# 학습
for i in tqdm(range(epochs)):
    optimizer.zero_grad()

    pred_y = model_deep.forward(X_train)
    current_loss = loss(pred_y, y_train)

    current_loss.backward()
    optimizer.step()

    if (i+1) % print_interval == 0:
        print(f"epochs: {i+1}, loss: {current_loss:.4f}")

MakeDeep(
  (linear1): Linear(in_features=10, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=128, bias=True)
  (linear3): Linear(in_features=128, out_features=64, bias=True)
  (linear4): Linear(in_features=64, out_features=1, bias=True)
  (act): ReLU()
)


 10%|█         | 2053/20000 [00:05<00:50, 353.04it/s]

epochs: 2000, loss: 2169.0312


 20%|██        | 4078/20000 [00:12<00:44, 357.37it/s]

epochs: 4000, loss: 582.8406


 30%|███       | 6035/20000 [00:18<00:46, 301.98it/s]

epochs: 6000, loss: 301.5608


 40%|████      | 8046/20000 [00:25<00:33, 356.87it/s]

epochs: 8000, loss: 164.4917


 50%|█████     | 10038/20000 [00:31<00:39, 251.03it/s]

epochs: 10000, loss: 102.4297


 60%|██████    | 12052/20000 [00:38<00:24, 321.03it/s]

epochs: 12000, loss: 70.6921


 70%|███████   | 14051/20000 [00:45<00:19, 302.70it/s]

epochs: 14000, loss: 38.4367


 80%|████████  | 16077/20000 [00:52<00:10, 363.28it/s]

epochs: 16000, loss: 34.3748


 90%|█████████ | 18068/20000 [00:58<00:05, 365.73it/s]

epochs: 18000, loss: 21.5273


100%|██████████| 20000/20000 [01:04<00:00, 311.71it/s]

epochs: 20000, loss: 15.9972





In [80]:
y_pred = model_deep.forward(X_test).detach()

In [81]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

print("MSE")
print(mean_squared_error(y_test, y_pred))
print("MSE")
print(mean_absolute_error(y_test, y_pred))

MSE
12609.340200479053
MSE
89.31674665815375
