## 데이터 수집


In [1]:
from sklearn.datasets import load_diabetes
# Load the diabetes dataset bundled with scikit-learn.
data_diabetes = load_diabetes()
# Separate the feature matrix from the regression target.
features, label = data_diabetes.data, data_diabetes.target

In [2]:
# Check the dimensions of the feature matrix and the target vector.
features.shape, label.shape

((442, 10), (442,))


## 데이터 전처리
- 데이터 분석

## 데이터 분할
- train, test, validation

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split reproducible across runs.
train_features, test_features, train_label, test_label = train_test_split(features, label, test_size=0.2, random_state=42)

In [4]:
# Check the dimensions of each split.
train_features.shape, test_features.shape, train_label.shape, test_label.shape

((353, 10), (89, 10), (353,), (89,))

In [5]:
# Check the container type of each split before converting to tensors.
type(train_features), type(test_features), type(train_label), type(test_label)

(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [6]:
import torch

In [7]:
# Convert the NumPy splits to float32 tensors, the default dtype of
# torch.nn.Linear parameters (the NumPy arrays would otherwise become float64).
train_features_tensor = torch.tensor(train_features, dtype=torch.float32)
# Labels arrive as a 1-D vector; reshape to a single column so they line up
# with the model's (n, 1) predictions instead of broadcasting in the loss.
train_label_tensor = torch.tensor(train_label, dtype=torch.float32).view(-1, 1)
# Give the held-out split the same dtype and shape so it can be fed to the
# model for evaluation.
test_features_tensor = torch.tensor(test_features, dtype=torch.float32)
test_label_tensor = torch.tensor(test_label, dtype=torch.float32).view(-1, 1)
type(train_features_tensor), type(train_label_tensor)

(torch.Tensor, torch.Tensor)

In [8]:
# Check the tensor dimensions; the label tensor should now have one column.
train_features_tensor.shape, train_label_tensor.shape

(torch.Size([353, 10]), torch.Size([353, 1]))

## 모델 학습



In [9]:
# simple model linear regression
# model, loss function, optimizer function
class LinearRegression(torch.nn.Module):
    """Simple linear-regression model made of a single linear layer.

    Args:
        input_dim: Number of feature columns in each input row.
        output_dim: Number of values predicted per row: the number of label
            categories, or one for a continuous target.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = torch.nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.linear(x)


In [10]:
# Column counts of the feature and label tensors; these become the model's
# input and output sizes.
train_features_tensor.shape[1], train_label_tensor.shape[1]

(10, 1)

In [11]:
# Size the model from the data: one input per feature column, one output per label column.
model = LinearRegression(train_features_tensor.shape[1], train_label_tensor.shape[1])

In [12]:
# Display the module's layer structure.
model

LinearRegression(
  (linear): Linear(in_features=10, out_features=1, bias=True)
)

In [13]:
criterion = torch.nn.MSELoss() # Loss function: mean squared error
# Optimizer: stochastic gradient descent; lr is the learning rate, i.e. the
# step size used when descending the gradient.
# Author's note: finding the minimum is hard, so the choice of optimizer
# matters; AdamW is regarded as a strong recent option.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [14]:
## Training loop
# Every epoch runs the whole training set through the model, so each
# optimizer step uses the full-batch gradient.
# Make sure the module is in training mode, in case this cell is re-run after
# the evaluation cell has switched it to eval mode.
model.train()
for epoch in range(1000):
    # Call the module itself rather than .forward() so that nn.Module.__call__
    # runs any registered hooks around the forward pass.
    pred_y = model(train_features_tensor)
    loss = criterion(pred_y, train_label_tensor)
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"epoch : {epoch}, loss : {loss.item()}")



epoch : 0, loss : 29754.302734375
epoch : 100, loss : 6412.3916015625
epoch : 200, loss : 5926.95947265625
epoch : 300, loss : 5845.83203125
epoch : 400, loss : 5774.06494140625
epoch : 500, loss : 5704.63671875
epoch : 600, loss : 5637.3486328125
epoch : 700, loss : 5572.12646484375
epoch : 800, loss : 5508.9013671875
epoch : 900, loss : 5447.60498046875


## 모델 평가



In [15]:
# Switch the module to evaluation mode before scoring it.
model.eval()

LinearRegression(
  (linear): Linear(in_features=10, out_features=1, bias=True)
)

In [16]:
# Evaluation only: disable gradient tracking, since nothing is being trained here.
with torch.no_grad():
    # Training-set loss, measured with the same loss function used for training.
    pred_y = model(train_features_tensor)
    loss = criterion(pred_y, train_label_tensor)
    print(f"loss : {loss.item()}")

    # Held-out loss: score the model on the test split it never saw during
    # training. The split is converted here with the same dtype and column
    # shape as the training tensors so it matches the model's parameters.
    test_features_tensor = torch.tensor(test_features, dtype=torch.float32)
    test_label_tensor = torch.tensor(test_label, dtype=torch.float32).view(-1, 1)
    test_pred_y = model(test_features_tensor)
    test_loss = criterion(test_pred_y, test_label_tensor)
    print(f"test loss : {test_loss.item()}")


loss : 5388.1728515625


## 모델 배포