<a href="https://colab.research.google.com/github/427paul/Machine_Learning/blob/main/DL_1_2_%EB%94%A5%EB%9F%AC%EB%8B%9D_%EB%AC%B4%EC%9E%91%EC%A0%95_%EB%94%B0%EB%9D%BC%ED%95%98%EA%B8%B0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **딥러닝 무작정 따라하기**

# 1.환경준비

### (1) 라이브러리 Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam

### (2) 필요 함수 생성

* 모델링을 위한 데이터로더 만들기
    * 학습시, 배치 단위로 데이터를 처리하기 위함

In [3]:
def _to_float_tensor(data):
    """Return a float32 tensor copy of any array-like (ndarray, DataFrame, Series, list, tensor)."""
    if isinstance(data, torch.Tensor):
        return data.detach().clone().to(torch.float32)
    return torch.tensor(np.asarray(data, dtype=np.float32))


def make_DataSet(x_train, x_val, y_train, y_val, batch_size = 32) :
    """Build a shuffled training DataLoader and validation tensors.

    Args:
        x_train, x_val: 2-D array-likes of numeric features (NumPy arrays,
            pandas DataFrames, nested lists, ...).
        y_train, y_val: array-likes of numeric targets (pandas Series, NumPy
            arrays, lists, ...). They are reshaped to a single column.
        batch_size: number of rows per training batch.

    Returns:
        tuple: ``(train_loader, x_val_tensor, y_val_tensor)`` where
        ``train_loader`` yields ``(features, targets)`` float32 batches in
        shuffled order and the validation tensors are float32, with the
        targets shaped ``(n, 1)``.
    """
    # Convert every input to float32; targets become column vectors so they
    # line up with the (n, 1) output of a single-unit output layer.
    x_train_tensor = _to_float_tensor(x_train)
    y_train_tensor = _to_float_tensor(y_train).reshape(-1, 1)
    x_val_tensor = _to_float_tensor(x_val)
    y_val_tensor = _to_float_tensor(y_val).reshape(-1, 1)

    # Pair features with targets so they are batched and shuffled together.
    train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

    # Only the training data is wrapped in a loader; validation tensors are
    # returned whole.
    train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

    return train_loader, x_val_tensor, y_val_tensor

* 학습을 위한 함수

In [4]:
def train(dataloader, model, loss_fn, optimizer, device):
    """Run one training epoch and return the mean of the per-batch losses.

    Args:
        dataloader: sized iterable yielding ``(X, y)`` tensor batches.
        model: the module to train; it is switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: optimizer bound to ``model``'s parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        float: sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: if ``dataloader`` has no batches.
    """
    num_batches = len(dataloader)
    tr_loss = 0.0                   # running sum of batch losses, kept as a plain float
    model.train()                   # training mode
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)   # move the batch to the target device

        # Feed forward
        pred = model(X)
        loss = loss_fn(pred, y)
        # Accumulate the Python number, not the tensor: summing the tensor
        # would chain every batch's autograd graph onto the running total
        # and keep all of them alive until the end of the epoch.
        tr_loss += loss.item()

        # Backpropagation
        loss.backward()             # gradients of the loss w.r.t. each parameter
        optimizer.step()            # parameter update from those gradients
        optimizer.zero_grad()       # reset gradients so they do not accumulate across batches

    return tr_loss / num_batches    # mean loss over all batches

* 검증을 위한 함수

In [5]:
def evaluate(x_val_tensor, y_val_tensor, model, loss_fn, device):
    """Compute the loss of ``model`` on the whole validation set in one pass.

    Args:
        x_val_tensor: validation features.
        y_val_tensor: validation targets.
        model: module to evaluate; it is switched to (and left in) eval mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        device: device both tensors are moved to before the forward pass.

    Returns:
        float: the loss value as a Python number.
    """
    model.eval()    # evaluation mode

    # Gradients are not needed for validation, so tracking is switched off.
    with torch.no_grad():
        features = x_val_tensor.to(device)
        targets = y_val_tensor.to(device)
        loss_tensor = loss_fn(model(features), targets)
        return loss_tensor.item()

### (3) device 준비(cpu or gpu)

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


# 2.Regression : Advertising

## (1) 데이터 전처리 - 기본

### 1) 데이터 준비

In [7]:
# Read the advertising CSV from the DA4BAM dataset repository on GitHub
# and preview the first rows.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/advertising.csv'
adv = pd.read_csv(path)
adv.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [8]:
# Separate the label column from the feature columns.
target = 'Sales'
y = adv[target]
x = adv.drop(columns=target)

### 2) 가변수화

### 3) 데이터분할

In [9]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=20
)

### 4) 스케일링

In [10]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

## (2) 딥러닝 모델링

### 1) 딥러닝을 위한 데이터 준비

*  pandas 데이터프레임 ==> PyTorch의 DataLoader로 변환
    * 데이터 텐서로 변환
    * 텐서 데이터셋으로 합치기 : x, y
    * 데이터 로더 생성

* 1-(2) 에서 생성한 함수 : **make_DataSet**

In [11]:
# Build the shuffled training loader (4 rows per batch) and the validation tensors.
train_loader, x_val_ts, y_val_ts = make_DataSet(
    x_train=x_train,
    x_val=x_val,
    y_train=y_train,
    y_val=y_val,
    batch_size=4,
)

In [12]:
# Inspect only the first batch. The batch is bound to its own names so the
# notebook-level dataset objects `x` and `y` are not overwritten by a single
# batch (which would break re-running the split cell).
for x_batch, y_batch in train_loader:
    print(f"Shape of x [rows, columns]: {x_batch.shape}")
    print(f"Shape of y: {y_batch.shape} {y_batch.dtype}")
    break

Shape of x [rows, columns]: torch.Size([4, 3])
Shape of y: torch.Size([4, 1]) torch.float32


### 2) 모델 선언

In [13]:
# Size of x along its second dimension (the column count); the next cell uses it as the model's input width.
x.shape[1]

3

In [14]:
# Input width of the network = number of columns in x.
n_feature = x.shape[1]

# Architecture: one hidden layer with 3 units, ReLU activation, one output unit.
model = nn.Sequential(
    nn.Linear(in_features=n_feature, out_features=3),   # hidden layer
    nn.ReLU(),                                          # activation function
    nn.Linear(in_features=3, out_features=1),           # output layer
)
model = model.to(device)    # place the model on the selected device (may be omitted on CPU)

print(model)

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


* Loss function과 Optimizer

In [15]:
# Mean squared error as the regression loss.
loss_fn = nn.MSELoss()
# Adam updates everything returned by model.parameters() (the weights and biases).
optimizer = Adam(params=model.parameters(), lr=1e-2)

### 4) 학습

* 1-(2)에서 생성한 함수 : **train**, **evaluate**

In [16]:
epochs = 20
for t in range(epochs):  # repeat for 20 epochs
    tr_loss = train(train_loader, model, loss_fn, optimizer, device)    # one training pass
    val_loss = evaluate(x_val_ts, y_val_ts, model, loss_fn, device)     # loss on the validation split

    # `.4f` prints four decimal places. The former `4f` spec only set a
    # minimum field width of 4, leaving the precision at the default of 6.
    print(f"Epoch {t+1}, train loss : {tr_loss:.4f}, val loss : {val_loss:.4f}")

Epoch 1, train loss : 210.315964, val loss : 197.853241
Epoch 2, train loss : 159.953979, val loss : 137.683029
Epoch 3, train loss : 95.840652, val loss : 72.659744
Epoch 4, train loss : 41.783318, val loss : 30.167765
Epoch 5, train loss : 15.210310, val loss : 15.119060
Epoch 6, train loss : 8.139363, val loss : 12.356478
Epoch 7, train loss : 7.034497, val loss : 11.787580
Epoch 8, train loss : 6.778351, val loss : 11.356905
Epoch 9, train loss : 6.544739, val loss : 10.930197
Epoch 10, train loss : 6.279342, val loss : 10.483088
Epoch 11, train loss : 5.961531, val loss : 9.828399
Epoch 12, train loss : 5.529760, val loss : 9.015481
Epoch 13, train loss : 5.067438, val loss : 8.103922
Epoch 14, train loss : 4.463823, val loss : 7.012988
Epoch 15, train loss : 3.909812, val loss : 6.043315
Epoch 16, train loss : 3.515601, val loss : 4.960412
Epoch 17, train loss : 3.146557, val loss : 4.342975
Epoch 18, train loss : 2.892650, val loss : 3.891888
Epoch 19, train loss : 2.736468, val

### 5) 모델 평가

In [17]:
# Validation loss of the trained model.
evaluate(
    x_val_tensor=x_val_ts,
    y_val_tensor=y_val_ts,
    model=model,
    loss_fn=loss_fn,
    device=device,
)

3.3248190879821777

# 3.Regression : 보스턴 집값

## (1) 데이터 전처리 - 기본

### 1) 데이터 준비

In [18]:
# Read the Boston housing CSV from the DA4BAM dataset repository on GitHub
# and preview the first rows.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/boston.csv'
data = pd.read_csv(path)
data.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2


In [19]:
# Separate the label column from the feature columns.
target = 'medv'
y = data[target]
x = data.drop(columns=target)

### 2) 가변수화

### 3) 데이터분할

In [20]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=20
)

### 4) 스케일링

In [21]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

## (2) 딥러닝 모델링

### 1) 딥러닝을 위한 데이터 준비

*  pandas 데이터프레임 ==> PyTorch의 DataLoader로 변환
    * 데이터 텐서로 변환
    * 텐서 데이터셋으로 합치기 : x, y
    * 데이터 로더 생성

* 1-(2) 에서 생성한 함수 : **make_DataSet**

In [22]:
# Build the shuffled training loader (4 rows per batch) and the validation tensors.
train_loader, x_val_ts, y_val_ts = make_DataSet(
    x_train=x_train,
    x_val=x_val,
    y_train=y_train,
    y_val=y_val,
    batch_size=4,
)

In [23]:
# Inspect only the first batch. The batch is bound to its own names so the
# notebook-level dataset objects `x` and `y` are not overwritten by a single
# batch (which would break re-running the split cell).
for x_batch, y_batch in train_loader:
    print(f"Shape of x [rows, columns]: {x_batch.shape}")
    print(f"Shape of y: {y_batch.shape} {y_batch.dtype}")
    break

Shape of x [rows, columns]: torch.Size([4, 12])
Shape of y: torch.Size([4, 1]) torch.float32


### 2) 모델 선언

In [29]:
# Input width of the network = number of columns in x.
n_feature = x.shape[1]

# Architecture: one hidden layer with 3 units, ReLU activation, one output unit.
model = nn.Sequential(
    nn.Linear(in_features=n_feature, out_features=3),   # hidden layer
    nn.ReLU(),                                          # activation function
    nn.Linear(in_features=3, out_features=1),           # output layer
)
model = model.to(device)    # place the model on the selected device

print(model)

Sequential(
  (0): Linear(in_features=12, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


* Loss function과 Optimizer

In [30]:
# Mean squared error as the regression loss.
loss_fn = nn.MSELoss()
# Adam updates everything returned by model.parameters().
optimizer = Adam(params=model.parameters(), lr=1e-2)

### 4) 학습

In [31]:
# Train for a fixed number of epochs, reporting both losses after each one.
epochs = 20
for t in range(epochs):
    tr_loss = train(
        dataloader=train_loader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
    )
    val_loss = evaluate(
        x_val_tensor=x_val_ts,
        y_val_tensor=y_val_ts,
        model=model,
        loss_fn=loss_fn,
        device=device,
    )

    print(f"Epoch {t+1}, train loss : {tr_loss:.4f}, val loss : {val_loss:.4f}")

Epoch 1, train loss : 425.9845, val loss : 140.8179
Epoch 2, train loss : 131.4906, val loss : 71.0788
Epoch 3, train loss : 76.5080, val loss : 51.5395
Epoch 4, train loss : 59.9080, val loss : 40.8088
Epoch 5, train loss : 50.4518, val loss : 36.3492
Epoch 6, train loss : 44.9152, val loss : 32.9976
Epoch 7, train loss : 40.7969, val loss : 30.0724
Epoch 8, train loss : 36.0232, val loss : 27.5630
Epoch 9, train loss : 32.8102, val loss : 25.4867
Epoch 10, train loss : 30.2200, val loss : 23.9752
Epoch 11, train loss : 27.7286, val loss : 23.3011
Epoch 12, train loss : 27.4801, val loss : 21.7507
Epoch 13, train loss : 24.9990, val loss : 21.4303
Epoch 14, train loss : 24.6132, val loss : 20.8755
Epoch 15, train loss : 23.8814, val loss : 20.8250
Epoch 16, train loss : 23.3405, val loss : 19.9529
Epoch 17, train loss : 22.8132, val loss : 19.9206
Epoch 18, train loss : 22.7646, val loss : 21.9158
Epoch 19, train loss : 22.6858, val loss : 19.5384
Epoch 20, train loss : 22.0662, val l

### 5) 모델 평가

In [32]:
# Validation loss of the trained model.
evaluate(
    x_val_tensor=x_val_ts,
    y_val_tensor=y_val_ts,
    model=model,
    loss_fn=loss_fn,
    device=device,
)

19.28388214111328