# Assignment #2 IRIS 데이터를 이용한 Deep Neural Network 모델 설계

- 202055623
- 허치영

## 라이브러리 import

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Select the PyTorch device: prefer CUDA, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using mps device


## Dataset 불러오기

In [3]:
from ucimlrepo import fetch_ucirepo

# UCI Machine Learning Repository id of the Iris dataset.
IRIS_DATASET_ID = 53

# Download the dataset (features, targets and metadata).
iris = fetch_ucirepo(id=IRIS_DATASET_ID)

In [4]:
# Features and targets, as exposed by the fetched dataset object.
dataset_tables = iris.data
X, y = dataset_tables.features, dataset_tables.targets

# Print the dataset metadata first, then the per-variable information.
for dataset_info in (iris.metadata, iris.variables):
    print(dataset_info)

{'uci_id': 53, 'name': 'Iris', 'repository_url': 'https://archive.ics.uci.edu/dataset/53/iris', 'data_url': 'https://archive.ics.uci.edu/static/public/53/data.csv', 'abstract': 'A small classic dataset from Fisher, 1936. One of the earliest known datasets used for evaluating classification methods.\n', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Tabular'], 'num_instances': 150, 'num_features': 4, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1936, 'last_updated': 'Tue Sep 12 2023', 'dataset_doi': '10.24432/C56C76', 'creators': ['R. A. Fisher'], 'intro_paper': {'title': 'The Iris data set: In search of the source of virginica', 'authors': 'A. Unwin, K. Kleinman', 'published_in': 'Significance, 2021', 'year': 2021, 'url': 'https://www.semanticscholar.org/paper/4599862ea877863669a6a8e63a3c707a787d5d7e', 'doi': '1740-9713.01589'}, 'add

## 데이터 전처리

In [5]:
# The target column holds class-name strings, so encode it as integer labels.

le = LabelEncoder()
display(y[:5])  # first five targets before encoding

# Flatten the target column to 1-D before fitting the encoder.
flat_labels = np.ravel(y)
le.fit(flat_labels)
y = le.transform(flat_labels)

display(y[:5])  # first five targets after encoding

Unnamed: 0,class
0,Iris-setosa
1,Iris-setosa
2,Iris-setosa
3,Iris-setosa
4,Iris-setosa


array([0, 0, 0, 0, 0])

In [6]:
# Split the data Train : Test = 8 : 2.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, so the mean and standard
# deviation used for scaling come from the training data alone.
scaler = StandardScaler()
scaler.fit(X_train)

# Apply the training statistics to both splits.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Check the scaling result: overall mean and standard deviation of each split.
for split_name, split_values in (("X_train", X_train), ("X_test", X_test)):
    print(f"{split_name} mean: ", split_values.mean())
    print(f"{split_name} std: ", split_values.std())

X_train mean:  4.357625371653739e-16
X_train std:  1.0
X_test mean:  0.0914914534439311
X_test std:  0.9984293963082908


- `LabelEncoder`를 이용하여 target을 숫자로 변환
- `StandardScaler`를 이용하여 feature를 정규화

## MLP 모델 구성

In [7]:
class MLP(nn.Module):
    """Multi-layer perceptron classifier with tanh activations.

    The network is: input layer -> two hidden layers -> output layer, with
    tanh applied after every layer except the output layer, which returns
    raw logits (suitable for ``nn.CrossEntropyLoss``).

    Args:
        in_features: Number of input features. Defaults to 4, the number of
            features in the Iris dataset.
        hidden_features: Width of the input and hidden layers. Defaults to 64.
        num_classes: Number of output logits. Defaults to 3, the number of
            target classes in the Iris dataset.
    """

    def __init__(self, in_features=4, hidden_features=64, num_classes=3):
        super().__init__()
        # input layer : in_features -> hidden_features (4 -> 64 by default)
        self.input_layer = nn.Linear(in_features, hidden_features)
        # hidden layers : hidden_features -> hidden_features (64 -> 64)
        self.hidden_layer0 = nn.Linear(hidden_features, hidden_features)
        self.hidden_layer1 = nn.Linear(hidden_features, hidden_features)
        # output layer : hidden_features -> num_classes (64 -> 3)
        self.output_layer = nn.Linear(hidden_features, num_classes)
        # activation function : tanh (one module shared by all layers)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return the class logits for the input batch ``x``.

        The input is moved to the device that holds the model parameters, so
        the module does not depend on any module-level ``device`` variable and
        keeps working if the model is later moved to another device.
        """
        x = x.to(self.input_layer.weight.device)
        x = self.tanh(self.input_layer(x))
        x = self.tanh(self.hidden_layer0(x))
        x = self.tanh(self.hidden_layer1(x))
        x = self.output_layer(x)
        return x

In [8]:
# Build the network, move it to the selected device, and show its layers.
model = MLP()
model = model.to(device)
print(model)

MLP(
  (input_layer): Linear(in_features=4, out_features=64, bias=True)
  (hidden_layer0): Linear(in_features=64, out_features=64, bias=True)
  (hidden_layer1): Linear(in_features=64, out_features=64, bias=True)
  (output_layer): Linear(in_features=64, out_features=3, bias=True)
  (tanh): Tanh()
)


- Pytorch 라이브러리를 이용
- `nn.Module`을 상속받아 모델을 구성

## 모델 학습

In [9]:
# Hyperparameter search with stratified K-fold cross-validation.
# k = 4 (Train : Val : Test = 6 : 2 : 2)

skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

# Best result seen so far across all hyperparameter settings.
best_model = None
best_loss = None
best_params = None

# Candidate hyperparameters.
params = [
    {"learning_rate": 0.01, "epochs": 100},
    {"learning_rate": 0.001, "epochs": 100},
    {"learning_rate": 0.0001, "epochs": 100},
]

# Loss function.
criterion = nn.CrossEntropyLoss()

# batch size = 1 (Stochastic Gradient Descent)
batch_size = 1

for param in params:
    learning_rate = param["learning_rate"]
    epochs = param["epochs"]

    print(f"Learning Rate: {learning_rate}, Epochs: {epochs}")

    # One validation loss per fold for this hyperparameter setting.
    # The list must be created here, outside the fold loop: resetting it
    # inside the loop would leave only the last fold's loss, and the
    # "average" below would then be based on a single fold.
    val_losses = []

    # Training loop over the folds.
    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
        print(f"Fold [{fold+1}/{skf.get_n_splits()}]")
        # Split into the training part and the validation part of this fold.
        X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
        y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

        # Convert to PyTorch tensors.
        X_train_fold = torch.tensor(X_train_fold, dtype=torch.float32).to(device)
        y_train_fold = torch.tensor(y_train_fold, dtype=torch.long).to(device)

        # Build the DataLoader.
        train_dataset = TensorDataset(X_train_fold, y_train_fold)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Fresh model and optimizer for every fold.
        model = MLP().to(device)
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

        for epoch in range(epochs):
            model.train()  # training mode
            # Fetch batch_size (1) samples at a time.
            for inputs, targets in train_loader:
                optimizer.zero_grad()  # reset accumulated gradients
                # forward pass
                y_pred = model(inputs)
                loss = criterion(y_pred, targets)  # compute the loss
                # backward pass
                loss.backward()
                optimizer.step()  # update the weights

            # The reported value is the loss of the last mini-batch of the epoch.
            if epoch % 20 == 0:
                print(f"Epoch [{epoch}/{epochs}] Train Loss: {loss.item()}")

        # Validation loss for this fold; no gradients are needed here.
        model.eval()
        X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
        y_val_tensor = torch.tensor(y_val_fold, dtype=torch.long).to(device)
        with torch.no_grad():
            y_pred = model(X_val_tensor)
            loss = criterion(y_pred, y_val_tensor)
        val_losses.append(loss.item())
        print(f"Validation Loss: {loss.item()}")

    # Average validation loss over all folds.
    val_losses = np.array(val_losses)
    avg_val_loss = val_losses.mean()
    print(f"Average Validation Loss: {avg_val_loss}")

    # Keep the best model, loss and hyperparameters.
    # NOTE: the stored model is the one trained on the last fold of this setting.
    if best_loss is None or avg_val_loss < best_loss:
        best_loss = avg_val_loss
        best_model = model
        best_params = param

    print("-" * 50)


Learning Rate: 0.01, Epochs: 100
Fold [1/4]
Epoch [0/100] Train Loss: 0.30380526185035706
Epoch [20/100] Train Loss: 0.0046497792936861515
Epoch [40/100] Train Loss: 0.1716327965259552
Epoch [60/100] Train Loss: 0.0005343916127458215
Epoch [80/100] Train Loss: 0.0005504761938937008
Validation Loss: 0.053488049656152725
Fold [2/4]
Epoch [0/100] Train Loss: 0.2779849171638489
Epoch [20/100] Train Loss: 0.01555101852864027
Epoch [40/100] Train Loss: 0.004436887335032225
Epoch [60/100] Train Loss: 0.024087172001600266
Epoch [80/100] Train Loss: 0.6337399482727051
Validation Loss: 0.12411072850227356
Fold [3/4]
Epoch [0/100] Train Loss: 0.9727568626403809
Epoch [20/100] Train Loss: 0.005414227023720741
Epoch [40/100] Train Loss: 0.005305140744894743
Epoch [60/100] Train Loss: 0.4388662874698639
Epoch [80/100] Train Loss: 0.0009737040963955224
Validation Loss: 0.06812217086553574
Fold [4/4]
Epoch [0/100] Train Loss: 0.5507019758224487
Epoch [20/100] Train Loss: 0.019667144864797592
Epoch [40

- 4-fold cross validation 진행 (train : val : test = 6 : 2 : 2)
- `StratifiedKFold`를 이용하여 class 분포를 유지
- Loss function : CrossEntropyLoss
- Optimizer : Stochastic Gradient Descent

### best hyperparameter

In [10]:
# Print the hyperparameter dictionary selected during the cross-validation search.
print(best_params)

{'learning_rate': 0.001, 'epochs': 100}


## Test set으로 모델 평가

In [11]:
# Evaluate the selected model on the held-out test set.
best_model.eval()  # evaluation mode

# Convert to PyTorch tensors.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    y_pred = best_model(X_test_tensor)
    loss = criterion(y_pred, y_test_tensor)  # compute the loss

print(f"Test Loss: {loss.item()}")
_, predicted = torch.max(y_pred, 1)  # predicted class = index of the largest logit
correct = (predicted == y_test_tensor).sum().item()  # number of correct predictions
accuracy = correct / y_test_tensor.size(0)  # categorical accuracy
print(f"Test Accuracy: {accuracy}")

# Show the predictions next to the true labels, decoded back to class names.
df = pd.DataFrame()
df["predicted"] = le.inverse_transform(predicted.cpu().numpy())
df["target"] = le.inverse_transform(y_test)
print(df)


Test Loss: 0.06601735949516296
Test Accuracy: 1.0
          predicted           target
0   Iris-versicolor  Iris-versicolor
1       Iris-setosa      Iris-setosa
2    Iris-virginica   Iris-virginica
3   Iris-versicolor  Iris-versicolor
4   Iris-versicolor  Iris-versicolor
5       Iris-setosa      Iris-setosa
6   Iris-versicolor  Iris-versicolor
7    Iris-virginica   Iris-virginica
8   Iris-versicolor  Iris-versicolor
9   Iris-versicolor  Iris-versicolor
10   Iris-virginica   Iris-virginica
11      Iris-setosa      Iris-setosa
12      Iris-setosa      Iris-setosa
13      Iris-setosa      Iris-setosa
14      Iris-setosa      Iris-setosa
15  Iris-versicolor  Iris-versicolor
16   Iris-virginica   Iris-virginica
17  Iris-versicolor  Iris-versicolor
18  Iris-versicolor  Iris-versicolor
19   Iris-virginica   Iris-virginica
20      Iris-setosa      Iris-setosa
21   Iris-virginica   Iris-virginica
22      Iris-setosa      Iris-setosa
23   Iris-virginica   Iris-virginica
24   Iris-virginica   Iri

## 모델 결과 출력

In [12]:
# Print a per-layer summary (output shapes and parameter counts) of the
# selected model for an input with 4 features.
summary(best_model, (4,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]             320
              Tanh-2                   [-1, 64]               0
            Linear-3                   [-1, 64]           4,160
              Tanh-4                   [-1, 64]               0
            Linear-5                   [-1, 64]           4,160
              Tanh-6                   [-1, 64]               0
            Linear-7                    [-1, 3]             195
Total params: 8,835
Trainable params: 8,835
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.03
Estimated Total Size (MB): 0.04
----------------------------------------------------------------
