<a href="https://colab.research.google.com/github/J-o-y-y/ms_ai_school/blob/main/20230627_logistic_regression_ensemble_(%EB%A1%9C%EC%A7%80%EC%8A%A4%ED%8B%B1_%ED%9A%8C%EA%B7%80_%EC%95%99%EC%83%81%EB%B8%94).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Logistic regression ensemble exercise
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
### Build the dataset and the data loaders
# Fashion-MNIST training split (train=True), stored under ./data and fetched
# when missing (download=True); ToTensor() is attached as the sample transform.
dataset = FashionMNIST(
    root="./data",
    train=True,
    transform=ToTensor(),
    download=True,
)

# Hold out 10% of the samples for validation; random_state pins the split.
train_set, val_set = train_test_split(
    dataset,
    test_size=0.1,
    random_state=777,
)

# Only the training batches are reshuffled. Note that `test_loader` serves the
# held-out validation split (val_set), not the official Fashion-MNIST test set.
train_loader = DataLoader(train_set, shuffle=True, batch_size=100)
test_loader = DataLoader(val_set, shuffle=False, batch_size=100)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:03<00:00, 6913985.91it/s] 


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 118777.37it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:02<00:00, 2169502.40it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 14510938.84it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



In [4]:
### Model definition
class LogisticRegression(nn.Module):
    """Single linear layer that maps a feature vector to one score per class.

    ``forward`` returns the raw output of the linear layer; no softmax or
    sigmoid is applied inside the model.

    Args:
        input_size: number of input features passed to ``nn.Linear``.
        num_classes: number of output features of ``nn.Linear``.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=num_classes)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output unchanged."""
        return self.linear(x)

In [5]:
### Hyperparameters
seed = 777  # same value as the random_state used for the train/validation split

# Seed PyTorch's global RNG before any model is created or any batch is drawn,
# so weight initialisation and DataLoader shuffling repeat across
# Restart & Run All. Kept ahead of the assignments so the cell ends on an
# assignment and the returned Generator object is not echoed as cell output.
torch.manual_seed(seed)

input_size = 28 * 28  # length of each flattened image vector fed to the linear layer
num_classes = 10      # number of output scores per sample
num_epoch = 100       # number of passes over the training loader
lr = 0.001            # learning rate handed to SGD
num_models = 5        # number of models used in the ensemble

In [6]:
### Build the ensemble
# Each member is a separately constructed LogisticRegression instance with the
# same input/output sizes.
models = []
for member_idx in range(num_models):
    models.append(LogisticRegression(input_size, num_classes))
print(models)

[LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
), LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
), LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
), LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
), LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)]


In [7]:
### Loss function and one optimizer per ensemble member
criterion = nn.CrossEntropyLoss()

# optimizers[k] is built from the parameters of models[k] only.
optimizers = []
for member in models:
    optimizers.append(optim.SGD(member.parameters(), lr=lr))
print(optimizers)

[SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
), SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
), SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
), SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
), SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)]


In [8]:

### Train loop
# Each epoch: (1) train every ensemble member on every mini-batch of
# train_loader, then (2) measure the ensemble's accuracy on test_loader.
for epoch in range(num_epoch):
    # ---- training: every member sees the same mini-batch ----
    for images, labels in train_loader:
        # Flatten each image into a vector of length input_size
        images = images.reshape(-1, input_size)

        # Pair each model with its own optimizer instead of indexing two
        # parallel lists by num_models.
        for model, optimizer in zip(models, optimizers):
            # Forward pass and loss
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update for this member only
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # ---- validation: gradients are not needed here ----
    with torch.no_grad():
        total, correct = 0, 0
        for images, labels in test_loader:
            images = images.reshape(-1, input_size)

            # Zero-initialised accumulator with one row per image in the batch
            # and one column per class; member outputs are added into it below.
            outputs = torch.zeros(images.size(0), num_classes)
            # Ensemble prediction = sum of the raw outputs of all members
            for model in models:
                outputs += model(images)

            # Predicted class is the index of the largest summed score
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epoch}], Val ACC >> {val_acc:.2f}%")


Epoch [1/100], Val ACC >> 65.43%
Epoch [2/100], Val ACC >> 66.97%
Epoch [3/100], Val ACC >> 68.05%
Epoch [4/100], Val ACC >> 68.97%
Epoch [5/100], Val ACC >> 70.33%
Epoch [6/100], Val ACC >> 71.90%
Epoch [7/100], Val ACC >> 73.08%
Epoch [8/100], Val ACC >> 73.80%
Epoch [9/100], Val ACC >> 74.45%
Epoch [10/100], Val ACC >> 75.08%
Epoch [11/100], Val ACC >> 75.58%
Epoch [12/100], Val ACC >> 76.08%
Epoch [13/100], Val ACC >> 76.63%
Epoch [14/100], Val ACC >> 77.00%
Epoch [15/100], Val ACC >> 77.43%
Epoch [16/100], Val ACC >> 77.63%
Epoch [17/100], Val ACC >> 77.87%
Epoch [18/100], Val ACC >> 78.05%
Epoch [19/100], Val ACC >> 78.35%
Epoch [20/100], Val ACC >> 78.68%
Epoch [21/100], Val ACC >> 78.80%
Epoch [22/100], Val ACC >> 79.08%
Epoch [23/100], Val ACC >> 79.32%
Epoch [24/100], Val ACC >> 79.48%
Epoch [25/100], Val ACC >> 79.53%
Epoch [26/100], Val ACC >> 79.75%
Epoch [27/100], Val ACC >> 79.78%
Epoch [28/100], Val ACC >> 80.00%
Epoch [29/100], Val ACC >> 80.03%
Epoch [30/100], Val ACC