<a href="https://colab.research.google.com/github/J-o-y-y/ms_ai_school/blob/main/20230626_%EC%84%A0%ED%98%95_%EB%B6%84%EB%A5%98_%EB%A1%9C%EC%A7%80%EC%8A%A4%ED%8B%B1_%ED%9A%8C%EA%B7%80_(Logistic_Regression)ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 선형 분류 - 로지스틱 회귀 (Logistic Regression)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader

In [None]:
# 데이터셋을 만들기 make_classificaiton 함수를 사용하여 무작위로 데이터셋을 만들기

In [2]:
x, y = make_classification(
    n_samples=3000, # 생성할 데이터 수
    n_features=5,   # 독립변수 수 (입력변수에 사용되는 변수) / 종속변수 -> 라벨
    n_informative=2,# 독립변수 수 중에서 실제로 유의미한 의미가 있는 변수 계수
    n_redundant=0,  # 독립변수 중에 다른 독립 변수로부터 파생된 불필요한 독립변수 계수
    n_clusters_per_class=1, # 클래스당 클러스트 계수
    random_state=42         # 난수 생성 발생기의 시드값
)

print(x, y)

[[-0.02439857 -0.57540077  1.26796049 -1.42222965 -0.9629849 ]
 [-1.07638119  0.3872175   1.08299994 -0.67379011 -2.65098736]
 [-1.12984986 -0.26922916  1.12735009 -0.82383687 -1.70574586]
 ...
 [-0.53797853  0.26401859 -0.48915618  0.4664446  -1.57451325]
 [ 0.01920342  0.9761859  -0.14717165 -1.51725386  2.31873002]
 [-0.37051336  0.93603022 -0.62133172 -0.23084897  1.66473405]] [0 0 0 ... 0 1 1]


In [None]:
# 이제 데이터셋을 Train set 과 Test set 으로 나누기

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print("x_train >> " , len(x_train))
print("x_test >> ", len(x_test))
print("y_train >> ", len(y_train))
print("y_test >> ", len(y_test))

x_train >>  2400
x_test >>  600
y_train >>  2400
y_test >>  600


In [None]:
# Pytorc의 Dataset 과 Dataloader 를 사용하기 위한 Custom Dataset 구현

In [4]:
class MyCustomDataset(Dataset) :
    def __init__(self, x, y) :
        # 텐서형태로 변환
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)


    def __getitem__(self, index) :

        return self.x[index], self.y[index]


    def __len__(self) :
        return len(self.x)

# 데이터셋 정의
train_dataset = MyCustomDataset(x_train, y_train)
test_dataset = MyCustomDataset(x_test, y_test)

# 데이터 로드 정의
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=False)

# ########## 디버깅 코드 ##############
# test = MyCustomDataset(x_test, y_test)
# for i in test :
#     print(i)
########## 디버깅 코드 ##############
# for index, (datas, lables) in enumerate(test_loader) :
#     print(index, datas, lables)

In [None]:
# 모델 정의

In [5]:
class LogisticRegression(nn.Module) :
    def __init__(self, input_dim) :
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x) :
        out = self.linear(x)
        out = torch.sigmoid(out)

        return out

model = LogisticRegression(input_dim=5)
print(model)

LogisticRegression(
  (linear): Linear(in_features=5, out_features=1, bias=True)
)


In [None]:
# 모델을 학습시키기 전에 학습에 필요한 Loss function, optimizer 선언

In [8]:
!pip install adamp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting adamp
  Downloading adamp-0.3.0.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: adamp
  Building wheel for adamp (setup.py) ... [?25l[?25hdone
  Created wheel for adamp: filename=adamp-0.3.0-py3-none-any.whl size=5980 sha256=bc5599ecad15b6fea93eb7e202e20358c00c7f2dfc1fc720b222c7a69d3f0135
  Stored in directory: /root/.cache/pip/wheels/c7/ad/0f/b41b1c45b18c66e5eef5d2254415af8055c7e2b0934145157d
Successfully built adamp
Installing collected packages: adamp
Successfully installed adamp-0.3.0


In [9]:
from adamp import SGDP

"""
# define your params
optimizer = SGDP(params, lr=0.1, weight_decay=1e-5, momentum=0.9, nesterov=True)
"""

criterion = nn.BCELoss() # 사용한 이유는 0과 1 분류 이진분류 하기 떄문입니다.
# optimizer = SGDP(model.parameters(), lr=0.25, weight_decay=1e-5, momentum=0.9, nesterov=True)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=True)

print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 1e-05
)


In [None]:
# tarin loop 구현

In [10]:
num_epochs = 100

for epoch in range(num_epochs) :
    for inputs, targets in train_loader :
#         print(targets)
#         print("unsqueeze >> ", targets.unsqueeze(1))
        # optimizer 초기화 진행
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()

        optimizer.step()

    if epoch % 10 == 0 :
        print(f"Epoch : [{epoch+1}/{num_epochs}], Loss : [{loss.item():.4f}]")

    """
    SGDP
    Epoch : [1/100], Loss : [0.2932]
    Epoch : [11/100], Loss : [0.2758]
    Epoch : [21/100], Loss : [0.1802]
    Epoch : [31/100], Loss : [0.4343]
    Epoch : [41/100], Loss : [0.1413]
    Epoch : [51/100], Loss : [0.2233]
    Epoch : [61/100], Loss : [0.2452]
    Epoch : [71/100], Loss : [0.1318]
    Epoch : [81/100], Loss : [0.2263]
    Epoch : [91/100], Loss : [0.2263]

    SGD
    Epoch : [1/100], Loss : [0.3412]
    Epoch : [11/100], Loss : [0.2807]
    Epoch : [21/100], Loss : [0.3580]
    Epoch : [31/100], Loss : [0.3004]
    Epoch : [41/100], Loss : [0.2017]
    Epoch : [51/100], Loss : [0.2862]
    Epoch : [61/100], Loss : [0.2308]
    Epoch : [71/100], Loss : [0.2103]
    Epoch : [81/100], Loss : [0.2252]
    Epoch : [91/100], Loss : [0.2543]
    """


Epoch : [1/100], Loss : [0.2500]
Epoch : [11/100], Loss : [0.2131]
Epoch : [21/100], Loss : [0.1315]
Epoch : [31/100], Loss : [0.2635]
Epoch : [41/100], Loss : [0.3350]
Epoch : [51/100], Loss : [0.3222]
Epoch : [61/100], Loss : [0.3622]
Epoch : [71/100], Loss : [0.2937]
Epoch : [81/100], Loss : [0.2925]
Epoch : [91/100], Loss : [0.2068]


In [None]:
# 평가 코드 작성

In [11]:
# device on GPU 인지 혹은 CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device >> ", device)

model.eval()
with torch.no_grad() :
    correct = 0
    total = 0
    for test_inputs, test_targets in test_loader :
#         print(test_inputs, test_targets)
        test_input, test_target = test_inputs.to(device), test_targets.to(device)
        outputs_test = model(test_input)
        _, pred_test = torch.max(outputs_test, 1)
        total += test_targets.size(0)
        correct += (pred_test == test_targets).sum().item()

    print("Acc >> %d%%"%(100 * correct / total))



Using device >>  cpu
Acc >> 48%
