선형 분류 - 로지스틱 회귀(Logistic Regression)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader

데이터셋을 만들기 make_classification 함수를 사용허여 무작위로 데이터셋을 만들기

In [2]:
x, y = make_classification(
    n_samples=3000,         # 데이터 수
    n_features=5,           # 독립 변수 중 출력
    n_informative=2,        # 독립 변수 중, 실제로 유의미한 의미가 있는 변수 계수
    n_redundant=0,          # 독립 변수 중, 다른 독립 변수로부터 파생된 불필요한 독립변수 계수
    n_clusters_per_class=1, # 클래스당 클러스트 계수
    random_state=42         # 난수 생성 발생기의 시드 값
)

print((x,y))

(array([[-0.02439857, -0.57540077,  1.26796049, -1.42222965, -0.9629849 ],
       [-1.07638119,  0.3872175 ,  1.08299994, -0.67379011, -2.65098736],
       [-1.12984986, -0.26922916,  1.12735009, -0.82383687, -1.70574586],
       ...,
       [-0.53797853,  0.26401859, -0.48915618,  0.4664446 , -1.57451325],
       [ 0.01920342,  0.9761859 , -0.14717165, -1.51725386,  2.31873002],
       [-0.37051336,  0.93603022, -0.62133172, -0.23084897,  1.66473405]]), array([0, 0, 0, ..., 0, 1, 1]))


이전 데이터 셋을 Train set과 Test set으로 나누기

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.1, random_state=42)
print('x_train >> ',len(x_train))
print('y_train >> ',len(x_train))
print('x_test >> ',len(x_test))
print('y_test >> ',len(y_test))

x_train >>  2700
y_train >>  2700
x_test >>  300
y_test >>  300


Pytorch의 Dataset과 DataLoader를 사용하기 위한 Custom Dataset 구현

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

class MyCustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

# 데이터셋의 정의
train_dataset = MyCustomDataset(x_train, y_train)
test_dataset = MyCustomDataset(x_test, y_test)

# 데이터 로더 정의
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

##########  디버깅코드  ############
# test = MyCustomDataset(x_test, y_test)
# for i in test:
#     print(i)
##########  디버깅코드  ############

for index, (datas, labels) in enumerate(test_loader):
    print(index, datas, labels)

0 tensor([[-0.1814,  0.9541, -0.8362,  0.9478,  1.1666],
        [-1.0747, -0.5975, -0.0673, -0.7824, -1.8099],
        [ 2.0192,  1.4252,  0.7749,  0.9027,  0.9276],
        [ 0.4525,  0.4803,  0.6397,  0.7536,  2.2430],
        [ 1.3994,  0.5470, -0.7957, -0.2365, -1.8805],
        [-0.5218,  1.1701, -1.8481, -1.2085,  1.2183],
        [-0.3237,  0.8182, -0.7245, -0.2665,  1.0007],
        [ 1.8711,  1.4021, -0.5651, -0.8509, -1.1655],
        [ 0.7552,  0.1501,  1.1108,  1.0845,  1.0893],
        [ 0.3990,  2.0237, -0.6619,  1.7075, -0.4431],
        [-0.2678,  0.3786,  1.6651,  1.3227, -0.5773],
        [ 0.4529,  1.2332, -0.6631, -0.2662,  0.8212],
        [ 0.4259, -0.3123,  0.6507, -1.0707, -0.7738],
        [-1.3319,  0.8085, -0.7474,  1.3080,  0.7274],
        [-1.4163,  0.0362,  0.1478, -0.6428, -2.0718],
        [-1.0560,  0.7615,  1.0216, -1.0501,  0.1911],
        [ 1.5948,  0.7361,  1.6070, -1.0155,  1.2248],
        [-0.3499,  1.8803, -0.8156,  0.6496,  0.7292],
        

모델 정의

In [5]:
import torch
import torch.nn as nn

class LogisticRegression(nn.Module):
    
    def __init__(self, input_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
        
    def forward(self, x):
        out = self.linear(x)
        out = torch.sigmoid(out)
        return out
    
model = LogisticRegression(input_dim=5)
print(model)


LogisticRegression(
  (linear): Linear(in_features=5, out_features=1, bias=True)
)


모델을 학습시키기 전에 학습에 필요한 Loss function, Optimizer 선언

In [6]:
# !pip install adamp

In [7]:
from adamp import SGDP

"""
# define your params
optimizer = SGDP(params, lr=0.1, weight_decay= 1e-5, momentum=0.9,nesterov=True)
"""

criterion = nn.BCELoss() # 0과 1 분류, 이진 분류하기 위해서 사용
optimizer = SGDP(model.parameters(), lr=0.01, weight_decay=0.9, nesterov=True)    # SDGP
# optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=True)   #SGD
print(optimizer)

SGDP (
Parameter Group 0
    dampening: 0
    delta: 0.1
    eps: 1e-08
    lr: 0.01
    momentum: 0
    nesterov: True
    wd_ratio: 0.1
    weight_decay: 0.9
)


Train Loop 구현

In [8]:
num_epochs = 100

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        
        # unsqueeze 확인
        # print(targets)
        # print('unsqueeze >> ', targets.unsqueeze(1))
        
        # optimizer 초기화 진행
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        
        optimizer.step()
    
    if epoch % 10 == 0 :
        print(f'Epoch : [{epoch+1}/{num_epochs}], Loss : [{loss.item():.4f}]')

Epoch : [1/100], Loss : [0.5113]
Epoch : [11/100], Loss : [0.5531]
Epoch : [21/100], Loss : [0.4578]
Epoch : [31/100], Loss : [0.5221]
Epoch : [41/100], Loss : [0.5387]
Epoch : [51/100], Loss : [0.5137]
Epoch : [61/100], Loss : [0.4988]
Epoch : [71/100], Loss : [0.5319]
Epoch : [81/100], Loss : [0.5267]
Epoch : [91/100], Loss : [0.4945]


In [9]:
"""
SGDP
Epoch : [1/100],Loss : [0.4654]
Epoch : [11/100],Loss : [0.5070]
Epoch : [21/100],Loss : [0.5388]
Epoch : [31/100],Loss : [0.4690]
Epoch : [41/100],Loss : [0.4880]
Epoch : [51/100],Loss : [0.4890]
Epoch : [61/100],Loss : [0.5359]
Epoch : [71/100],Loss : [0.5107]
Epoch : [81/100],Loss : [0.5114]
Epoch : [91/100],Loss : [0.5285]

SGD
Epoch : [1/100], Loss : [0.4764]
Epoch : [11/100], Loss : [0.1516]
Epoch : [21/100], Loss : [0.1447]
Epoch : [31/100], Loss : [0.1025]
Epoch : [41/100], Loss : [0.4601]
Epoch : [51/100], Loss : [0.0434]
Epoch : [61/100], Loss : [0.1413]
Epoch : [71/100], Loss : [0.1562]
Epoch : [81/100], Loss : [0.0402]
Epoch : [91/100], Loss : [0.2346]

"""

'\nSGDP\nEpoch : [1/100],Loss : [0.4654]\nEpoch : [11/100],Loss : [0.5070]\nEpoch : [21/100],Loss : [0.5388]\nEpoch : [31/100],Loss : [0.4690]\nEpoch : [41/100],Loss : [0.4880]\nEpoch : [51/100],Loss : [0.4890]\nEpoch : [61/100],Loss : [0.5359]\nEpoch : [71/100],Loss : [0.5107]\nEpoch : [81/100],Loss : [0.5114]\nEpoch : [91/100],Loss : [0.5285]\n\nSGD\nEpoch : [1/100], Loss : [0.4764]\nEpoch : [11/100], Loss : [0.1516]\nEpoch : [21/100], Loss : [0.1447]\nEpoch : [31/100], Loss : [0.1025]\nEpoch : [41/100], Loss : [0.4601]\nEpoch : [51/100], Loss : [0.0434]\nEpoch : [61/100], Loss : [0.1413]\nEpoch : [71/100], Loss : [0.1562]\nEpoch : [81/100], Loss : [0.0402]\nEpoch : [91/100], Loss : [0.2346]\n\n'

평가 코드 작성

In [10]:
# device on GPU 인지 혹은 CPU
device = torch.device('CUDA' if torch.cuda.is_available() else 'CPU')
print('Using Device >> ', device)

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for test_inputs, test_targers in test_loader :
        test_input, test_target = test_inputs.to(device), test_targets.to(device)
        outputs_test = model(test_input)
        _, pred_test = torch.max(outputs_test, 1)
        print(pred_test)

RuntimeError: Expected one of cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, ort, mps, xla, lazy, vulkan, meta, hpu, privateuseone device type at start of device string: CPU

In [14]:
# device on GPU 인지 혹은 CPU
device = torch. device ( 'cuda' if torch.cuda.is_available() else 'cpu')
print("Using device »> ", device)

model.eval ()
with torch.no_grad() :
    correct = 0
    total = 0
    for test_inputs, test_targets in test_loader :
    #print (test_inputs, test_targets)
        test_input, test_target = test_inputs.to(device), test_targets.to(device)
        outputs_test = model(test_input)
        _, pred_test = torch.max(outputs_test, 1)
        total += test_targets.size (0)
        correct += (pred_test == test_targets).sum().item()
        print ("Acc >> %d%%"% (100 * correct / total))

Using device »>  cpu
Acc >> 43%
Acc >> 50%
Acc >> 45%
Acc >> 44%
Acc >> 45%
Acc >> 47%
Acc >> 46%
Acc >> 48%
Acc >> 48%
Acc >> 49%
