In [267]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [268]:
# 랜덤 시드 설정 ------------------------------
torch.manual_seed(1)

<torch._C.Generator at 0x1eff905b890>

# 모듈 로딩

In [269]:
import pandas as pd

In [270]:
file='../data/winequality-white.csv'
import csv

[1] 판다스 들고 오기

In [271]:
WINE=pd.read_csv(file, sep=';')

In [272]:
WINE.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

[2]

In [273]:
wineq_numpy=np.loadtxt(file,dtype=np.float32,delimiter=';',skiprows=1)

In [274]:
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [275]:
col_list=next(csv.reader(open(file),delimiter=';'))

In [276]:
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

# [2-2] Numpy 사용한 데이터 로딩

In [277]:
wineq=torch.from_numpy(wineq_numpy)

In [278]:
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [279]:
# 훈련 데이터 ---------------------------------
x_data = wineq_numpy[:,:-1]
y_data = wineq_numpy[:,-1]
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

In [280]:
# from sklearn.preprocessing import label_binarize

In [281]:
print(y_train)

tensor([6., 6., 6.,  ..., 6., 7., 6.])


In [282]:
# y_train 분류해서 0,1로 만들기( 원핫 인코딩처럼 해보기)
for i in range(len(y_train)):
   if y_train[i] > 5:
      y_train[i] = 1
   else:
      y_train[i] = 0

In [283]:
print(y_train)

tensor([1., 1., 1.,  ..., 1., 1., 1.])


In [284]:
# 분류로 인해 개수가 줄었는지 확인 하기 위해서 print(sum(y_train))사용
print(sum(y_train))

tensor(3258.)


타겟의 고유값 찾아 분류하기 위해 unique() 사용

In [285]:
WINE['quality'].unique()

array([6, 5, 7, 8, 4, 3, 9], dtype=int64)

In [286]:
#y_train의 차원을 맞춰주기 위해(뜬 오류를 제거하기 위해) 차원추가
y_train.unsqueeze_(dim=1)

tensor([[1.],
        [1.],
        [1.],
        ...,
        [1.],
        [1.],
        [1.]])

# [3] 분류 로지스틱 회귀사용

In [287]:
# 모델 설계 ------------------------------------
# Sequential 순서대로 진행되어야 하는 모듈을
#            묶음으로 관리
model = nn.Sequential(
   nn.Linear(11, 1), # input_dim = 11, output_dim = 1(현재 피쳐 개수가 11개이므로 )
   nn.Sigmoid()     # 출력은 시그모이드 함수 거침(현재 2진분류를 위해 시그모이드 함수를 사용하였음)
)

# optimizer 설정-----------------------------
# 경사하강법으로 업데이트 해야하는 W,와 b는 
# 모델 인스턴스에서 가지고 있음
# => 가지고 오는 메서드 : model.parameters()
optimizer = optim.SGD(model.parameters(), lr=1) #경사하강법을 사용하여 W,B를 뽑아오는 메서드

In [288]:

# 학습 -----------------------------------------

def training():
   nb_epochs = 1000
   for epoch in range(nb_epochs + 1):

      # H(x) 계산
      hypothesis = model(x_train)

      # cost 계산 => 이진 분류 손실함수  binary_cross_entropy()
      cost = F.binary_cross_entropy(hypothesis, y_train)  #2진분류, 예측값과 실제값의 차이를 분류

      # cost로 W,b값 업데이트 => SGD 경사하강법 방식으로 업데이트 진행
      optimizer.zero_grad()
      cost.backward()
      optimizer.step()

      # 20번마다 로그 출력(정확도 기능 만들기)
      if epoch % 20 == 0:
         prediction = hypothesis >= torch.FloatTensor([0.5])  # 예측값이 0.5를 넘으면 True로 간주
         correct_prediction = prediction.float() == y_train  # 실제값과 일치하는 경우만 True로 간주
         accuracy = correct_prediction.sum().item() / len(correct_prediction)  # 정확도를 계산

         print(f'Epoch {epoch:4d}/{nb_epochs} Cost: {cost.item():.6f} Accuracy {accuracy * 100:2.2f}%')


In [289]:
training()

Epoch    0/1000 Cost: 0.955788 Accuracy 61.94%
Epoch   20/1000 Cost: 33.483055 Accuracy 66.52%
Epoch   40/1000 Cost: 33.483055 Accuracy 66.52%
Epoch   60/1000 Cost: 33.483055 Accuracy 66.52%
Epoch   80/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  100/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  120/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  140/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  160/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  180/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  200/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  220/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  240/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  260/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  280/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  300/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  320/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  340/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  360/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  380/1000 Cost: 33.483055 Accuracy 66.52%
Epoch  400/1000 Cost: 33.483055 Accuracy 

In [290]:
print(list(model.parameters()))

[Parameter containing:
tensor([[ 0.4424, -0.1255, -0.0449, -0.1242, -0.2831, -1.0646,  6.1282,  0.1965,
          0.1907, -0.0093,  0.7265]], requires_grad=True), Parameter containing:
tensor([0.0587], requires_grad=True)]


[4] [ 다중 선형 회귀 ]

In [291]:
# 훈련데이터 -----------------------------------
x_train_mul  =  torch.FloatTensor(x_data)
y_train_mul  =  torch.FloatTensor(y_data)

In [292]:
print(f'x_train_mul :  {x_train_mul.shape}, {x_train_mul.ndim}차원')
print(f'y_train_mul :  {y_train_mul.shape}, {y_train_mul.ndim}차원')

x_train_mul :  torch.Size([4898, 11]), 2차원
y_train_mul :  torch.Size([4898]), 1차원


In [293]:
# 모델 초기화 ---------------------------------------------
# 가중치와 편향 선언
W_mul = torch.zeros((11, 1), requires_grad=True)
b_mul = torch.zeros(1, requires_grad=True)

In [294]:
print(f'W ==> \n{W_mul}\n\nb ==> {b_mul}')

W ==> 
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], requires_grad=True)

b ==> tensor([0.], requires_grad=True)


In [295]:
# optimizer 설정
optimizer_mul = optim.SGD([W_mul,b_mul], lr=1e-5)

In [296]:
# 학습 진행 -------------------------------------------------
nb_epochs = 20
for epoch in range(nb_epochs + 1):

    # H(x) 계산
    # 편향 b는 브로드 캐스팅되어 각 샘플에 더해집니다.
    hypothesis_mul = x_train_mul.matmul(W_mul) + b_mul

    # cost 계산
    cost = torch.mean((hypothesis_mul - y_train_mul) ** 2)

    # cost로 H(x) 개선
    optimizer_mul.zero_grad()
    cost.backward()
    optimizer_mul.step()

    print(f'Epoch {epoch:4d}/{nb_epochs} hypothesis: { hypothesis_mul.squeeze().detach()} Cost: {cost.item():.6f}')

Epoch    0/20 hypothesis: tensor([0., 0., 0.,  ..., 0., 0., 0.]) Cost: 0.665169
Epoch    1/20 hypothesis: tensor([0.3378, 0.2518, 0.1955,  ..., 0.2206, 0.2144, 0.1932]) Cost: 0.382649
Epoch    2/20 hypothesis: tensor([0.5235, 0.3902, 0.3031,  ..., 0.3418, 0.3323, 0.2994]) Cost: 0.297255
Epoch    3/20 hypothesis: tensor([0.6256, 0.4663, 0.3622,  ..., 0.4085, 0.3972, 0.3578]) Cost: 0.271431
Epoch    4/20 hypothesis: tensor([0.6817, 0.5082, 0.3947,  ..., 0.4452, 0.4329, 0.3900]) Cost: 0.263609
Epoch    5/20 hypothesis: tensor([0.7125, 0.5312, 0.4127,  ..., 0.4654, 0.4526, 0.4077]) Cost: 0.261226
Epoch    6/20 hypothesis: tensor([0.7294, 0.5439, 0.4225,  ..., 0.4765, 0.4635, 0.4175]) Cost: 0.260487
Epoch    7/20 hypothesis: tensor([0.7386, 0.5509, 0.4280,  ..., 0.4826, 0.4695, 0.4229]) Cost: 0.260244
Epoch    8/20 hypothesis: tensor([0.7437, 0.5548, 0.4310,  ..., 0.4860, 0.4728, 0.4259]) Cost: 0.260152
Epoch    9/20 hypothesis: tensor([0.7465, 0.5570, 0.4327,  ..., 0.4879, 0.4747, 0.4277])

In [300]:
# 예측 ------------------------------------------------------
# 
pre_y=x_train_mul[0].matmul(W_mul)+b_mul  # y_train_mul(실제값) 행렬곱 W_mul+ b_mul

print(f'pre_y => {pre_y.item()} : {y_train_mul[0].item()}')

pre_y => 0.7492175102233887 : 1.0


In [301]:
x_train_mul[0].matmul(W_mul)+b_mul

tensor([0.7492], grad_fn=<AddBackward0>)