# WINE_이정빈

In [101]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
import platform

# Reset seaborn to its default settings
sns.reset_defaults()

# Choose a font for the plots based on the operating system
os_name = platform.system()
if os_name == 'Darwin':
    rc('font', family='AppleGothic')
elif os_name == 'Windows':
    # Resolve the family name from the Malgun Gothic font file
    path = 'c:/Windows/Fonts/malgun.ttf'
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    # Any other OS: no font is configured, only a hint is printed
    print('Check your OS System')

# Minus-sign rendering on plot axes: turn off the Unicode minus glyph
# (presumably because the fonts above may not provide it -- verify)
matplotlib.rcParams['axes.unicode_minus'] = False

In [3]:
from sklearn.datasets import load_wine

In [69]:
# Load the wine dataset via sklearn.datasets.load_wine
wine_이정빈 = load_wine()

In [81]:
# Pull the pieces of the dataset into separate variables:
# feature matrix and labels first, then feature names and class names
wine_data, wine_target = wine_이정빈['data'], wine_이정빈['target']
wine_columns, wine_names = wine_이정빈['feature_names'], wine_이정빈['target_names']

In [96]:
# Bare expression: the notebook echoes the class names as the cell output
wine_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [83]:
# Randomly distribute the samples into train and test sets to avoid bias;
# the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split

train_input, test_input, train_target, test_target = train_test_split(
    wine_data,
    wine_target,
    random_state=8,
)

In [84]:
# Print the shape of each split: train inputs/targets, then test inputs/targets
for split in (train_input, train_target, test_input, test_target):
    print(split.shape)

(133, 13)
(133,)
(45, 13)
(45,)


In [85]:
# Standardization (zero mean, unit variance per feature).
# The scaler is fitted on the training inputs only and the same statistics
# are then applied to the test inputs, so no test information leaks into
# the preprocessing step. StandardScaler does not use the targets, so they
# are not passed to it.
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
train_scaled = ss.fit_transform(train_input)
test_scaled = ss.transform(test_input)

## 로지스틱 회귀모델
### 소프트맥스(다중분류)

In [95]:
# Build the logistic regression model - softmax (multi-class classification)
from sklearn.linear_model import LogisticRegression

lr_soft = LogisticRegression(C=0.3, max_iter=1000)
lr_soft.fit(train_scaled, train_target)

# Accuracy on the scaled training and test sets, rounded to 3 decimals
train_acc = lr_soft.score(train_scaled, train_target)
test_acc = lr_soft.score(test_scaled, test_target)
print('소프트맥스 훈련데이터 정확도:', round(train_acc, 3))
print('소프트맥스 테스트데이터 정확도:', round(test_acc, 3))

소프트맥스 훈련데이터 정확도: 0.992
소프트맥스 테스트데이터 정확도: 0.978


In [99]:
# Feed the first five test samples to the model and show the predicted classes
first_five_pred = lr_soft.predict(test_scaled[:5])
print(
    '-------------입력된 값의 클래스------------',
    first_five_pred,
    '0 = class_0, 1 = class_1, 2 = class_2',
    sep='\n',
)

-------------입력된 값의 클래스------------
[0 2 1 0 1]
0 = class_0, 1 = class_1, 2 = class_2


In [102]:
# Order of the classes as stored by the trained model
print('-----범주 순서-----')
print(lr_soft.classes_, end='\n\n')

# Probability of every class for the first five test samples
proba = lr_soft.predict_proba(test_scaled[:5])
print('----범주별 확률----')
print(proba.round(3))

-----범주 순서-----
[0 1 2]

----범주별 확률----
[[0.989 0.007 0.003]
 [0.022 0.047 0.931]
 [0.023 0.972 0.005]
 [0.977 0.021 0.002]
 [0.001 0.996 0.003]]


In [103]:
# Look up the learned coefficients (weights) and the y-intercepts
coef_title = '---------------계수(가중치)--------------'
intercept_title = '------------------y절편------------------'

# Title and values on separate lines, with one blank line between sections
print(coef_title, lr_soft.coef_, sep='\n', end='\n\n')
print(intercept_title, lr_soft.intercept_, sep='\n')

---------------계수(가중치)--------------
[[ 0.57858651  0.11302686  0.25222238 -0.57049469  0.0866538   0.1954274
   0.44808461 -0.24379972  0.06905318  0.12715842  0.14903888  0.45038045
   0.74261216]
 [-0.68528415 -0.40811032 -0.42057155  0.44963955  0.00215143  0.05362204
   0.18558046  0.13380328  0.19164221 -0.66735524  0.44432417  0.02700966
  -0.74467837]
 [ 0.10669764  0.29508346  0.16834917  0.12085514 -0.08880523 -0.24904944
  -0.63366507  0.10999644 -0.26069538  0.54019682 -0.59336305 -0.47739011
   0.00206622]]

------------------y절편------------------
[ 0.22609947  0.689502   -0.91560146]


In [104]:
# Probability formula via the softmax function
# Package used: scipy.special, function used: softmax
from scipy.special import softmax

# Look up the z values (decision scores) for the first five test samples
decisions = lr_soft.decision_function(test_scaled[:5])
print('------------z값------------')
print(decisions.round(3), end='\n\n')

# Apply softmax along each row (axis=1) to turn the scores into probabilities
proba = softmax(decisions, axis=1)
print('------소프트맥스 확률------')
print(proba.round(3))

------------z값------------
[[ 3.519 -1.379 -2.14 ]
 [-1.485 -0.753  2.239]
 [-0.766  2.979 -2.213]
 [ 3.371 -0.466 -2.905]
 [-2.504  4.176 -1.671]]

------소프트맥스 확률------
[[0.989 0.007 0.003]
 [0.022 0.047 0.931]
 [0.023 0.972 0.005]
 [0.977 0.021 0.002]
 [0.001 0.996 0.003]]
