### 목표 : 선형 모델 기반의 분류 모델 구현
- 데이터 : sklearn.datasets의 iris
- 피쳐 : 4개
- 타겟 : 3개

(1) 모듈 로딩 & 데이터 준비

In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np

In [2]:
# Load the iris dataset. With default arguments load_iris() returns a Bunch,
# a dict-like scikit-learn container, so its fields can be listed with keys().
data = load_iris()
print(data.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])


In [3]:
# Load in ndarray form: return_X_y=True yields a 2-element tuple instead of a
# Bunch (the recorded output shows a tuple of length 2 whose first item is a
# numpy.ndarray).
dataXy = load_iris(return_X_y=True)

# Inspect the container type, the type of its first element, and its length.
print(type(dataXy), type(dataXy[0]), len(dataXy))

<class 'tuple'> <class 'numpy.ndarray'> 2


In [4]:
# Load data and target as pandas objects: as_frame=True combined with
# return_X_y=True still yields a 2-element tuple, but its first item is a
# DataFrame (see the recorded output).
# NOTE(review): 'ataXy' looks like a typo of 'dataXy' -- confirm no other cell
# depends on this name before renaming.
ataXy = load_iris(return_X_y=True, as_frame=True)

print(type(ataXy), type(ataXy[0]), len(ataXy))
# Last expression of the cell: display the feature DataFrame.
ataXy[0]

<class 'tuple'> <class 'pandas.core.frame.DataFrame'> 2


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
# Reload as a Bunch whose 'data' and 'target' entries are pandas objects.
data = load_iris(as_frame=True)

# Dump the available keys together with the full feature and target contents.
print(data.keys(), data['data'], data['target'])

# Alternative: features and target restricted to 2 species (first 100 rows)
# featureDF = data['data'][:100]
# targetDF = data['target'][:100]

# Features and target for all 3 species (every row is kept)
featureDF = data['data']
targetDF = data['target']

# Sanity-check that features and target have matching row counts.
print(featureDF.shape, targetDF.shape)

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9            

(2) 모델 학습 진행

In [6]:
# 모듈 로딩
from sklearn.linear_model import LogisticRegression

In [7]:
# Create the model instance and fit it on the full feature/target set.
# Why max_iter=1000: scikit-learn documents a default of max_iter=100, while
# the n_iter_ value recorded after this fit is [120], i.e. the solver needed
# more iterations than the default allows on this data. Raising the cap
# presumably avoids stopping before convergence (and the accompanying
# convergence warning) -- TODO confirm against the installed sklearn version.
model = LogisticRegression(max_iter=1000)
model.fit(featureDF, targetDF)

In [8]:
# Inspect the model parameters determined by fitting.
print('classes_ :', model.classes_)
print('feature_names_in_ :', model.feature_names_in_)
print('n_iter_ : ', model.n_iter_)
print('coef_ :', model.coef_)               # coef:      12 values = 4 (features) * 3 (target classes)
print('intercept_ :', model.intercept_)     # intercept:  3 values = 1 * 3 (target classes)

classes_ : [0 1 2]
feature_names_in_ : ['sepal length (cm)' 'sepal width (cm)' 'petal length (cm)'
 'petal width (cm)']
n_iter_ :  [120]
coef_ : [[-0.42364806  0.96739434 -2.51708319 -1.07937296]
 [ 0.53447765 -0.3216458  -0.20639707 -0.94423775]
 [-0.11082959 -0.64574854  2.72348026  2.02361071]]
intercept_ : [  9.84997373   2.23721656 -12.08719029]


In [9]:
# Score the model on the same featureDF/targetDF it was fitted on, so this is
# a training-set score, not an estimate of performance on unseen data.
model.score(featureDF, targetDF)

0.9733333333333334

In [10]:
# Two ways to take the first sample while keeping it 2-D (a one-row DataFrame).
featureDF.head(1), featureDF.iloc[[0]]  # both are 2-D data

(   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                5.1               3.5                1.4               0.2,
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                5.1               3.5                1.4               0.2)

In [11]:
# Predict the class of the first sample.
model.predict(featureDF.iloc[[0]])      # pass 2-D data (list index keeps a one-row DataFrame)

array([0])

In [12]:
# Predict the class of the last sample; the list index [[-1]] keeps the input
# 2-D, the same way the first-sample prediction does.
model.predict(featureDF.iloc[[-1]])

array([2])