# アヤメの分類

## デフォルトインポート

In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

%matplotlib inline

## データの準備

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

X = iris.data
y = iris.target

In [3]:
df = pd.DataFrame(X, columns=iris.feature_names)
df['y'] = y
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),y
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## 数値モデルの構築

In [42]:
from sklearn.model_selection import train_test_split
X_trainval, X_test, y_trainval, y_test = train_test_split(df.drop(columns=['y']), df['y'], train_size=0.7, random_state=0)
print(X_trainval.shape, X_testval.shape)

(105, 4) (45, 4)


In [43]:
X_train, X_valid, y_train, y_valid = train_test_split(X_trainval, y_trainval, train_size=0.7, random_state=0)
print(X_train.shape, X_test.shape)

(73, 4) (45, 4)


In [44]:
# 分割後のバランス確認
np.bincount(y_train), np.bincount(y_test)

(array([19, 24, 30], dtype=int64), array([16, 18, 11], dtype=int64))

## 予測モデルの構築(ロジスティック回帰)

### その1

In [45]:
from sklearn.linear_model import LogisticRegression
logistic_regression01 = LogisticRegression(C=1.0)
logistic_regression01.fit(X_train, y_train)

print(f'Train Acc: {logistic_regression01.score(X_train, y_train):.3f}')
print(f'Valid Acc: {logistic_regression01.score(X_valid, y_valid):.3f}')

Train Acc: 0.945
Valid Acc: 0.938


### その2

In [46]:
logistic_regression01 = LogisticRegression(C=0.01)
logistic_regression01.fit(X_train, y_train)

print(f'Train Acc: {logistic_regression01.score(X_train, y_train):.3f}')
print(f' Test Acc: {logistic_regression01.score(X_valid, y_valid):.3f}')

Train Acc: 0.671
 Test Acc: 0.750


### その3

In [47]:
logistic_regression01 = LogisticRegression(C=100)
logistic_regression01.fit(X_train, y_train)

print(f'Train Acc: {logistic_regression01.score(X_train, y_train):.3f}')
print(f' Test Acc: {logistic_regression01.score(X_valid, y_valid):.3f}')

Train Acc: 0.986
 Test Acc: 1.000


## 一番良いモデルで再学習

In [48]:
final_model = LogisticRegression(C=100)
final_model.fit(X_trainval, y_trainval)

LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

## テストデータでの最終評価

In [50]:
print(f'Test Acc: {final_model.score(X_test, y_test):.3f}')

Test Acc: 0.956
