In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
%precision 3

'%.3f'

In [2]:
# Load the iris dataset bundled with scikit-learn; the feature matrix is read
# from `iris.data` and the class labels from `iris.target` further down.
iris = load_iris()

In [3]:
# Show the names of the feature columns, in the column order of `iris.data`.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
# Show the class names; the integer labels in `iris.target` index into this array.
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [5]:
# Restrict the problem to rows 50..149 and to the first two features
# (sepal length and sepal width), giving a two-class, two-feature data set.
# NOTE(review): the labels in this row range appear to be 1 and 2
# (versicolor / virginica) -- confirm against `iris.target`.
X, y = iris.data[50:150, :2], iris.target[50:150]

In [6]:
# Number of rows and columns of the explanatory variables
X.shape

(100, 2)

In [7]:
# Number of rows (and columns) of the response variable
y.shape

(100,)

In [8]:
# Split into training and test sets with the default proportions;
# `random_state=2` pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

In [9]:
# Number of rows and columns of the training explanatory variables
X_train.shape

(75, 2)

In [10]:
# Number of training labels; should match the row count of X_train.
y_train.shape

(75,)

In [11]:
# Peek at the first ten training labels to see how the classes are coded.
y_train[0:10]

array([1, 1, 2, 2, 2, 2, 1, 1, 1, 1])

In [12]:
# Response column: shift the labels down by one so the two classes are coded
# 0/1, the coding the binomial GLM fitted below works with.
y_train_df = pd.DataFrame({'species': y_train - 1})

# Explanatory columns, with short names that are valid in model formulas.
X_train_df = pd.DataFrame(X_train, columns=['sepal_len', 'sepal_wid'])

# Both frames share the same default integer index, so an index join places
# them side by side: species first, then the two features.
iris_train_df = y_train_df.join(X_train_df)
iris_train_df.head(3)

Unnamed: 0,species,sepal_len,sepal_wid
0,0,5.7,2.8
1,0,6.6,3.0
2,1,6.1,3.0


In [13]:
def _fit_logit(formula, data):
    """Fit a binomial-family GLM (logistic regression) for `formula` on `data`.

    Parameters
    ----------
    formula : str
        Model formula referring to columns of `data`.
    data : pandas.DataFrame
        Frame holding the response and explanatory columns.

    Returns
    -------
    The fitted results object returned by `.fit()`.
    """
    return smf.glm(formula, data=data, family=sm.families.Binomial()).fit()


# Candidate models for comparison: both features, each feature alone,
# and the intercept-only (null) model.
logi_mod_full = _fit_logit('species ~ sepal_len + sepal_wid', iris_train_df)
logi_mod_len = _fit_logit('species ~ sepal_len', iris_train_df)
logi_mod_wid = _fit_logit('species ~ sepal_wid', iris_train_df)
logi_mod_null = _fit_logit('species ~ 1', iris_train_df)

In [14]:
# AIC of the full model (sepal_len + sepal_wid), rounded to 3 decimals
logi_mod_full.aic.round(3)

76.813

In [15]:
# AIC of the sepal_len-only model, rounded to 3 decimals
logi_mod_len.aic.round(3)

76.234

In [16]:
# AIC of the sepal_wid-only model, rounded to 3 decimals
logi_mod_wid.aic.round(3)

92.768

In [17]:
# AIC of the intercept-only (null) model, rounded to 3 decimals
logi_mod_null.aic.round(3)

105.318

In [18]:
# Coefficient table of the sepal_len-only model: estimates, standard errors,
# z statistics, p-values and 95% confidence intervals.
logi_mod_len.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-16.4152,4.000,-4.104,0.000,-24.256,-8.575
sepal_len,2.6478,0.639,4.142,0.000,1.395,3.901


In [20]:
# Wrap the held-out features in a DataFrame that uses the same column names as
# the training frame, so the formula-based model can look up 'sepal_len'.
X_test_df = pd.DataFrame(
    X_test,
    columns=['sepal_len', 'sepal_wid']
)

# Turn the fitted / predicted probabilities into hard 0/1 class labels by
# rounding to the nearest integer.
logi_fit = logi_mod_len.fittedvalues.round(0)
logi_pred = logi_mod_len.predict(X_test_df).round(0)

# Count correct classifications. The true labels are shifted by one so they
# use the same 0/1 coding as the 'species' column the model was trained on.
# np.sum is used instead of sp.sum: the NumPy aliases on the root `scipy`
# namespace are deprecated and not available in current SciPy releases.
true_train = np.sum(logi_fit == (y_train - 1))
true_test = np.sum(logi_pred == (y_test - 1))

# Accuracy = share of correctly classified samples.
result_train = true_train / len(y_train)
result_test = true_test / len(y_test)

In [21]:
# Accuracy (hit rate) on the training data
result_train

0.7466666666666667

In [22]:
# Accuracy (hit rate) on the test data
result_test

0.68

In [23]:
# Learn the standardisation parameters from the training data only, then apply
# that same transformation to both sets so no test information leaks into
# the preprocessing step.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(X_train),
    scaler.transform(X_test),
)

In [24]:
# Per-feature standard deviation of the scaled training data.
# np.std replaces sp.std: the NumPy aliases on the root `scipy` namespace are
# deprecated and not available in current SciPy releases.
np.std(X_train_scaled, axis=0)

array([1., 1.])

In [25]:
# Per-feature standard deviation of the scaled test data. It need not be 1,
# because the scaler was fitted on the training data only.
# np.std replaces sp.std: the NumPy aliases on the root `scipy` namespace are
# deprecated and not available in current SciPy releases.
np.std(X_test_scaled, axis=0)

array([0.74 , 0.679])

In [26]:
# Multi-layer perceptron with two hidden layers of 100 units each.
# `alpha` is the regularisation strength, `max_iter` caps the number of
# training iterations, and `random_state` fixes the seed for reproducibility.
nnet = MLPClassifier(hidden_layer_sizes=(100, 100), alpha=0.07,
                     max_iter=10000, random_state=0)

# Train on the standardised features with the original 1/2 labels;
# the fitted estimator is the cell's displayed value.
nnet.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=0.07, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=10000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=0, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [27]:
# Accuracy (hit rate) on the training data
nnet.score(X_train_scaled, y_train)

0.8933333333333333

In [28]:
# Accuracy (hit rate) on the test data
nnet.score(X_test_scaled, y_test)

0.68