In [48]:
from sklearn import datasets
import pandas as pd
import numpy as np

In [2]:
iris = datasets.load_iris()

In [3]:
df = pd.DataFrame(data=iris.data,columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
df['label']=iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [6]:
df.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2
149,5.9,3.0,5.1,1.8,2


### 標準化

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
scaler = StandardScaler()

In [10]:
scaler.fit(df.drop(columns=['label']))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [22]:
np_std=scaler.transform(df.drop(columns=['label']))

#### np_stdはnumpy配列なのでDataFrameに変換しておく

In [23]:
df_std = pd.DataFrame(np_std)

In [24]:
df_std.head()

Unnamed: 0,0,1,2,3
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [25]:
columns = ['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']
df_std = pd.DataFrame(np_std,columns=columns)
df_std.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [28]:
from sklearn.model_selection import train_test_split

In [33]:
X = df_std
y = pd.Series(data = iris.target)

In [36]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 0)

In [37]:
from sklearn.linear_model import Perceptron

In [38]:
ppn = Perceptron(n_iter=40,eta0=0.1,random_state=1)

In [39]:
ppn.fit(X_train,y_train)

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=0.1,
      fit_intercept=True, max_iter=None, n_iter=40, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=1, shuffle=True, tol=None,
      validation_fraction=0.1, verbose=0, warm_start=False)

In [40]:
pred = ppn.predict(X_test)

In [41]:
from sklearn.metrics import accuracy_score

In [42]:
acc = accuracy_score(y_test,pred)

In [43]:
print(acc)

0.9333333333333333


### モデルの作成時に２つの特徴量を用いたときの可視化

In [44]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

In [45]:
def plot_decision_regions(X,y,classifier,test_idx=None,resolution=0.02):
    
    ##マーカーとカラーマップの準備
    markers = ('s','x','o','^','v')
    colors = ('red','blue','lightgreen','gray','cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    ##決定領域のプロット
    x1_min, x1_max = X[:,0].min() - 1, X[:,0].max() + 1
    x2_min, x2_max = X[:,1].min() - 1, X[:,1].max() + 1

    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                            np.arange(x2_min,x2_max, resolution))

    Z = classifier.predict(np.array([xx1.ravel(),xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1,xx2,Z,alpha=0.3,cmap = cmap)

    plt.xlim(xx1.min(),xx1.max())
    plt.ylim(xx2.min(),xx2.max())

    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl,0],y=X[y == cl, 1],
                    alpha = 0.8,
                    c = colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    if test_idx:
        X_test, y_test = X[test_idx, : ], y[test_idx] 
        plt.scatter(X_test[:,0],X_test[:,1],
                    c ='',
                    edgecolor='black',
                    alpha = 1.0,
                    linewidth=1,
                    marker='o',
                    s = 100,
                    label = 'test set')

In [46]:
X_plt = df_std.drop(columns=['sepal width (cm)','petal width (cm)'])
X_plt.head()

Unnamed: 0,sepal length (cm),petal length (cm)
0,-0.900681,-1.340227
1,-1.143017,-1.340227
2,-1.385353,-1.397064
3,-1.506521,-1.283389
4,-1.021849,-1.340227


In [55]:
X_pltnp = X_plt.values
X_pltnp

array([[-0.90068117, -1.34022653],
       [-1.14301691, -1.34022653],
       [-1.38535265, -1.39706395],
       [-1.50652052, -1.2833891 ],
       [-1.02184904, -1.34022653],
       [-0.53717756, -1.16971425],
       [-1.50652052, -1.34022653],
       [-1.02184904, -1.2833891 ],
       [-1.74885626, -1.34022653],
       [-1.14301691, -1.2833891 ],
       [-0.53717756, -1.2833891 ],
       [-1.26418478, -1.22655167],
       [-1.26418478, -1.34022653],
       [-1.87002413, -1.51073881],
       [-0.05250608, -1.45390138],
       [-0.17367395, -1.2833891 ],
       [-0.53717756, -1.39706395],
       [-0.90068117, -1.34022653],
       [-0.17367395, -1.16971425],
       [-0.90068117, -1.2833891 ],
       [-0.53717756, -1.16971425],
       [-0.90068117, -1.2833891 ],
       [-1.50652052, -1.56757623],
       [-0.90068117, -1.16971425],
       [-1.26418478, -1.05603939],
       [-1.02184904, -1.22655167],
       [-1.02184904, -1.22655167],
       [-0.7795133 , -1.2833891 ],
       [-0.7795133 ,

In [56]:
plot_decision_regions(df_std.values,y,classifier=ppn)
plt.show()

ValueError: X has 2 features per sample; expecting 4

array([[-9.00681170e-01,  1.01900435e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00, -1.31979479e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.38535265e+00,  3.28414053e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-1.50652052e+00,  9.82172869e-02, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  1.24920112e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-5.37177559e-01,  1.93979142e+00, -1.16971425e+00,
        -1.05217993e+00],
       [-1.50652052e+00,  7.88807586e-01, -1.34022653e+00,
        -1.18381211e+00],
       [-1.02184904e+00,  7.88807586e-01, -1.28338910e+00,
        -1.31544430e+00],
       [-1.74885626e+00, -3.62176246e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00,  9.82172869e-02, -1.28338910e+00,
        -1.44707648e+00],
       [-5.37177559e-01,  1.47939788e+00, -1.28338910e+00,
        -1.31544430e+00],
       [-1.26418478e+00,  7.88807586e-01, -1.22655167e+00,
      

In [58]:
df_std.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [63]:
df_std = df.iloc[0:100,[0,2]]
df_std.values

array([[5.1, 1.4],
       [4.9, 1.4],
       [4.7, 1.3],
       [4.6, 1.5],
       [5. , 1.4],
       [5.4, 1.7],
       [4.6, 1.4],
       [5. , 1.5],
       [4.4, 1.4],
       [4.9, 1.5],
       [5.4, 1.5],
       [4.8, 1.6],
       [4.8, 1.4],
       [4.3, 1.1],
       [5.8, 1.2],
       [5.7, 1.5],
       [5.4, 1.3],
       [5.1, 1.4],
       [5.7, 1.7],
       [5.1, 1.5],
       [5.4, 1.7],
       [5.1, 1.5],
       [4.6, 1. ],
       [5.1, 1.7],
       [4.8, 1.9],
       [5. , 1.6],
       [5. , 1.6],
       [5.2, 1.5],
       [5.2, 1.4],
       [4.7, 1.6],
       [4.8, 1.6],
       [5.4, 1.5],
       [5.2, 1.5],
       [5.5, 1.4],
       [4.9, 1.5],
       [5. , 1.2],
       [5.5, 1.3],
       [4.9, 1.4],
       [4.4, 1.3],
       [5.1, 1.5],
       [5. , 1.3],
       [4.5, 1.3],
       [4.4, 1.3],
       [5. , 1.6],
       [5.1, 1.9],
       [4.8, 1.4],
       [5.1, 1.6],
       [4.6, 1.4],
       [5.3, 1.5],
       [5. , 1.4],
       [7. , 4.7],
       [6.4, 4.5],
       [6.9,