In [1]:
import warnings

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# all_estimators was moved from sklearn.utils.testing to sklearn.utils in
# scikit-learn 0.22, and sklearn.utils.testing was removed in 0.24.
# Try the current location first and fall back for older installations.
try:
    from sklearn.utils import all_estimators
except ImportError:
    from sklearn.utils.testing import all_estimators

# Silence library warnings for this cell. One call is enough: the filter
# stays installed for every statement that follows.
warnings.filterwarnings('ignore')

# Seed for the shuffled train/test split so that every run (and every
# classifier) is evaluated on the same held-out rows.
RANDOM_STATE = 0

# Load the iris data set.
iris_data = pd.read_csv("iris.csv", encoding="utf-8")

# Separate the label column from the four feature columns.
y = iris_data.loc[:, "Name"]
x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

# Split into 80% training data and 20% test data.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, train_size=0.8, shuffle=True,
    random_state=RANDOM_STATE)

# Collect every classifier known to scikit-learn as (name, class) pairs --- (*1)
allAlgorithms = all_estimators(type_filter="classifier")

for (name, algorithm) in allAlgorithms:
    try:
        # Instantiate the classifier with its default arguments --- (*2)
        clf = algorithm()

        # Train on the training split and predict the test split --- (*3)
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
    except Exception as exc:
        # Some classifiers cannot be built without arguments (for example
        # meta-estimators that wrap another estimator) or cannot handle
        # this data. Report and skip them instead of aborting the survey.
        print(name, "はスキップしました:", type(exc).__name__)
        continue

    print(name,"の正解率 = " , accuracy_score(y_test, y_pred))

AdaBoostClassifier の正解率 =  1.0
BaggingClassifier の正解率 =  1.0
BernoulliNB の正解率 =  0.233333333333
CalibratedClassifierCV の正解率 =  1.0
DecisionTreeClassifier の正解率 =  1.0
ExtraTreeClassifier の正解率 =  1.0
ExtraTreesClassifier の正解率 =  1.0
GaussianNB の正解率 =  1.0
GaussianProcessClassifier の正解率 =  1.0
GradientBoostingClassifier の正解率 =  1.0
KNeighborsClassifier の正解率 =  0.966666666667
LabelPropagation の正解率 =  1.0
LabelSpreading の正解率 =  1.0
LinearDiscriminantAnalysis の正解率 =  1.0
LinearSVC の正解率 =  1.0
LogisticRegression の正解率 =  1.0
LogisticRegressionCV の正解率 =  0.966666666667
MLPClassifier の正解率 =  1.0
MultinomialNB の正解率 =  0.966666666667
NearestCentroid の正解率 =  0.966666666667
NuSVC の正解率 =  1.0
PassiveAggressiveClassifier の正解率 =  0.9
Perceptron の正解率 =  0.633333333333
QuadraticDiscriminantAnalysis の正解率 =  0.966666666667
RadiusNeighborsClassifier の正解率 =  1.0
RandomForestClassifier の正解率 =  1.0
RidgeClassifier の正解率 =  0.866666666667
RidgeClassifierCV の正解率 =  0.866666666667
SGDClassifier の正解率 =  0.666666666

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Seed shared by the train/test split and the K-fold shuffling so that the
# selected parameters and the reported accuracy are reproducible.
RANDOM_STATE = 0

# Load the iris data set.
iris_data = pd.read_csv("iris.csv", encoding="utf-8")

# Separate the label column from the four feature columns.
y = iris_data.loc[:, "Name"]
x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

# Split into 80% training data and 20% test data.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, train_size=0.8, shuffle=True,
    random_state=RANDOM_STATE)

# Parameter grid for the search --- (*1)
# The same C values are tried for every kernel; gamma is only searched for
# the rbf and sigmoid kernels.
c_values = [1, 10, 100, 1000]
gamma_values = [0.001, 0.0001]
parameters = [
    {"C": c_values, "kernel": ["linear"]},
    {"C": c_values, "kernel": ["rbf"], "gamma": gamma_values},
    {"C": c_values, "kernel": ["sigmoid"], "gamma": gamma_values},
]

# Run the grid search with shuffled 5-fold cross-validation --- (*2)
kfold_cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
clf = GridSearchCV(SVC(), parameters, cv=kfold_cv)
clf.fit(x_train, y_train)
# best_estimator_ is the SVC instance carrying the winning parameter set.
print("最適なパラメータ = ", clf.best_estimator_)

# Evaluate the search result on the held-out test split --- (*3)
y_pred = clf.predict(x_test)
print("評価時の正解率 = " , accuracy_score(y_test, y_pred))

最適なパラメータ =  SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
評価時の正解率 =  1.0


In [3]:
import warnings

import pandas as pd
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# all_estimators was moved from sklearn.utils.testing to sklearn.utils in
# scikit-learn 0.22, and sklearn.utils.testing was removed in 0.24.
# Try the current location first and fall back for older installations.
try:
    from sklearn.utils import all_estimators
except ImportError:
    from sklearn.utils.testing import all_estimators

# Seed for the K-fold shuffling. With an integer seed every call to
# cross_val_score below receives the same folds, so the per-classifier
# scores are comparable with each other and reproducible across runs.
RANDOM_STATE = 0

# Load the iris data set.
iris_data = pd.read_csv("iris.csv", encoding="utf-8")

# Separate the label column from the four feature columns.
y = iris_data.loc[:, "Name"]
x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

# Collect every classifier known to scikit-learn as (name, class) pairs.
warnings.filterwarnings('ignore')
allAlgorithms = all_estimators(type_filter="classifier")

# Splitter for shuffled 5-fold cross-validation --- (*1)
kfold_cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

for (name, algorithm) in allAlgorithms:
    try:
        # Instantiate the classifier with its default arguments.
        clf = algorithm()

        # Only estimators that expose a score method are evaluated --- (*2)
        if not hasattr(clf, "score"):
            continue

        # Run the cross-validation --- (*3)
        scores = cross_val_score(clf, x, y, cv=kfold_cv)
    except Exception as exc:
        # Some classifiers cannot be built without arguments (for example
        # meta-estimators that wrap another estimator) or cannot handle
        # this data. Report and skip them instead of aborting the survey.
        print(name, "はスキップしました:", type(exc).__name__)
        continue

    print(name,"の正解率=")
    print(scores)

AdaBoostClassifier の正解率=
[ 0.96666667  0.96666667  0.93333333  0.86666667  0.96666667]
BaggingClassifier の正解率=
[ 1.          0.93333333  0.96666667  1.          0.9       ]
BernoulliNB の正解率=
[ 0.26666667  0.16666667  0.3         0.2         0.23333333]
CalibratedClassifierCV の正解率=
[ 0.93333333  0.93333333  0.96666667  0.93333333  0.83333333]
DecisionTreeClassifier の正解率=
[ 1.          0.96666667  0.86666667  0.96666667  0.86666667]
ExtraTreeClassifier の正解率=
[ 0.93333333  0.9         0.93333333  0.9         1.        ]
ExtraTreesClassifier の正解率=
[ 0.93333333  0.96666667  1.          0.9         0.96666667]
GaussianNB の正解率=
[ 0.93333333  0.96666667  0.93333333  0.93333333  0.96666667]
GaussianProcessClassifier の正解率=
[ 0.93333333  0.96666667  0.96666667  0.96666667  0.96666667]
GradientBoostingClassifier の正解率=
[ 1.          0.96666667  0.93333333  0.9         0.96666667]
KNeighborsClassifier の正解率=
[ 0.96666667  0.93333333  0.96666667  1.          0.96666667]
LabelPropagation の正解率=
[ 0.9666