In [83]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn import tree
from sklearn.pipeline import Pipeline

In [84]:
# Load the iris dataset and hold out 20% of the samples as a test split.
# random_state is fixed so the same split is produced on every run.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

In [85]:
# SVM pipeline: standardize the features, reduce them to 2 principal
# components, then fit a support vector classifier.
pipe_svm = Pipeline(steps=[
    ('ss1', StandardScaler()),
    ('pca', PCA(n_components=2)),
    ('svm', svm.SVC(random_state=42)),
])
			
# KNN pipeline: standardize the features, then classify using the
# 6 nearest neighbours under the Euclidean metric.
pipe_knn = Pipeline(steps=[
    ('ss2', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=6, metric='euclidean')),
])

# Decision-tree pipeline: standardize, min-max scale, then fit a tree
# with a fixed seed.
# NOTE(review): StandardScaler followed by MinMaxScaler looks redundant
# (and trees are presumably insensitive to feature scaling) -- confirm
# before dropping either step, since step names may be referenced elsewhere.
pipe_dt = Pipeline(steps=[
    ('ss3', StandardScaler()),
    ('minmax', MinMaxScaler()),
    ('dt', tree.DecisionTreeClassifier(random_state=42)),
])

# Random-forest pipeline: standardize the features, reduce them to
# 2 principal components, then fit a forest of `num_trees` trees that
# considers `max_features` feature(s) at each split.
num_trees = 100
max_features = 1
pipe_rf = Pipeline([
    ('ss4', StandardScaler()),
    ('pca', PCA(n_components=2)),
    # random_state is pinned (matching the seed used for the split and the
    # other seeded estimators) so the forest -- and therefore the reported
    # accuracy -- is reproducible across kernel restarts.
    ('rf', RandomForestClassifier(n_estimators=num_trees,
                                  max_features=max_features,
                                  random_state=42)),
])

In [86]:
# Display names keyed by integer index; the index is used to look up the
# name of the pipeline at the same position in the `pipelines` list.
pipe_dic = dict(enumerate([
    'K Nearest Neighbours',
    'Decision Tree',
    'Random Forest',
    'Support Vector Machines',
]))

In [87]:
# Pipelines to compare. The order must line up with the integer keys of
# pipe_dic, which supplies the display name for each position.
pipelines = [
    pipe_knn,
    pipe_dt,
    pipe_rf,
    pipe_svm,
]

In [88]:
# Train every pipeline on the training split (each is fitted in place).
for pipe in pipelines:
    pipe.fit(X_train, y_train)

In [89]:
# Print the test-split accuracy of each fitted pipeline, labelled with
# its display name from pipe_dic.
for idx, val in enumerate(pipelines):
    label = pipe_dic[idx]
    test_accuracy = val.score(X_test, y_test)
    print('%s pipeline test accuracy: %.2f' % (label, test_accuracy))

K Nearest Neighbours pipeline test accuracy: 1.00
Decision Tree pipeline test accuracy: 1.00
Random Forest pipeline test accuracy: 0.90
Support Vector Machines pipeline test accuracy: 0.90


In [90]:
# Pick the pipeline with the highest accuracy on the test split.
# Ties are resolved in favour of the earliest pipeline in `pipelines`
# (strict '>' comparison).
# NOTE(review): the winner is chosen on the same test split used for
# reporting, so the reported best accuracy is optimistically biased;
# consider selecting on a validation split or via cross-validation.
best_accuracy = float('-inf')  # sentinel below any real score
best_classifier = None         # index into pipelines / key into pipe_dic
best_pipeline = None           # the fitted winning Pipeline object
for idx, val in enumerate(pipelines):
    # Score once per pipeline instead of re-evaluating inside the branch.
    accuracy = val.score(X_test, y_test)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_pipeline = val
        best_classifier = idx

if best_pipeline is None:
    # Nothing was scored; do not report a bogus winner.
    print('No pipelines to evaluate')
else:
    print('%s is the classifier has the best accuracy of %.2f' % (pipe_dic[best_classifier],best_accuracy))

K Nearest Neighbours is the classifier has the best accuracy of 1.00
