<a href="https://colab.research.google.com/github/Bunnykey/PFDS/blob/master/knnclassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset bundle that ships with scikit-learn.
wine = load_wine()
# Cell output: the feature names together with the target array.
(wine.feature_names, wine.target)

In [None]:
# Split the data: 70% of the samples for training, 30% held out for testing.
# The fixed random_state makes the shuffle reproducible between runs.
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    train_size=0.7,
    random_state=42,
    shuffle=True,
)
# Cell output: echo the four resulting arrays.
(X_train, X_test, y_train, y_test)

In [None]:
# Data preprocessing: pair each scaler with a 3-nearest-neighbours classifier and fit it
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import RobustScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

def _knn_pipeline(scaler):
    """Chain the given scaler step with a 3-nearest-neighbours classifier."""
    return Pipeline([
        ('scaler', scaler),
        ('classifier', KNeighborsClassifier(n_neighbors=3)),
    ])


# One pipeline per scaling strategy; the classifier settings are identical,
# so any difference in accuracy comes from the scaler alone.
stdModel = _knn_pipeline(StandardScaler())
robustModel = _knn_pipeline(RobustScaler())
minmaxModel = _knn_pipeline(MinMaxScaler())
normalModel = _knn_pipeline(Normalizer())

# Fit every pipeline on the same training split.
stdClf = stdModel.fit(X_train, y_train)
robustClf = robustModel.fit(X_train, y_train)
minmaxClf = minmaxModel.fit(X_train, y_train)
normalClf = normalModel.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score
# Predictions of every fitted pipeline on the training and test splits.
# The numbered names are kept because the plotting cell reads them.
y_train_hat1, y_test_hat1 = stdClf.predict(X_train), stdClf.predict(X_test)
y_train_hat2, y_test_hat2 = robustClf.predict(X_train), robustClf.predict(X_test)
y_train_hat3, y_test_hat3 = minmaxClf.predict(X_train), minmaxClf.predict(X_test)
y_train_hat4, y_test_hat4 = normalClf.predict(X_train), normalClf.predict(X_test)

# (caption, true labels, predicted labels) in the order they are printed.
accuracy_report = (
    ('std train accuracy :', y_train, y_train_hat1),
    ('std test accuracy :', y_test, y_test_hat1),
    ('robust train accuracy :', y_train, y_train_hat2),
    ('robust test accuracy :', y_test, y_test_hat2),
    ('minmax train accuracy :', y_train, y_train_hat3),
    ('minmax test accuracy :', y_test, y_test_hat3),
    ('normal train accuracy :', y_train, y_train_hat4),
    ('normal test accuracy :', y_test, y_test_hat4),
)
for caption, truth, predicted in accuracy_report:
    print(caption, accuracy_score(truth, predicted))

In [None]:
# Grouped bar chart: one group per scaler, a train bar and a test bar in each.
labels = ['stdscl', 'robust', 'minmax', 'normal']
x = np.arange(len(labels))  # centre position of each group on the x axis
width = 0.35  # width of a single bar
trainHatList = [y_train_hat1, y_train_hat2, y_train_hat3, y_train_hat4]
testHatList = [y_test_hat1, y_test_hat2, y_test_hat3, y_test_hat4]

# Accuracy of each pipeline, in the same order as `labels`.
trainScores = [accuracy_score(y_train, predicted) for predicted in trainHatList]
testScores = [accuracy_score(y_test, predicted) for predicted in testHatList]

fig, ax = plt.subplots()
# Shift the two series half a bar left/right so they sit side by side.
half_width = width / 2
rects1 = ax.bar(x - half_width, trainScores, width, label='train')
rects2 = ax.bar(x + half_width, testScores, width, label='test')

ax.set_title('Accuracy by Scaling Methods')
ax.set_ylabel('Accuracy')
# The y axis starts at 0.7 rather than 0, which magnifies the differences.
ax.set_ylim(0.7, 1.0)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

def autolabel(rects, ax=None, fmt='{:.3f}'):
    """Attach a text label above each bar showing its height.

    rects -- iterable of bar patches (objects providing get_height(),
             get_x() and get_width()), e.g. the container returned by
             ``Axes.bar``.
    ax    -- axes to draw the annotations on; when omitted, each bar is
             annotated on the axes it belongs to (``rect.axes``).
    fmt   -- ``str.format`` template applied to the bar height to build
             the label text.
    """
    for rect in rects:
        height = rect.get_height()
        target = rect.axes if ax is None else ax
        # The label text is the bar's value; a blank string here would
        # create the annotation but leave nothing visible on the chart.
        target.annotate(fmt.format(height),
                        # anchor at the top centre of the bar
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

# Annotate the train bars first, then the test bars.
for bar_group in (rects1, rects2):
    autolabel(bar_group)

# Tighten the layout before rendering the figure.
fig.tight_layout()
plt.show()
    