In [12]:
import numpy as np

class StandardScaler:
    """Standardize the columns of a 2-D array to zero mean and unit variance.

    Attributes:
        mean_: 1-D array of per-feature means learned by ``fit``; ``None``
            until ``fit`` has been called.
        scale_: 1-D array of per-feature (population) standard deviations
            learned by ``fit``; ``None`` until ``fit`` has been called.
            A standard deviation of exactly zero is stored as 1.0 so that
            constant features are only centred instead of being divided
            by zero.
    """

    def __init__(self):
        self.mean_ = None
        self.scale_ = None

    def fit(self, X):
        """Learn the per-feature mean and standard deviation of X.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            This scaler, so calls can be chained.

        Raises:
            AssertionError: if X is not two-dimensional.
        """
        X = np.asarray(X, dtype=float)
        assert X.ndim == 2, "The dimension of X must be 2"
        self.mean_ = np.mean(X, axis=0)
        std = np.std(X, axis=0)
        # A constant column has zero spread; dividing by it in transform()
        # would yield NaN/inf, so fall back to a neutral scale of 1.0.
        self.scale_ = np.where(std == 0.0, 1.0, std)

        return self

    def transform(self, X):
        """Standardize X with the statistics learned by ``fit``.

        Args:
            X: 2-D array-like with the same number of features as the data
                passed to ``fit``.

        Returns:
            A new float array with the same shape as X, computed as
            ``(X - mean_) / scale_`` for every column.

        Raises:
            AssertionError: if X is not two-dimensional, if ``fit`` has not
                been called yet, or if the feature count does not match.
        """
        X = np.asarray(X, dtype=float)
        assert X.ndim == 2, "The dimension of X must be 2"
        # fit() must run before transform(), so both statistics must be set.
        assert self.mean_ is not None and self.scale_ is not None, \
            "must fit before transform!"
        assert X.shape[1] == len(self.mean_), \
            "the feature number of X must be equal to mean_ and scale_"
        # Broadcasting applies each column's mean and scale to every row.
        return (X - self.mean_) / self.scale_


In [13]:
import numpy as np
from sklearn import datasets

In [14]:
# Load the iris dataset and pull out the feature matrix and the label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=666,
)

In [16]:
# Display the training feature matrix produced by the split above.
X_train

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [5.7, 2.8, 4.1, 1.3],
       [6.2, 3.4, 5.4, 2.3],
       [5.1, 2.5, 3. , 1.1],
       [7. , 3.2, 4.7, 1.4],
       [6.1, 2.6, 5.6, 1.4],
       [7.6, 3. , 6.6, 2.1],
       [5.2, 4.1, 1.5, 0.1],
       [6.2, 2.2, 4.5, 1.5],
       [7.3, 2.9, 6.3, 1.8],
       [6.4, 3.2, 5.3, 2.3],
       [6. , 3.4, 4.5, 1.6],
       [5.2, 2.7, 3.9, 1.4],
       [5.4, 3.7, 1.5, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [4.4, 2.9, 1.4, 0.2],
       [5.8, 2.7, 3.9, 1.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.6, 3.4, 1.4, 0.3],
       [6.5, 3.2, 5.1, 2. ],
       [5.7, 2.9, 4.2, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6. , 2.9, 4.5, 1.5],
       [4.7, 3.2, 1.6, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.3, 2.7, 4.9, 1.8],
       [6.1, 2.8, 4.7, 1.2],
       [6.2, 2.8, 4.8, 1.8],
       [5.7, 4.4, 1.5, 0.4],
       [6.3, 2.5, 4.9, 1.5],
       [6.4, 2.9, 4.3, 1.3],
       [5.1, 3

In [17]:
# Execute the external preprocessing.py script with IPython's %run magic.
# NOTE(review): presumably this script defines the same StandardScaler as the
# class cell at the top of this notebook and replaces it — confirm.
# NOTE(review): the absolute Windows path ties the notebook to one machine.
%run D:\\python-code\preprocessing.py

In [18]:
# Create a scaler that has not been fitted yet, then display X_train,
# which still holds the raw (unscaled) feature values at this point.
myScaler = StandardScaler()
X_train

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [5.7, 2.8, 4.1, 1.3],
       [6.2, 3.4, 5.4, 2.3],
       [5.1, 2.5, 3. , 1.1],
       [7. , 3.2, 4.7, 1.4],
       [6.1, 2.6, 5.6, 1.4],
       [7.6, 3. , 6.6, 2.1],
       [5.2, 4.1, 1.5, 0.1],
       [6.2, 2.2, 4.5, 1.5],
       [7.3, 2.9, 6.3, 1.8],
       [6.4, 3.2, 5.3, 2.3],
       [6. , 3.4, 4.5, 1.6],
       [5.2, 2.7, 3.9, 1.4],
       [5.4, 3.7, 1.5, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [4.4, 2.9, 1.4, 0.2],
       [5.8, 2.7, 3.9, 1.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.6, 3.4, 1.4, 0.3],
       [6.5, 3.2, 5.1, 2. ],
       [5.7, 2.9, 4.2, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6. , 2.9, 4.5, 1.5],
       [4.7, 3.2, 1.6, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.3, 2.7, 4.9, 1.8],
       [6.1, 2.8, 4.7, 1.2],
       [6.2, 2.8, 4.8, 1.8],
       [5.7, 4.4, 1.5, 0.4],
       [6.3, 2.5, 4.9, 1.5],
       [6.4, 2.9, 4.3, 1.3],
       [5.1, 3

In [20]:
# Check the number of array dimensions of X_train before scaling.
# NOTE(review): this cell's execution count (20) is higher than that of the
# fit cell after it (19), so the cells were not run in top-to-bottom order.
X_train.ndim

2

In [19]:
# Fit the scaler on the training features only; the test set is not used here.
myScaler.fit(X_train)

<__main__.StandardScaler at 0x1ea6c468ba8>

In [21]:
# Standardize the training features with the fitted scaler and display them.
# NOTE(review): this rebinds X_train to its scaled version, so re-running the
# cell would scale already-scaled data; binding the result to a separate name
# (and using that name downstream) would make the cell idempotent.
X_train = myScaler.transform(X_train)
X_train

array([[-0.90616043,  0.94720873, -1.30982967, -1.28485856],
       [-1.15301457, -0.18717298, -1.30982967, -1.28485856],
       [-0.16559799, -0.64092567,  0.22169257,  0.17345038],
       [ 0.45153738,  0.72033239,  0.95909217,  1.49918578],
       [-0.90616043, -1.3215547 , -0.40226093, -0.0916967 ],
       [ 1.43895396,  0.2665797 ,  0.56203085,  0.30602392],
       [ 0.3281103 , -1.09467835,  1.07253826,  0.30602392],
       [ 2.1795164 , -0.18717298,  1.63976872,  1.2340387 ],
       [-0.78273335,  2.30846679, -1.25310662, -1.4174321 ],
       [ 0.45153738, -2.00218372,  0.44858475,  0.43859746],
       [ 1.80923518, -0.41404933,  1.46959958,  0.83631808],
       [ 0.69839152,  0.2665797 ,  0.90236912,  1.49918578],
       [ 0.20468323,  0.72033239,  0.44858475,  0.571171  ],
       [-0.78273335, -0.86780201,  0.10824648,  0.30602392],
       [-0.53587921,  1.40096142, -1.25310662, -1.28485856],
       [-0.65930628,  1.40096142, -1.25310662, -1.28485856],
       [-1.0295875 ,  0.

In [22]:
# Scale the test features with the scaler that was fitted on the training set,
# then display the scaled test matrix.
X_test_scaler = myScaler.transform(X_test)
X_test_scaler

array([[-0.28902506, -0.18717298,  0.44858475,  0.43859746],
       [-0.04217092, -0.64092567,  0.78892303,  1.63175932],
       [-1.0295875 , -1.77530738, -0.2320918 , -0.22427024],
       [-0.04217092, -0.86780201,  0.78892303,  0.96889162],
       [-1.52329579,  0.03970336, -1.25310662, -1.28485856],
       [-0.41245214, -1.3215547 ,  0.16496953,  0.17345038],
       [-0.16559799, -0.64092567,  0.44858475,  0.17345038],
       [ 0.82181859, -0.18717298,  0.84564608,  1.10146516],
       [ 0.57496445, -1.77530738,  0.39186171,  0.17345038],
       [-0.41245214, -1.09467835,  0.39186171,  0.04087684],
       [ 1.06867274,  0.03970336,  0.39186171,  0.30602392],
       [-1.64672287, -1.77530738, -1.36655271, -1.15228502],
       [-1.27644165,  0.03970336, -1.19638358, -1.28485856],
       [-0.53587921,  0.72033239, -1.25310662, -1.01971148],
       [ 1.68580811,  1.17408507,  1.35615349,  1.76433286],
       [-0.04217092, -0.86780201,  0.22169257, -0.22427024],
       [-1.52329579,  1.

In [23]:
from sklearn.neighbors import KNeighborsClassifier

In [25]:
# Train a 3-nearest-neighbours classifier on the scaled training data, then
# score it on the scaled test data.
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
knn_clf.score(X_test_scaler, y_test)

1.0