### 1.分类
1. from sklearn.model_selection import train_test_split  
2. X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

### 2.KNN算法
1. from sklearn.neighbors import KNeighborsClassifier
2. kNN_classifier = KNeighborsClassifier(n_neighbors=6)
3. kNN_classifier.fit(X_train, y_train)
    * y_predict = kNN_classifier.predict(X_test)   预测值
    * kNN_classifier.score(X_test, y_test)   准确度

### 3.Grid Search
1.
```
param_grid = [
    {
        'weights': ['uniform'], 
        'n_neighbors': [i for i in range(1, 11)]
    },
    {
        'weights': ['distance'],
        'n_neighbors': [i for i in range(1, 11)], 
        'p': [i for i in range(1, 6)]
    }
]
```
2. knn_clf = KNeighborsClassifier()
3. from sklearn.model_selection import GridSearchCV
4. grid_search = GridSearchCV(knn_clf, param_grid)
5. grid_search.fit(X_train, y_train)
    * grid_search.best_estimator_
    * grid_search.best_score_
    * grid_search.best_params_

### 4.归一化(特征缩放)
1. from sklearn.preprocessing import StandardScaler
2. standardScalar = StandardScaler() 
3. standardScalar.fit(X_train)
    * standardScalar.mean_   (均值)
    * standardScalar.scale_   (标准差)
4. X_train_standard = standardScalar.transform(X_train)   (对X_train进行归一化)
    * X_test_standard = standardScalar.transform(X_test)   (用训练集得到的均值和标准差对X_test进行归一化)
5. 以KNN举例 knn_clf.fit(X_train_standard, y_train)
6. knn_clf.score(X_test_standard, y_test)

### 5.数据集导入(以鸢尾花数据集为例)
1. from sklearn import datasets
2. iris = datasets.load_iris()
    * iris.keys()   # 输出：dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
    * print(iris.DESCR)
    * iris.feature_names
3. X = iris.data
4. y = iris.target

### 6.MSE(均方误差)和MAE(平均绝对误差)和RMSE(均方根误差)和R²
from sklearn.metrics import mean_squared_error  
from sklearn.metrics import mean_absolute_error  
from sklearn.metrics import root_mean_squared_error  
from sklearn.metrics import r2_score  
* r2_score(y_test, y_predict)

### 7.线性回归(多元，简单，梯度下降)
1. from sklearn.linear_model import LinearRegression
2. lin_reg = LinearRegression()
3. lin_reg.fit(X_train, y_train)
    * lin_reg.coef_
    * lin_reg.intercept_
    * lin_reg.score(X_test, y_test)   # R²值

### 8.SGD(随机梯度下降)
只能解决线性模型
1. from sklearn.linear_model import SGDRegressor
2. sgd_reg = SGDRegressor(max_iter=50)
3. sgd_reg.fit(X_train_standard, y_train)
    * sgd_reg.score(X_test_standard, y_test)

### 9.PCA(梯度上升，特征压缩)
1. from sklearn.decomposition import PCA
2. pca = PCA(n_components=1)   # n_components表示特征维度
    * pca = PCA(0.95)  # 表示不知道要取几个维度，但是取的主成分个数能解释原数据95%的方差
3. pca.fit(X)
    * pca.components_
    * pca.n_components_  # 通过pca = PCA(0.95)得出的主成分个数
4. X_reduction = pca.transform(X)
5. X_restore = pca.inverse_transform(X_reduction)
#### 与其他数据预测算法结合(以KNN算法为例)
    1. X_train_reduction = pca.transform(X_train)
    2. X_test_reduction = pca.transform(X_test)
    3. knn_clf = KNeighborsClassifier()
    4. knn_clf.fit(X_train_reduction, y_train)
        * knn_clf.score(X_test_reduction, y_test)

### 10.多项式回归(PCA和多项式回归都是对现有X数据进行变形)
1. from sklearn.preprocessing import PolynomialFeatures
2. poly = PolynomialFeatures(degree=n)   # 为原本的数据集最多添加n次幂这样的特征,sklearn中会自动添加零次幂
3. poly.fit(X)
4. X2 = poly.transform(X)   # 把X转换成多项式X2
#### 与线性回归算法结合(多项式回归只是预处理过程(sklearn.preprocessing)，真正拟合还是得用线性回归算法)
    1. from sklearn.linear_model import LinearRegression
    2. lin_reg2 = LinearRegression()
    3. lin_reg2.fit(X2, y)
        * y_predict2 = lin_reg2.predict(X2)
        * lin_reg2.score(X2, y)

### 11.PipeLine(管道)
```
1. 多项式的特征(或者其他预处理算法)
2. 数据的归一化(或者其他预处理算法)(对于多项式回归，数据标准化是必要的，因为如果超参数degree很大的话，数据之间的差距会很大，比如1的一次方和100的100次方之间的差距)
3. 线性回归(或者其他拟合算法)
...
PipeLine将三步合在一起
```
1. from sklearn.pipeline import Pipeline
2. from sklearn.preprocessing import PolynomialFeatures, StandardScaler
3. from sklearn.linear_model import LinearRegression
4.  
```
poly_reg = Pipeline([
    ("poly", PolynomialFeatures(degree=2)),
    ("std_scaler", StandardScaler()),
    ("lin_reg", LinearRegression())
])
```
5. poly_reg.fit(X, y)
    * y_predict = poly_reg.predict(X)