In [1]:
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
# Generate a noisy two-class toy dataset. The explicit random_state makes the
# generated samples reproducible across kernel restarts; without it make_blobs
# draws new data on every run, so none of the scores printed later in the
# notebook could be reproduced. (Scores recorded from an earlier, unseeded run
# will differ -- re-run the notebook to refresh them.)
X, y = make_blobs(n_samples=200, centers=2, cluster_std=5, random_state=38)

# Hold out a test split; the split itself is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=38)

# Fit the scaler on the training split only, then apply the same transform to
# both splits so no test-set statistics influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print(X_train_scaled.shape, X_test_scaled.shape)

(150, 2) (50, 2)


In [16]:
from sklearn.model_selection import GridSearchCV
# Hyper-parameter candidates for the MLP classifier: three network layouts
# crossed with four L2 penalties.
params = {
    'hidden_layer_sizes': [(50,), (100,), (100, 100)],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
}

# NOTE: X_train_scaled was produced by a scaler fitted on the *whole* training
# split, so every cross-validation fold used for validation has already
# contributed to the scaling statistics. The Pipeline-based search later in the
# notebook avoids this.
grid = GridSearchCV(
    estimator=MLPClassifier(max_iter=1600, random_state=38),
    param_grid=params,
    cv=3,
)
grid.fit(X_train_scaled, y_train)

# Report the best mean CV score and the parameter combination that achieved it.
print(
    '模型最佳得分：{:.2f}'.format(grid.best_score_),
    '模型最佳参数：{}'.format(grid.best_params_),
    sep='\n',
)

模型最佳得分：0.83
模型最佳参数：{'alpha': 0.0001, 'hidden_layer_sizes': (100,)}


In [17]:
# Score the fitted grid search on the scaled hold-out split.
print(
    '测试集得分：{}'.format(
        grid.score(X_test_scaled, y_test)
    )
)

测试集得分：0.86


In [18]:
from sklearn.pipeline import Pipeline
# Bundle scaling and the classifier into one estimator, so the scaler is always
# fitted on exactly the data handed to fit() -- here the raw training split.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('mlp', MLPClassifier(max_iter=1600, random_state=38)),
    ]
)
pipeline.fit(X_train, y_train)

# Evaluate on the raw test split; the pipeline applies the scaling itself.
print('使用管道模型的MLP模型评分：{:.2f}'.format(pipeline.score(X_test, y_test)))

使用管道模型的MLP模型评分：0.86


In [21]:
# Same search space as the earlier grid search, but the keys carry the 'mlp__'
# prefix so they are routed to the pipeline step named 'mlp'.
params = {
    'mlp__hidden_layer_sizes': [(50,), (100,), (100, 100)],
    'mlp__alpha': [0.0001, 0.001, 0.01, 0.1],
}

# Search over the whole pipeline using the raw (unscaled) training data.
grid = GridSearchCV(estimator=pipeline, param_grid=params, cv=3)
grid.fit(X_train, y_train)

# Best CV score, winning parameters, and the score on the raw test split.
print(
    '交叉验证最高分:{:.2f}'.format(grid.best_score_),
    '模型最优参数：{}'.format(grid.best_params_),
    '测试集得分：{}'.format(grid.score(X_test, y_test)),
    sep='\n',
)

交叉验证最高分:0.84
模型最优参数：{'mlp__alpha': 0.1, 'mlp__hidden_layer_sizes': (100,)}
测试集得分：0.86


In [23]:
# Show the (name, estimator) pairs that make up the pipeline.
print(pipeline.steps)

[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('mlp', MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1600, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False))]


In [26]:
import os

import pandas as pd

# Location of the stock snapshot CSV. The original absolute Windows path stays
# as the default, but it can now be overridden without editing the notebook by
# setting the STOCK_CSV_PATH environment variable before starting the kernel.
STOCK_CSV_PATH = os.environ.get(
    'STOCK_CSV_PATH',
    'd:/stock dataset/stock dataset10-20.csv',
)

# The file is read with the GBK codec (its column headers are Chinese).
stocks = pd.read_csv(STOCK_CSV_PATH, encoding='gbk')

# Features: every column from '现价' through '流通股(亿)' (label slice, both
# ends inclusive), converted to a plain NumPy array.
X = stocks.loc[:, '现价':'流通股(亿)'].values
# Target: the column whose header is literally '涨幅%%'; kept as a Series.
y = stocks['涨幅%%']
print(X.shape, y.shape)

(3421, 23) (3421,)


In [32]:
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor
# Baseline: 3-fold cross-validation of an MLP regressor fed the features
# exactly as loaded, with no scaling step in front of it.
scores = cross_val_score(
    estimator=MLPRegressor(random_state=38),
    X=X,
    y=y,
    cv=3,
)
print('模型平均分：{:.2f}'.format(scores.mean()))

模型平均分：-20060191.16


In [34]:
from sklearn.pipeline import make_pipeline
# Explicit construction: each step is given a name by hand.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('mlp', MLPRegressor(random_state=38)),
    ]
)

# Shorthand construction: make_pipeline derives the step names itself.
pipe = make_pipeline(
    StandardScaler(),
    MLPRegressor(random_state=38),
)

# Print both step lists so the hand-picked and generated names can be compared.
print(pipeline.steps)
print('\n', pipe.steps)

[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('mlp', MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False))]

 [('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('mlpregressor', MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1,

In [36]:
# 3-fold cross-validation of the pipeline; the scaler is re-fitted inside each
# training fold because it is part of the estimator being cross-validated.
scores = cross_val_score(estimator=pipe, X=X, y=y, cv=3)
print('模型平均分：{:.2f}'.format(scores.mean()))

模型平均分：0.71


In [39]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor
# Three-stage pipeline: scale -> keep features selected via a random forest
# -> fit the MLP regressor on the remaining features.
pipe = make_pipeline(
    StandardScaler(),
    SelectFromModel(estimator=RandomForestRegressor(random_state=38)),
    MLPRegressor(random_state=38),
)
# Bare expression so the notebook displays the generated step list.
pipe.steps

[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
 ('selectfrommodel',
  SelectFromModel(estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='auto', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
             oob_score=False, random_state=38, verbose=0, warm_start=False),
          norm_order=1, prefit=False, threshold=None)),
 ('mlpregressor',
  MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
         beta_2=0.999, early_stopping=False, epsilon=1e-08,
         hidden_layer_sizes=(100,), learning_rate='constant',
         learning_rate_init=0.001, max_iter=200, momentum=0.9,
         nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
         solver='adam', tol=0.0001, validation_fraction=0.1, ver

In [41]:
# 3-fold cross-validation of the current `pipe` estimator on the full data.
scores = cross_val_score(estimator=pipe, X=X, y=y, cv=3)
print('管道模型平均分：{:.2f}'.format(scores.mean()))

管道模型平均分：0.81


In [44]:
# Fit the pipeline on all samples so the selector step has a fitted state.
pipe.fit(X, y)

# Boolean mask over the input columns: True marks a feature the
# 'selectfrommodel' step kept.
mask = pipe.named_steps['selectfrommodel'].get_support()
print(mask)

[False  True False  True False False False False False False False False
 False  True False  True False False False False False False False]


In [47]:
# Two alternative configurations for the grid to compare:
#   1. an MLP regressor, with and without a StandardScaler in front of it;
#   2. a random forest regressor with the scaling step disabled.
params = [
    {
        'reg': [MLPRegressor(random_state=38)],
        'scaler': [StandardScaler(), None],
    },
    {
        'reg': [RandomForestRegressor(random_state=38)],
        'scaler': [None],
    },
]

# Instantiate the pipeline. Both steps act as placeholders: every entry in
# `params` supplies its own 'scaler' and 'reg' values.
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('reg', MLPRegressor())])
grid = GridSearchCV(estimator=pipe, param_grid=params, cv=3)
grid.fit(X, y)

# Winning configuration, a blank line, then its mean cross-validation score.
print(
    '最佳模型是：\n{}'.format(grid.best_params_),
    '\n模型最佳得分是:{:.2f}'.format(grid.best_score_),
    sep='\n',
)

最佳模型是：
{'reg': RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=38, verbose=0, warm_start=False), 'scaler': None}

模型最佳得分是:0.80


In [51]:
# Extend the model comparison with per-model hyper-parameters:
#   - MLP: with/without scaling, three hidden-layer layouts;
#   - random forest: no scaling, three forest sizes.
params = [
    {
        'reg': [MLPRegressor(random_state=38)],
        'scaler': [StandardScaler(), None],
        'reg__hidden_layer_sizes': [(50,), (100,), (100, 100)],
    },
    {
        'reg': [RandomForestRegressor(random_state=38)],
        'scaler': [None],
        'reg__n_estimators': [10, 50, 100],
    },
]

# Placeholder pipeline; every entry in `params` sets both of its steps.
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('reg', MLPRegressor())])
grid = GridSearchCV(estimator=pipe, param_grid=params, cv=3)
grid.fit(X, y)

# Winning configuration, a blank line, then its mean cross-validation score.
print(
    '最佳模型是：\n{}'.format(grid.best_params_),
    '\n模型最佳得分是:{:.2f}'.format(grid.best_score_),
    sep='\n',
)



最佳模型是：
{'reg': MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False), 'reg__hidden_layer_sizes': (100, 100), 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True)}

模型最佳得分是:0.88


In [54]:
# Same comparison as the previous grid, but the MLP is allowed up to 1000
# training iterations and the random forest is tried with larger ensembles.
params = [
    {
        'reg': [MLPRegressor(random_state=38, max_iter=1000)],
        'scaler': [StandardScaler(), None],
        'reg__hidden_layer_sizes': [(50,), (100,), (100, 100)],
    },
    {
        'reg': [RandomForestRegressor(random_state=38)],
        'scaler': [None],
        'reg__n_estimators': [100, 500, 1000],
    },
]

# Placeholder pipeline; every entry in `params` sets both of its steps.
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('reg', MLPRegressor())])
grid = GridSearchCV(estimator=pipe, param_grid=params, cv=3)
grid.fit(X, y)

# Winning configuration, a blank line, then its mean cross-validation score.
print(
    '最佳模型是：\n{}'.format(grid.best_params_),
    '\n模型最佳得分是:{:.2f}'.format(grid.best_score_),
    sep='\n',
)

最佳模型是：
{'reg': MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=38, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False), 'reg__hidden_layer_sizes': (100, 100), 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True)}

模型最佳得分是:0.88
