In [44]:
import numpy as np
import pandas as pd
import sklearn

In [45]:
# read data files
data = pd.read_csv('i.csv', header = None, float_precision='high', sep = ';', dtype = np.float64)
target = pd.read_csv('o.csv', header = None, float_precision='high', sep = ';', dtype = np.float64)


In [46]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# set some random
np.random.seed(76213)

# normalize features
scaler = StandardScaler()
scaler.fit(data) 
scaled_df = scaler.transform(data)

# make train data
X_train, X_test, y_train, y_test = train_test_split(scaled_df, target, test_size=0.33, random_state=42)
y_train = np.ravel(y_train)


In [47]:
from sklearn.linear_model import SGDRegressor

# SGD regressor (best: 0.00138)
# model = SGDRegressor(tol=0.0001, learning_rate='adaptive')
# model.fit(X_train, y_train)


In [48]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error


# MLPRegressor
mlpReg = MLPRegressor(activation='relu',            # ‘identity’, ‘logistic’, ‘tanh’, ‘relu’
                        solver='adam',             #‘lbfgs’, ‘sgd’, ‘adam’   
                        batch_size='auto',          
                        learning_rate='adaptive',   
                        max_iter=7000,
                        random_state=42,
                        tol=0.001)

mlpReg.fit(X_train, y_train)

train_mse_MLPRegressor = mean_squared_error(y_train, mlpReg.predict(X_train),squared=False)
test_mse_MLPRegressor = mean_squared_error(y_test, mlpReg.predict(X_test),squared=False)

print("Train MLPRegressor RMSE: {}".format((train_mse_MLPRegressor)))
print("Test MLPRegressor RMSE: {}".format((test_mse_MLPRegressor)))

Train SGDRegressor RMSE: 0.0014881220918612783
Test SGDRegressor RMSE: 0.001489141672638759


In [49]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer 

params = {
    "activation" : ['identity', 'logistic', 'tanh', 'relu'],
    "solver" : ['adam', 'lbfgs', 'sgd'],
    "learning_rate" : ['adaptive', 'invscaling']
}

# Find best hyper params and then refit on all training data:
reg = GridSearchCV(estimator=mlpReg, 
                    param_grid=params,
                    # cv=3, 
                    refit=True, 
                    verbose=5,
                    n_jobs=4,
                    scoring='neg_mean_squared_error')

# Поиск оптимальных параметров
reg.fit(X_train, y_train)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END activation=identity, learning_rate=adaptive, solver=adam;, score=-0.000 total time=  25.3s
[CV 4/5] END activation=identity, learning_rate=adaptive, solver=adam;, score=-0.000 total time=  25.3s
[CV 3/5] END activation=identity, learning_rate=adaptive, solver=adam;, score=-0.000 total time=  25.4s
[CV 2/5] END activation=identity, learning_rate=adaptive, solver=adam;, score=-0.000 total time=  25.4s
[CV 2/5] END activation=identity, learning_rate=adaptive, solver=lbfgs;, score=-0.000 total time=  21.7s
[CV 3/5] END activation=identity, learning_rate=adaptive, solver=lbfgs;, score=-0.001 total time=  23.6s
[CV 1/5] END activation=identity, learning_rate=adaptive, solver=lbfgs;, score=-0.001 total time=  23.8s
[CV 5/5] END activation=identity, learning_rate=adaptive, solver=adam;, score=-0.000 total time=  32.2s
[CV 4/5] END activation=identity, learning_rate=adaptive, solver=lbfgs;, score=-0.001 total time=  16.2

In [None]:

print("Лучшие параметры: {}".format(reg.best_params_)) 
print("Лучшая оценка RMSE: {}".format(reg.best_score_))


In [None]:
# sorted(list(zip(data.columns, model.coef_)), 
#        key=lambda x: abs(x[1]))

In [None]:
import pickle

# save the model to disk
filenameModel = 'finalized_model.sav'
pickle.dump(mlpReg, open(filenameModel, 'w'))
 
fiilenameParams = 'modelParams.sav'
pickle.dump(reg.best_params_, open(fiilenameParams, 'w'))
# some time later...
 
# load the model from disk
# loaded_model = pickle.load(open(filename, 'r'))
# result = loaded_model.score(X_test, y_test)
# print(result)