In [2]:
import numpy as np
import pandas as pd
import sklearn
import pickle
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer 

# Function for score by RMSE
def rmse(predict, actual):
    """Return the root-mean-square error between two sets of values.

    Parameters
    ----------
    predict : array-like
        Predicted values.
    actual : array-like
        Reference values.

    Returns
    -------
    numpy.float64
        Square root of the mean squared element-wise difference.

    Notes
    -----
    When both inputs hold the same number of elements but have different
    shapes (for example a column vector ``(n, 1)`` against a flat ``(n,)``
    array), they are flattened before subtraction. Without this, NumPy
    broadcasting would build an ``(n, n)`` matrix of pairwise differences
    and the result would no longer be an RMSE.
    """
    predict = np.asarray(predict)
    actual = np.asarray(actual)

    # Guard against accidental broadcasting between (n, 1) and (n,) inputs.
    if predict.shape != actual.shape and predict.size == actual.size:
        predict = predict.ravel()
        actual = actual.ravel()

    squared_error = (predict - actual) ** 2

    return np.sqrt(squared_error.mean())

# Wrap rmse as a scikit-learn scorer. RMSE is a loss (lower is better), so
# greater_is_better=False makes the scorer report the negated RMSE and lets
# model selection keep maximising the score.
rmse_score = make_scorer(score_func=rmse, greater_is_better=False)

In [3]:
# Load the model inputs ('i.csv') and the target values ('o.csv').
# Both files are read with identical options: no header row, ';' as the
# separator, high-precision float parsing and float64 columns.
data, target = (
    pd.read_csv(path, header=None, float_precision='high', sep=';', dtype=np.float64)
    for path in ('i.csv', 'o.csv')
)

In [15]:
# Seed NumPy's global random generator.
np.random.seed(76213)

# Split BEFORE scaling: the scaler must only see training rows, otherwise the
# mean/variance it learns would leak information from the held-out test rows.
# The row partition depends only on the number of samples and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data, target, test_size=0.33, random_state=42
)

# normalize features using statistics of the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full data set in the same (train-fitted) scale, kept for any later use.
scaled_df = scaler.transform(data)

# Estimators expect a flat 1-D target vector for fitting.
y_train = np.ravel(y_train)


In [None]:
# Create MLP regressor cross-validation.
# The constructor values below are only a template: every setting listed in
# param_grid is overridden for each candidate during the search.
# random_state is fixed because the search runs in worker processes
# (n_jobs=-1), which do not share the NumPy seed set in the notebook.
mlp_reg = MLPRegressor(hidden_layer_sizes=(150, 100, 50),
                       max_iter=300, activation='relu',
                       solver='adam', random_state=42)

param_grid = {
    # (24,) is a one-element tuple; a bare (24) is just the integer 24.
    'hidden_layer_sizes': [(24,), (24, 24), (24, 12, 12), (24, 24, 24)],
    'max_iter': [7000, 10000, 15000, 20000],
    # Only valid activation names: an empty string is rejected by
    # MLPRegressor, so every candidate using it would fail to fit.
    'activation': ['tanh', 'relu'],
    'solver': ['lbfgs', 'adam', 'sgd'],
    'alpha': [0.0001, 0.05, 0.005],
    # The learning-rate schedule is only used by the 'sgd' solver.
    'learning_rate': ['adaptive'],
}

# 3-fold cross-validated exhaustive search scored by rmse_score;
# refit=True retrains the best candidate on all of X_train afterwards.
grid = GridSearchCV(mlp_reg, param_grid, n_jobs=-1, cv=3,
                    scoring=rmse_score, refit=True, verbose=5)
grid.fit(X_train, y_train)

In [1]:
# Save the fitted best model to disk.
# grid.best_estimator_ is the model refitted by the search (refit=True);
# mlp_reg is only the unfitted template passed to GridSearchCV.
with open('MlpBestModel.sav', 'wb') as model_file:
    pickle.dump(grid.best_estimator_, model_file)

print("Лучшие параметры: {}".format(grid.best_params_))
# rmse_score is built with greater_is_better=False, so best_score_ holds the
# negated RMSE; flip the sign back to report the actual RMSE.
print("Лучшая оценка RMSE: {}".format(-grid.best_score_))

# Actual vs. predicted target for the first 30 held-out rows.
df_temp2 = pd.DataFrame({
    'Actual': np.ravel(y_test),
    'Predicted': np.ravel(grid.predict(X_test)),
}).head(30)

df_temp2.plot(kind='bar', figsize=(10, 6))
plt.grid(which='major', linestyle='-', linewidth=0.2, color='green')
plt.grid(which='minor', linestyle=':', linewidth=0.2, color='black')
plt.show()

NameError: name 'pickle' is not defined

In [None]:

# SGD regressor (best: 0.00138)
SGDReg = SGDRegressor(tol=0.0001, learning_rate='adaptive')
SGDReg.fit(X_train, y_train)

# RMSE is computed as sqrt(MSE): the `squared=False` argument of
# mean_squared_error is deprecated and removed in newer scikit-learn
# releases, while this form works on every version.
train_rmse_sgd = np.sqrt(mean_squared_error(y_train, SGDReg.predict(X_train)))
test_rmse_sgd = np.sqrt(mean_squared_error(y_test, SGDReg.predict(X_test)))

print("Train SGDReg RMSE: {}".format(train_rmse_sgd))
print("Test SGDReg RMSE: {}".format(test_rmse_sgd))

In [None]:


# # Define base regressor:
# base_reg = MLPRegressor(learning_rate='adaptive', max_iter=7000, random_state=42)

# # Define search space:
# params = {
#     'activation': ['logistic', 'relu', 'tanh'],  # <-- added 'tanh' as third non-linear activation function
#     'alpha': np.logspace(0.0001, 100, 10)
#     # 'hidden_layer_sizes': [
#     #     (10, 10), (20, 10), (30, 10)
#     #     # (40, 10), (90, 10), (90, 30, 10)  # <-- added more neurons or layers
#     # ]
# }

# # Find best hyper params and then refit on all training data:
# reg = GridSearchCV(estimator=base_reg, param_grid=params,
#                    n_jobs=4, cv=3, refit=True, verbose=5,
#                    scoring=mean_squared_error)  # <-- verbose=5
# reg.fit(X_train, y_train)

# print(reg.best_estimator_)

# print(reg.best_params_)

In [None]:



# # MLPRegressor
# mlpReg = MLPRegressor(alpha=1.0002302850208247, learning_rate='adaptive', max_iter=7000,
#              random_state=42,activation='relu')

# # MLPRegressor(activation='relu',            # ‘identity’, ‘logistic’, ‘tanh’, ‘relu’
# #                         solver='adam',             #‘lbfgs’, ‘sgd’, ‘adam’   
# #                         batch_size='auto',          
# #                         learning_rate='adaptive',   
# #                         max_iter=10000,
# #                         random_state=42,
# #                         tol=0.0001)

# mlpReg.fit(X_train, y_train)

# train_mse_MLPRegressor = mean_squared_error(y_train, mlpReg.predict(X_train),squared=False)
# test_mse_MLPRegressor = mean_squared_error(y_test, mlpReg.predict(X_test),squared=False)

# print("Train MLPRegressor RMSE: {}".format((train_mse_MLPRegressor)))
# print("Test MLPRegressor RMSE: {}".format((test_mse_MLPRegressor)))

In [None]:


# params = {
#     "activation" : ['identity', 'logistic', 'tanh', 'relu'],
#     "solver" : ['adam', 'lbfgs', 'sgd'],
#     "learning_rate" : ['adaptive', 'invscaling']
# }

# # Find best hyper params and then refit on all training data:
# reg = GridSearchCV(estimator=mlpReg, 
#                     param_grid=params,
#                     # cv=3, 
#                     refit=True, 
#                     verbose=5,
#                     n_jobs=4,
#                     scoring='neg_mean_squared_error')

# # Search for the optimal parameters
# reg.fit(X_train, y_train)


In [None]:

# print("Лучшие параметры: {}".format(reg.best_params_)) 
# print("Лучшая оценка RMSE: {}".format(reg.best_score_))


In [None]:
# # MLPRegressor
# optimalReg = MLPRegressor(activation='relu',            # ‘identity’, ‘logistic’, ‘tanh’, ‘relu’
#                         solver='adam',                   #‘lbfgs’, ‘sgd’, ‘adam’   
#                         learning_rate='adaptive',   
#                         max_iter=7000,
#                         random_state=42)

# optimalReg.fit(X_train, y_train)

# trainRmseOptimalReg = mean_squared_error(y_train, optimalReg.predict(X_train),squared=False)
# testRmseOptimalReg = mean_squared_error(y_test, optimalReg.predict(X_test),squared=False)

# print("optimalReg model train data RMSE: {}".format((trainRmseOptimalReg)))
# print("optimalReg model test data: {}".format((testRmseOptimalReg)))

In [None]:




# sorted(list(zip(data.columns, model.coef_)), 
#        key=lambda x: abs(x[1]))

In [None]:

# save the model to disk
# filenameModel = 'finalized_model.sav'
# pickle.dump(optimalReg, open(filenameModel, 'wb'))
 
# fiilenameParams = 'modelParams.sav'
# pickle.dump(reg.best_params_, open(fiilenameParams, 'wb'))
# some time later...
 
# load the model from disk
# loaded_model = pickle.load(open(filename, 'r'))
# result = loaded_model.score(X_test, y_test)
# print(result)

In [None]:
# loaded_model = pickle.load(open(filenameModel, 'rb'))

# trainRmseLoadadModel = mean_squared_error(y_train, loaded_model.predict(X_train),squared=False)
# testRmseLodedModel = mean_squared_error(y_test, loaded_model.predict(X_test),squared=False)

# print("Loadaed model train data RMSE: {}".format((trainRmseLoadadModel)))
# print("Loadaed model test data: {}".format((testRmseLodedModel)))

