In [36]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neural_network import MLPRegressor

from data_normalizer import unscale_data_with_minmax

In [67]:
# Load the stored {key: nested record} mapping, flatten it into one table and
# build a "predict the next row" regression problem from it.
# NOTE(review): allow_pickle=True unpickles arbitrary objects and can execute
# code on load -- only use files from a trusted source.
filtered_data = np.load('./macroeconomic_data/filtered_data.npy', allow_pickle=True).item()

# One single-row frame per record; nested keys become dotted column names.
frames = []

for date, data_frame in filtered_data.items():
    flattened_data = pd.json_normalize(data_frame, sep=".")
    # Extra feature column "10" holds the second element of the record key
    # (presumably the month of the observation -- TODO confirm).
    flattened_data["10"] = str(date[1])
    frames.append(flattened_data)

data = pd.concat(frames, ignore_index=True)

# Features: every row except the last one.
X = data
X = X.iloc[:-1, :]

# Target: column "0.36" shifted by one row, so that row i of X is paired with
# the value observed in row i + 1.
# The pattern is escaped and anchored because DataFrame.filter applies
# re.search: the bare '0.36' would also select columns such as "10.36" or
# "0.136" and silently produce a target of the wrong length.
y = data.filter(regex=r'^0\.36$', axis=1)
if y.shape[1] != 1:
    raise ValueError(f"expected exactly one target column named '0.36', found {list(y.columns)}")
y = y.iloc[1:]
y = y.values.ravel()

# Capture the generator state *before* splitting: the notebook stores this
# tuple next to the fitted model so the identical split can be recreated later.
random_state = np.random.RandomState()
random_state_tuple = random_state.get_state()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

In [38]:
# Estimator whose hyper-parameters are tuned by the grid search below.
base_estimator = MLPRegressor()

# Search space: every combination of the listed values is evaluated.
# ('early_stopping': [True] is currently not part of the grid.)
parameters = dict(
    hidden_layer_sizes=[
        (200, 200, 200, 200, 200),
        (200, 200, 200, 200),
        (100, 200, 200, 100),
        (200, 200, 200),
    ],
    activation=['logistic', 'tanh'],
    solver=['sgd'],
    alpha=[0.001, 0.0001, 0.00005],
    learning_rate=['invscaling', 'adaptive'],
    max_iter=[100000],
    tol=[1e-6],
    momentum=[0.8, 0.85, 0.9, 0.95],
    nesterovs_momentum=[True, False],
)

In [None]:

# Where the fitted model and the split state are persisted.
# The directory is created *before* the (long) search starts, so a missing or
# unwritable output folder is detected up front instead of after fitting.
Path('./joblib').mkdir(parents=True, exist_ok=True)

# greater_is_better=False makes the scorer report the negated MSE, so the
# search (which maximises the score) selects the lowest-error candidate.
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

# 5-fold cross-validated exhaustive search over `parameters`, using all cores;
# error_score='raise' aborts on the first failing fit instead of hiding it.
clf = GridSearchCV(base_estimator, param_grid=parameters, cv=5, error_score='raise', n_jobs=-1, verbose=2, scoring=mse_scorer)
clf.fit(X_train, y_train)

best_estimator = clf.best_estimator_

# Persist the winning model together with the RNG state that produced the
# train/test split, so a later session can restore both.
joblib.dump(best_estimator, './joblib/best_estimator.joblib', compress=3)
joblib.dump(random_state_tuple, './joblib/split_random_state.joblib', compress=3)

In [None]:
# Printing the raw cv_results_ dict emits one long array per key for every
# candidate, which is unreadable. Tabulate it instead, best candidates first,
# and show only the top rows; the full table stays available in `cv_results`.
cv_results = pd.DataFrame(clf.cv_results_).sort_values("rank_test_score")
cv_results.head(10)

In [21]:
# Reuse the estimator from this session if the grid search was run; otherwise
# restore the persisted model and rebuild the exact same train/test split.
try:
    best_estimator
except NameError:
    # Only "the name is not defined yet" should trigger the fallback; a bare
    # `except:` would also swallow KeyboardInterrupt and unrelated errors.
    #
    # NOTE(review): joblib files are pickles -- loading them executes code.
    # Make sure these files have not been replaced by someone else.
    best_estimator = joblib.load('./joblib/best_estimator.joblib')
    # Restore the RNG state captured before the original split so that
    # train_test_split reproduces the same partition.
    random_state = np.random.RandomState()
    random_state.set_state(joblib.load('./joblib/split_random_state.joblib'))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

In [22]:
# Text description of the selected model, as plain output.
estimator_summary = str(best_estimator)
print(estimator_summary)

In [30]:
# Predict the held-out rows.
y_pred = best_estimator.predict(X_test)

# Pass predictions and targets through the inverse min-max helper
# (predictions first, then targets).
y_pred_scaled, y_test_scaled = (
    unscale_data_with_minmax(np.array(values)) for values in (y_pred, y_test)
)

# Two-column listing: prediction next to the value actually observed.
print("Predicted\t\tActual")
for predicted_value, actual_value in zip(y_pred_scaled, y_test_scaled):
    print(predicted_value, actual_value, sep="\t\t")

In [31]:
# Error metrics on the held-out rows, computed on the unscaled values.
mse = mean_squared_error(y_test_scaled, y_pred_scaled)
r2 = r2_score(y_test_scaled, y_pred_scaled)

for metric_label, metric_value in (
    ('Mean Squared Error:', mse),
    ('R-squared Score:', r2),
):
    print(metric_label, metric_value)

Mean Squared Error: 0.05295912330211579
R-squared Score: 0.560456037637686


In [None]:
def plot_predicted_vs_actual(predicted, actual, title, diagonal=(99.3, 101)):
    """Scatter predicted against actual values with a red reference diagonal.

    Parameters
    ----------
    predicted, actual : array-like
        Values plotted on the x and y axis respectively.
    title : str
        Title of the figure.
    diagonal : pair of float
        Start and end coordinate of the y = x reference line; points on this
        line are perfect predictions.
    """
    reference = list(diagonal)
    plt.figure()
    plt.scatter(predicted, actual)
    plt.plot(reference, reference, c='r')
    plt.xlabel('predicted inflation')
    plt.ylabel('actual inflation')
    plt.title(title)
    plt.show()


# Held-out rows.
plot_predicted_vs_actual(y_pred_scaled, y_test_scaled, "Test data")

# Training rows, for comparison against the held-out plot.
y_train_scaled = unscale_data_with_minmax(y_train)
y_train_pred = unscale_data_with_minmax(best_estimator.predict(X_train))
plot_predicted_vs_actual(y_train_pred, y_train_scaled, "Train data")

# Loss recorded by the estimator during fitting.
plt.figure()
plt.plot(best_estimator.loss_curve_)
plt.xlabel("iteration_no")
plt.ylabel("loss")
plt.show()

In [105]:
# Evaluate the fitted model on the "recent" data set.
# NOTE(review): allow_pickle=True unpickles arbitrary objects and can execute
# code on load -- only use files from a trusted source.
filtered_data_recent = np.load('./macroeconomic_data/filtered_data_recent.npy', allow_pickle=True).item()
normalized_data_recent = np.load('./macroeconomic_data/normalized_data_recent.npy', allow_pickle=True).item()

# Same flattening as for the training data: one single-row frame per record,
# nested keys joined with ".", plus the extra "10" column taken from the key.
recent_frames = []

for date, data_frame in filtered_data_recent.items():
    flattened_data = pd.json_normalize(data_frame, sep=".")
    flattened_data["10"] = str(date[1])
    recent_frames.append(flattened_data)

data_recent = pd.concat(recent_frames, ignore_index=True)

X_recent = data_recent

# Target values taken from the normalized records: entry [0][36] of every
# record that has it and where it is not None.
y_recent = [
    record[0][36]
    for record in normalized_data_recent.values()
    if 0 in record and 36 in record[0] and record[0][36] is not None
]

if len(y_recent) > len(X_recent):
    # More targets than feature rows: pair feature row i with target i + 1.
    y_recent = y_recent[1:len(X_recent) + 1]
else:
    # Otherwise fall back to the flattened table itself: drop the last feature
    # row and use column "0.36" shifted by one row as the target.
    X_recent = X_recent.iloc[:-1, :]

    # Escaped and anchored: DataFrame.filter applies re.search, so the bare
    # '0.36' would also select columns such as "10.36" or "0.136".
    target_recent = data_recent.filter(regex=r'^0\.36$', axis=1)
    if target_recent.shape[1] != 1:
        raise ValueError(
            f"expected exactly one target column named '0.36', found {list(target_recent.columns)}"
        )
    y_recent = target_recent.iloc[1:].values.ravel()


y_recent_pred = best_estimator.predict(X_recent)

# Pass targets and predictions through the inverse min-max helper.
y_recent_scaled = unscale_data_with_minmax(np.array(y_recent))
y_recent_pred_scaled = unscale_data_with_minmax(y_recent_pred)

In [None]:
# Predicted vs. actual values for the recent data; the red line is y = x,
# i.e. where perfect predictions would fall.
plt.scatter(y_recent_pred_scaled, y_recent_scaled)
plt.plot([100.5, 102], [100.5, 102], c='r')
plt.xlabel('predicted inflation')
plt.ylabel('actual inflation')
plt.title("Recent data")
plt.show()