In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn import neural_network
from sklearn import metrics

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
TIME = 'time'
OPEN = 'open'
HIGH = 'high'
LOW = 'low'
CLOSE = 'close'
VWAP = 'vwap'
VOLUME = 'volume'
COUNT = 'count'


In [4]:
# Open the dataset
df = pd.read_csv("ohlc-btc-eur-day.csv")

In [5]:
# Select columns
df_num = df[[OPEN, HIGH, LOW, CLOSE, VOLUME]]

In [6]:
# Scale data
scaler = preprocessing.StandardScaler()
df_norm = pd.DataFrame(scaler.fit_transform(df_num.values), columns=df_num.columns, index=df_num.index)
print((df_num[OPEN] - np.mean(df_num[OPEN])) / np.std(df_num[OPEN]))
print(df_norm)

0      0.964592
1      0.816782
2      0.866182
3      0.790373
4      0.992514
         ...   
715   -0.392402
716   -0.260456
717   -0.310198
718   -0.005791
719    0.041608
Name: open, Length: 720, dtype: float64
         open      high       low     close    volume
0    0.964592  0.974086  1.052493  1.041922  0.205720
1    0.816782  0.878937  0.935004  0.962462 -0.026795
2    0.866182  0.802733  0.877752  0.814761 -0.077718
3    0.790373  0.797997  0.874209  0.864125  0.177442
4    0.992514  0.935581  0.905391  0.788421 -0.157471
..        ...       ...       ...       ...       ...
715 -0.392402 -0.434673 -0.486658 -0.513376  1.120267
716 -0.260456 -0.331426 -0.360108 -0.393528  0.199677
717 -0.310198 -0.318022 -0.294252 -0.261680  0.337827
718 -0.005791 -0.084342 -0.282457 -0.311385  1.036503
719  0.041608 -0.014248  0.070161 -0.007252 -0.935718

[720 rows x 5 columns]


In [7]:
# Unscale data
a_close_price = 0.95
the_unsacled_close_prices = a_close_price * np.std(df_num[CLOSE]) + np.mean(df_num[OPEN])
print(the_unsacled_close_prices)
def unscale(value):
    return value * np.std(df_num[CLOSE]) + np.mean(df_num[OPEN])

8336.448457312077


In [8]:
# Define variables
delta = 2
X = df_norm.iloc[:-delta,:]
y = df_norm[CLOSE].iloc[delta:]

In [14]:
# Evaluate model

from scorer import percentage_error, percentage_error_score, aon_scorer

def absolute_error_score(y_true, y_pred):
    diff = np.abs(unscale(y_true) - unscale(y_pred)) * -1
    return np.average(diff, axis=0)

def absolute_error(estimator, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    return absolute_error_score(y_test, y_pred)

model = neural_network.MLPRegressor(hidden_layer_sizes=(100,),
                                    activation='relu',
                                    solver='lbfgs',
                                    alpha=1
                                   )

scores = cross_validate(model, X, y,
                        scoring={
                            'percentage_error': percentage_error,
                            'absolute_error': absolute_error,
                            'aon_scorer': aon_scorer
                        })

print(np.mean(scores['test_percentage_error']))
print(np.mean(scores['test_absolute_error']))
print(np.mean(scores['test_aon_scorer']))
scores


-25.180431451195453
-200.8318704744841
0.42777777777777776


{'fit_time': array([0.69627905, 0.62980366, 0.7350564 , 0.57336998, 0.74087501]),
 'score_time': array([0.76591182, 0.80534387, 0.70194602, 0.40788078, 0.27413058]),
 'test_percentage_error': array([-34.86780719, -40.38475307, -14.70109253,  -4.87862618,
        -31.06987829]),
 'test_absolute_error': array([-377.39067357, -205.99680677, -208.55507397,  -89.86407943,
        -122.35271863]),
 'test_aon_scorer': array([0.41666667, 0.52777778, 0.22222222, 0.44444444, 0.52777778])}

In [38]:
# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y)
model.fit(X_train, y_train)

MLPRegressor(alpha=1, solver='lbfgs')

In [39]:
# Evaluate the model
y_pred = model.predict(X_test)
print(percentage_error_score(y_pred, y_test))
print(absolute_error_score(y_pred, y_test))

-22.196158788491402
-153.6115132212434


In [40]:
# Use model
print(unscale(model.predict([df_norm.iloc[0]]))) # predict the first raw

[8369.20548834]
