In [35]:
import os

import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from datasets import ForexPricePredictionDataset

In [36]:
# Load the GBPJPY H1 CSV through the project's dataset wrapper.
dataset = ForexPricePredictionDataset(
    "./data/GBPJPY_H1.csv",
    header=0,
    data_order="ohlc",
    input_duration=30,
    output_duration=1,
    normalize=False,
)

In [37]:
# Take the frame held by the dataset object. `_data` is a private attribute,
# so this depends on the class internals.
# NOTE(review): prefer a public accessor if the dataset class offers one.
data = dataset._data

In [38]:
# Build a time-indexed copy of the dataset's frame.
# The frame is derived from `dataset._data` with `assign`, which returns a new
# frame, so the dataset's own frame is not modified and this cell gives the
# same result every time it is re-run.
data = (
    dataset._data
    .assign(Time=lambda frame: pd.to_datetime(frame["Time"]))
    .set_index("Time")
)

In [39]:
# Preview the time-indexed frame (Open, High, Low, Close, Volume columns).
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-23 08:00:00,237.300,237.420,237.150,237.180,6081
2007-04-23 09:00:00,237.180,237.410,237.160,237.310,7012
2007-04-23 10:00:00,237.320,237.380,237.235,237.320,3865
2007-04-23 11:00:00,237.310,237.400,237.220,237.400,3998
2007-04-23 12:00:00,237.390,237.410,236.920,237.175,5306
...,...,...,...,...,...
2023-04-28 07:00:00,170.926,171.059,170.809,170.946,10917
2023-04-28 08:00:00,170.947,171.083,170.849,170.974,6550
2023-04-28 09:00:00,170.974,171.152,170.954,171.034,8335
2023-04-28 10:00:00,171.031,171.061,170.900,171.059,7538


In [40]:
def savecandle(data, root, label, name):
    """Render the first 30 rows of ``data`` as a candlestick chart and save it.

    The image is written to ``./{root}/<class>/{name}.png``, where ``<class>``
    is picked from ``label``: ``1`` -> ``up``, ``2`` -> ``down``, any other
    value -> ``stationary``. The class directory is created if it is missing.

    Args:
        data: Frame handed to ``mpf.plot``; only ``data.head(30)`` is drawn.
        root: Output root directory, relative to the working directory.
        label: Class label that selects the sub-directory.
        name: File name without the ``.png`` extension.
    """
    if label == 1:
        path = f"./{root}/up/"
    elif label == 2:
        path = f"./{root}/down/"
    else:
        path = f"./{root}/stationary/"
    # savefig does not create missing directories, so make sure the class
    # folder exists before rendering anything.
    os.makedirs(path, exist_ok=True)

    fig, ax = mpf.plot(data.head(30), type='candle', returnfig=True, scale_padding=0, style='charles')
    try:
        ax[0].set_axis_off()
        # NOTE(review): per the matplotlib docs pad_inches only applies with
        # bbox_inches='tight'; left as-is so the saved image size does not
        # change -- confirm whether tight cropping was intended.
        fig.savefig(path + name + ".png", pad_inches=0)
    finally:
        # Close the figure even when saving fails, so repeated calls do not
        # accumulate open figures. Closing makes clearing the axes unnecessary.
        plt.close(fig)

In [41]:
def saveall(data, root, stride=15, threshold=0.03):
    """Slide a 31-row window over ``data`` and save one labelled chart per step.

    Each sample uses 30 consecutive rows as the chart input and the row right
    after them as the target. The label compares the target's ``Close`` with
    the last input ``Close``:

    * ``1`` when it rose by more than ``threshold``,
    * ``2`` when it fell by more than ``threshold``,
    * ``0`` otherwise.

    The image is written by ``savecandle`` and named after the index label of
    the first input row.

    Args:
        data: Frame with a ``Close`` column, sliced positionally.
        root: Output root directory forwarded to ``savecandle``.
        stride: Number of rows between the starts of consecutive windows.
        threshold: Minimum absolute ``Close`` change for an up/down label.
    """
    n_inputs = 30  # matches the 30 rows that savecandle draws
    span = n_inputs + 1  # inputs plus the single target row

    # The stop is len + 1 so the window ending on the very last row is kept.
    for end in range(span, len(data.index) + 1, stride):
        sample = data.iloc[end - span:end]
        inputs = sample.iloc[:n_inputs]
        target = sample.iloc[n_inputs]

        # .iloc[-1] is explicitly positional; a bare [-1] on a non-integer
        # index relies on a positional fallback that pandas has deprecated.
        change = target["Close"] - inputs["Close"].iloc[-1]
        if change > threshold:
            label = 1
        elif change < -threshold:
            label = 2
        else:
            label = 0

        savecandle(inputs, root, label, str(inputs.index[0]))
        # One close('all') releases every open figure. Calling plt.cla() or
        # plt.clf() afterwards would implicitly create a new empty figure.
        plt.close('all')


In [42]:
# Chronological 80/20 split: shuffle=False keeps the rows in their original
# order, so the test set is the last 20% of the frame.
train, test = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    shuffle=False,
)

In [47]:
# Threshold on the row-to-row change in Open, used by the filter cells below.
diff = 0.05

In [48]:
# Rows whose Open moved by less than `diff` (in either direction) versus the
# previous row.
train.loc[train["Open"].diff().abs() < diff]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-23 11:00:00,237.310,237.400,237.220,237.400,3998
2007-04-23 14:00:00,237.140,237.340,236.840,237.030,7825
2007-04-23 22:00:00,236.600,236.940,236.420,236.920,18033
2007-04-24 08:00:00,237.320,237.370,237.250,237.250,7880
2007-04-24 12:00:00,237.370,237.470,237.100,237.295,4694
...,...,...,...,...,...
2020-02-14 04:00:00,143.118,143.155,142.775,142.871,13617
2020-02-14 06:00:00,142.885,142.943,142.762,142.856,9514
2020-02-14 07:00:00,142.856,143.120,142.817,143.106,7089
2020-02-14 09:00:00,143.092,143.168,143.059,143.070,3298


In [49]:
# Rows whose Open rose by more than `diff` versus the previous row.
train.loc[train["Open"].diff().gt(diff)]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-23 10:00:00,237.320,237.380,237.235,237.320,3865
2007-04-23 12:00:00,237.390,237.410,236.920,237.175,5306
2007-04-23 19:00:00,236.635,236.990,236.580,236.780,8902
2007-04-23 20:00:00,236.790,236.800,236.530,236.620,11710
2007-04-23 23:00:00,236.920,236.930,236.520,236.825,14762
...,...,...,...,...,...
2020-02-13 07:00:00,143.341,143.475,143.224,143.300,9569
2020-02-13 16:00:00,143.313,143.320,143.189,143.264,7412
2020-02-13 18:00:00,143.350,143.399,143.224,143.246,4409
2020-02-13 23:00:00,143.349,143.403,143.094,143.183,7966


In [50]:
# Rows whose Open fell by more than `diff` versus the previous row.
train.loc[train["Open"].diff().lt(-diff)]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-23 09:00:00,237.180,237.410,237.160,237.310,7012
2007-04-23 13:00:00,237.185,237.345,237.079,237.140,3889
2007-04-23 15:00:00,237.020,237.160,236.710,236.800,10494
2007-04-23 16:00:00,236.790,236.840,236.405,236.415,16736
2007-04-23 17:00:00,236.410,236.450,235.970,236.285,19313
...,...,...,...,...,...
2020-02-13 19:00:00,143.245,143.257,143.182,143.184,2456
2020-02-13 20:00:00,143.184,143.251,143.166,143.229,3720
2020-02-14 00:00:00,143.180,143.205,142.964,143.119,7667
2020-02-14 01:00:00,143.121,143.188,142.998,143.099,6444


In [51]:
# Write the candlestick images for each split into its own folder.
for split_frame, out_root in ((train, "images/train"), (test, "images/test")):
    saveall(split_frame, out_root)

<Figure size 640x480 with 0 Axes>