In [19]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

In [20]:
N_LOOK_BACK = 10 # how far the algorithm looks back into the past to try and predict the future price
N_LOOK_AHEAD = 20 # how far into the future the algorithm tries to predict price movements
TRADE_THRESHOLD = 0.002 # strength of the price movement to trigger a trade signal
LIMIT = 10000 # limit the number of data points for testing

In [21]:
eurusd = pd.read_csv("data_sample.csv")

In [22]:
eurusd["dt"] = pd.to_datetime(eurusd["dt"])
eurusd.set_index("dt", inplace=True)

In [None]:
trades = pd.read_csv("trades_sample.csv")
trades["dt"] = pd.to_datetime(trades["dt"])
trades.set_index("dt", inplace=True)

trades_u = trades.drop_duplicates("Order")

trades_u["SL_p"] = trades_u["SL"] / trades_u["Price"] - 1
trades_u["TP_p"] = trades_u["TP"] / trades_u["Price"] - 1

tp_sl = trades_u.groupby("Type").agg({"SL_p": "mean", "TP_p": "mean"})

In [None]:
eurusd.Close[:1000].plot(figsize=(20, 10))

In [24]:
rel_change = eurusd.Close.diff() / eurusd.Close.shift(1)
rel_change = rel_change[1:][:(len(eurusd) - len(eurusd) % N_LOOK_BACK)][:LIMIT]
N = len(rel_change)

bins = rel_change.quantile(np.arange(0,1.1, 0.1))

le = LabelEncoder()

y = np.digitize(rel_change, bins.values)[N_LOOK_BACK-1::N_LOOK_BACK] - 1
y_enc = le.fit_transform(y)

In [26]:
cutoff_idx = N - N % N_LOOK_BACK

In [29]:
N%N_LOOK_BACK

9

In [30]:
N

9999

In [35]:
X = rel_change[:(cutoff_idx)].values.reshape(int((cutoff_idx) / N_LOOK_BACK), N_LOOK_BACK)[:,:-1]
prices = eurusd.Close.values[1:(cutoff_idx)+1][:LIMIT].reshape(int((cutoff_idx) / N_LOOK_BACK), N_LOOK_BACK)[:,:-1]
dates = eurusd.index.values[1:(cutoff_idx)+1][:LIMIT].reshape(int((cutoff_idx) / N_LOOK_BACK), N_LOOK_BACK)[:,:-1]


TRAIN_FRAC = 0.7

N_train = int(len(X) * TRAIN_FRAC)

X_train = X[:N_train]
y_train = y_enc[:N_train]
X_test = X[N_train:]
y_test = y_enc[N_train:]

In [36]:
xgb_clf = xgb.XGBClassifier(objective='multi:softmax', 
                            num_class=len(np.unique(y_enc)), 
                            # missing=1, 
                            early_stopping_rounds=10, 
                            eval_metric=['merror','mlogloss'], 
                            seed=42)

In [None]:
xgb_clf.fit(X_train, 
            y_train,
            verbose=1,
            eval_set=[(X_train, y_train), (X_test, y_test)]
    )
# set to 1 to see xgb training round intermediate results

In [38]:
def predict_ar(prices, bins, model, X, n=10, beam_width=3):
    X_c = X.copy()
    
    y_pred = bins.iloc[model.predict(X)]

    groups = []

    exp_price = prices[0][-1]

    for i in range(1,n+1):
        X_c = np.append(X_c, y_pred)
        y_pred = bins.iloc[model.predict([X_c[i:]])]
        exp_price *= float(y_pred) + 1.0
        groups.append(y_pred)
    
    cum_change = sum([i.values[0] for i in groups])
    
    return X_c, groups, cum_change, exp_price

In [None]:
for i in range(0, X_test.shape[0]-1):
    pred, groups, cum_change, exp_price = predict_ar(prices[i:i+1], bins, xgb_clf, X_test[i:i+1], n=N_LOOK_AHEAD)
    curr_price = prices[i:i+1][0][-1]
    curr_date = dates[i:i+1][0][-1]
    if abs(cum_change) > TRADE_THRESHOLD:
        trade_type = "Sell" if np.sign(cum_change) else "Buy"
        sl_rel = tp_sl.loc[trade_type]["SL_p"]
        
        sl =  curr_price * (1+sl_rel)

        print(f"""
              expected {N_LOOK_AHEAD}min cum_change: {round(cum_change*100, 2)}%, 
              trade type: {trade_type}, 
              target price/TP: {exp_price}, 
              current pirce: {curr_price},
              current date: {curr_date}
              SL: {sl}""")

Old Approach with Neural Network

In [29]:
def prepare_data(stock_prices, n_look_back, train_frac=0.8):
    n = len(stock_prices)

    data = stock_prices.values[:(n - n % n_look_back)].reshape(int((n - n % n_look_back)/n_look_back), n_look_back, stock_prices.shape[1])

    print(data.shape)

    N_train = int(len(data) * train_frac)

    train_data = data[:N_train,:,:]
    test_data = data[N_train:,:,:]

    x_train = train_data[:,:-1,1]
    y_train = train_data[:,-1,1].reshape(-1,1)
    
    x_test = test_data[:,:-1,1]
    y_test = test_data[:,-1,1].reshape(-1,1)
    
    return x_train, y_train, x_test, y_test

In [30]:
n = len(eurusd.Close.values)

In [31]:
x_train, y_train, x_test, y_test = prepare_data(eurusd, N_LOOK_BACK)

x_train = torch.from_numpy(x_train).type(torch.Tensor)[:,:,None]
x_test = torch.from_numpy(x_test).type(torch.Tensor)[:,:,None]
y_train = torch.from_numpy(y_train).type(torch.Tensor)
y_test = torch.from_numpy(y_test).type(torch.Tensor)

(632453, 10, 4)


In [32]:
input_dim = 1
hidden_dim = 32
num_layers = 2 
output_dim = 1
BATCH_SIZE = 32

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.num_layers = num_layers

        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = x[:, -1, :]
        x = self.fc(x)

        return x

In [33]:
loader = data.DataLoader(data.TensorDataset(x_train, y_train), shuffle=True, batch_size=BATCH_SIZE) 
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
N_EPOCHS = 1

hist = np.zeros(N_EPOCHS)

for epoch in range(N_EPOCHS):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch ", epoch, "MSE: ", loss.item())
    hist[epoch] = loss.item()

In [177]:
def pred_ar(X, n_frames=10):
    inp = X[None,:,:]

    print(inp.shape)

    # for i in  range(n_frames):
    for i in range(n_frames):
        y_pred = model(inp[:,i:,:]).detach()[None]
    # print(inp[i:], y_pred)
        inp = torch.concat((inp, y_pred), dim=1)
        # series = np.append(series, y_pred.detach().numpy())

    return inp

In [191]:
ar_predictions = pred_ar(x_test[0], n_frames=100)

torch.Size([1, 9, 1])


In [None]:
plt.plot(ar_predictions[0,:,0])