In [1]:
from datahandler import *
from playground import *
from gui import *
from torchutils import *
from models import *
from torch.utils.data import WeightedRandomSampler
import matplotlib.pyplot as plt

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
seq_length = 15
agent_horizon = 10
crossover_horizon = 5

### Data Loading and Preprocessing

In [4]:
data = None
if not os.path.exists('./BTCUSDT_DB.csv'):
    data = pd.read_csv("BTCUSDT_1m.csv")
    data = get_dollar_bars(data)
    data.to_csv('./BTCUSDT_DB.csv', sep=',')
else:
    data = pd.read_csv('./BTCUSDT_DB.csv', sep=',')

handler = NewDataHandler(dataset=data)

ma1, ma2 = 5, 10
handler.data = ma_crossover_labelling(
    handler.data, ma1, ma2, crossover_horizon)
handler.data.rename({'Label': 'Crossover'}, axis=1, inplace=True)
dummies = pd.get_dummies(handler.data['Crossover'], prefix='Crossover') * 1
# handler.data.drop('Crossover', axis=1, inplace=True)
handler.data = pd.concat([handler.data, dummies], axis=1)
handler.data = triple_barrier_labelling(handler.data, time_limit=agent_horizon)

handler.add_indicators([Indicators.RSI, Indicators.MACD,
                       Indicators.ADX, Indicators.OBV, Indicators.TICK_DENSITY])

handler.create_var_indicator([Indicators.RSI, Indicators.MACD, Indicators.ADX, Indicators.OBV, Indicators.PERC_RET,
                              Indicators.TICK_DENSITY])

# display(handler.data[handler.data.isnull().any(axis=1)]) # Displaying all the rows that contain missing values to see if they are spread across the dataframe

handler.data.replace([np.inf, -np.inf], 0, inplace=True)

# handler.standardize_data()

handler.data.dropna(axis=0, inplace=True)

handler.create_predict_data()

predict_data = handler.predict_data
predict_data['Target'] = handler.data['Label']
predict_data[f'MA{ma1} Var'], predict_data[f'MA{ma2} Var'] = handler.data[f'MA{ma1} Var'], handler.data[f'MA{ma2} Var']
predict_data[dummies.columns] = handler.data[dummies.columns]
predict_data['Crossover'] = handler.data['Crossover']
predict_data[f'MA{ma1} UP'], predict_data[f'MA{ma2} UP'] = (
    handler.data[f'MA{ma1}'] > handler.data[f'MA{ma2}']) * 1, (handler.data[f'MA{ma1}'] < handler.data[f'MA{ma2}']) * 1
predict_data.reset_index(drop=True, inplace=True)

Could not add indicator OBV
Error message 'Volume'
Ignoring indicator OBV. Reason: Not found in the list of indicators


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data['Target'] = handler.data['Label']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data[f'MA{ma1} Var'], predict_data[f'MA{ma2} Var'] = handler.data[f'MA{ma1} Var'], handler.data[f'MA{ma2} Var']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data[f'MA{ma1} Var'], predict_

### Loading pretrained models

In [5]:
crossover_lstm_model = torch.load(
    f'./models/crossover_{seq_length}_to_{crossover_horizon}.pt')
lstm_model = torch.load(f'./models/agent_{seq_length}_to_{agent_horizon}.pt')


### Creating Torch Dataset

In [6]:
trade_columns = predict_data.drop(['Crossover'], axis=1).columns
crossover_columns = predict_data.drop(dummies.columns, axis=1).columns
ma_dataset = TSDataset(
    predict_data[crossover_columns], seq_length, 'Crossover')

### Replacing real crossover predictions by infered crossover predictions

In [7]:
predicted_crossover_outputs, predicted_crossover_targets = eval_lstm(crossover_lstm_model, ma_dataset, len(
    ma_dataset), crossover_lstm_model.num_layers, crossover_lstm_model.hidden_size, device)

predicted_crossover_outputs = pd.Series(
    (torch.argmax(predicted_crossover_outputs, axis=-1) - 1).cpu()).shift(seq_length)

crossover_prediction_dummies = pd.get_dummies(
    predicted_crossover_outputs, prefix='Crossover') * 1

transition_predict_data = predict_data.copy()
transition_predict_data[dummies.columns] = crossover_prediction_dummies
transition_predict_data.dropna(axis=0, inplace=True)

Accuracy : 0.5937994573818675 || Loss : 0.9271247982978821
Confusion matrix : 
[[7.231e+03 5.180e+02 1.000e+00]
 [7.375e+03 7.080e+03 5.425e+03]
 [1.500e+01 1.039e+03 6.700e+03]]


In [8]:
dataset = TSDataset(transition_predict_data[trade_columns], seq_length)

In [9]:
predict_data

Unnamed: 0,Unix,RSI Var,MACD Var,MACD_H Var,ADX14 Var,-DM Var,+DM Var,TICK_DENSITY Var,PERC_RET,Target,MA5 Var,MA10 Var,Crossover_-1,Crossover_0,Crossover_1,Crossover,MA5 UP,MA10 UP
0,1513428720000,0.048401,0.029123,-0.405401,0.015614,-0.081688,0.192699,0.140838,0.080168,0.0,0.039638,0.017862,0,1,0,0,1,0
1,1513501980000,-0.020026,0.005391,-0.265815,0.016594,-0.062304,-0.024363,0.123226,-0.008869,-1.0,0.035187,0.017930,1,0,0,-1,1,0
2,1513550040000,-0.033514,-0.015804,0.064192,-0.031174,0.580984,-0.112892,0.026073,-0.014388,0.0,0.028830,0.014729,1,0,0,-1,1,0
3,1513582380000,-0.034835,-0.032725,0.305927,-0.028909,-0.060343,-0.060343,-0.059421,-0.014598,0.0,0.007641,0.011799,1,0,0,-1,1,0
4,1513633980000,-0.159429,-0.083628,0.757608,-0.067444,0.359299,-0.112219,0.010391,-0.074901,0.0,-0.007263,0.008164,1,0,0,-1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35394,1685407200000,0.086249,-0.060071,-0.103070,-0.014485,-0.098990,0.057578,0.083067,0.004759,-1.0,0.000256,-0.001026,0,0,1,1,0,1
35395,1685420640000,0.009836,-0.052760,-0.110785,-0.013648,-0.057964,-0.057964,0.150442,0.000612,-1.0,0.000918,-0.000541,0,0,1,1,0,1
35396,1685435400000,0.094888,0.042184,-0.275985,0.009149,-0.086009,0.155933,0.251282,0.006472,-1.0,0.002406,0.000461,0,1,0,0,0,1
35397,1685443800000,-0.057433,-0.019390,-0.149725,0.008564,-0.059869,-0.058333,0.066598,-0.002737,-1.0,0.001621,-0.000033,1,0,0,-1,1,0


In [10]:
transition_predict_data

Unnamed: 0,Unix,RSI Var,MACD Var,MACD_H Var,ADX14 Var,-DM Var,+DM Var,TICK_DENSITY Var,PERC_RET,Target,MA5 Var,MA10 Var,Crossover_-1,Crossover_0,Crossover_1,Crossover,MA5 UP,MA10 UP
0,1513428720000,0.048401,0.029123,-0.405401,0.015614,-0.081688,0.192699,0.140838,0.080168,0.0,0.039638,0.017862,0.0,0.0,0.0,0,1,0
1,1513501980000,-0.020026,0.005391,-0.265815,0.016594,-0.062304,-0.024363,0.123226,-0.008869,-1.0,0.035187,0.017930,0.0,0.0,0.0,-1,1,0
2,1513550040000,-0.033514,-0.015804,0.064192,-0.031174,0.580984,-0.112892,0.026073,-0.014388,0.0,0.028830,0.014729,0.0,0.0,0.0,-1,1,0
3,1513582380000,-0.034835,-0.032725,0.305927,-0.028909,-0.060343,-0.060343,-0.059421,-0.014598,0.0,0.007641,0.011799,0.0,0.0,0.0,-1,1,0
4,1513633980000,-0.159429,-0.083628,0.757608,-0.067444,0.359299,-0.112219,0.010391,-0.074901,0.0,-0.007263,0.008164,0.0,0.0,0.0,-1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35379,1685309400000,0.085803,0.182759,0.281196,0.080652,-0.167365,0.322755,-0.076316,0.015833,-1.0,0.006766,0.004553,0.0,1.0,0.0,0,1,0
35380,1685311380000,0.003448,0.115898,0.081708,0.069302,-0.034564,-0.034564,-0.233618,0.000844,-1.0,0.006209,0.003724,0.0,1.0,0.0,0,1,0
35381,1685314680000,0.002886,0.074445,-0.013330,0.065175,-0.064475,0.025090,-0.260223,0.000675,-1.0,0.003024,0.003342,0.0,1.0,0.0,0,1,0
35382,1685318520000,0.029080,0.087796,0.039732,0.063541,-0.099579,0.037882,-0.148241,0.007278,-1.0,0.005360,0.004051,0.0,1.0,0.0,-1,1,0


In [11]:
argmax_dummy = np.argmax(transition_predict_data[dummies.columns].values, axis=1)
argmax_values = pd.Series(argmax_dummy) - 1
(transition_predict_data['Crossover'] == argmax_values).value_counts() / argmax_values.shape[0]

True     0.593602
False    0.406398
Name: count, dtype: float64

#### Splitting regular model dataset

In [12]:
train_indices = range(round(0.7 * len(dataset)), round(0.9 * len(dataset)), int(seq_length / 5))
val_indices = range(round(0.9 * len(dataset)), len(dataset) - seq_length)

train_set = torch.utils.data.Subset(dataset, train_indices)
# Validation set will be training set for Meta Labelling
val_set = torch.utils.data.Subset(dataset, val_indices)

#### Training set label distribution

In [13]:
label_distribution = (transition_predict_data.loc[train_set.indices, 'Target'].value_counts(
) / transition_predict_data.loc[train_set.indices, 'Target'].shape[0]).sort_index().to_list()
display(transition_predict_data.loc[train_set.indices, 'Target'].value_counts())
label_distribution

Target
 0.0    1566
-1.0     631
 1.0     161
Name: count, dtype: int64

[0.2675996607294317, 0.6641221374045801, 0.06827820186598813]

#### Validation set label distribution

In [14]:
val_label_distribution = (transition_predict_data.loc[val_set.indices, 'Target'].value_counts(
) / transition_predict_data.loc[val_set.indices, 'Target'].shape[0]).sort_index().to_list()
display(transition_predict_data.loc[val_set.indices, 'Target'].value_counts())
val_label_distribution

Target
 0.0    1759
-1.0    1325
 1.0     438
Name: count, dtype: int64

[0.37620670073821694, 0.4994321408290744, 0.12436115843270869]

### Creating Sampler

In [15]:
class_weights = [1 / p for p in label_distribution]
print(class_weights)
weights = [class_weights[torch.argmax(label)] for _, label in train_set]
train_sampler = WeightedRandomSampler(
    weights=weights, num_samples=len(train_set), replacement=True)
# train_sampler = None

[3.736925515055468, 1.5057471264367817, 14.645962732919255]


### Creating LSTM Model

In [16]:
input_size = lstm_model.input_size
hidden_size = lstm_model.hidden_size
batch_size = 64
num_layers = lstm_model.num_layers
output_size = transition_predict_data['Target'].unique().size
new_lstm_model = LSTMModel(input_size, hidden_size,
                       num_layers, output_size).to(device)

### Training Model

In [17]:
# eval_lstm(lstm_model, val_set, len(val_set), num_layers, hidden_size, device)

In [18]:
weights = torch.Tensor(label_distribution).to(device)
train_lstm(lstm_model, train_set, val_set, 60, 0.0001,
           batch_size, lstm_model.num_layers, lstm_model.hidden_size, device, train_sampler, class_weights=None)

Best accuracy : 0.4750141964792731 || Best confusion matrix : 
 [[2.73e+02 1.04e+03 0.00e+00]
 [3.63e+02 1.40e+03 1.00e+00]
 [9.40e+01 3.51e+02 0.00e+00]]
Last confusion matrix : 
 [[371. 537. 405.]
 [498. 782. 484.]
 [135. 158. 152.]]


([1.1035125803303074,
  1.1027524954563863,
  1.1015965390849758,
  1.0996199266330615,
  1.0989999996649253,
  1.0975281966699135,
  1.0980317334871035,
  1.096760807810603,
  1.094642838916263,
  1.0921493704254563,
  1.0856633411871421,
  1.0808984911119617,
  1.079243009154861,
  1.0720000782528438,
  1.0639379926629968,
  1.0644855048205402,
  1.0629471153826326,
  1.0626411534644462,
  1.0675591198173728,
  1.057789963644904,
  1.0530537769601152,
  1.0563101317431476,
  1.0481929456865466,
  1.0433996529192537,
  1.047333226010606,
  1.042408688648327,
  1.0378619029715255,
  1.04803282989038,
  1.0322108945331059,
  1.0369529063637193,
  1.0349283186165061,
  1.035492172112336,
  1.0386268270982277,
  1.0305017748394527,
  1.0291829270285529,
  1.0164774624077049,
  1.022626197015917,
  1.0229348137572005,
  1.007338206510286,
  1.0103116824820235,
  1.011279019149574,
  1.01320817180582,
  1.0030798525423616,
  1.020885957253946,
  1.0117830634117126,
  0.9930426526714016,
  0

#### Updating sampler mid training

In [19]:
class_weights = []
print(class_weights)
weights = [class_weights[torch.argmax(label)] for _, label in train_set]
train_sampler = WeightedRandomSampler(
    weights=weights, num_samples=len(train_set), replacement=True)

[]


IndexError: list index out of range