In [1]:
from datahandler import *
from playground import *
from gui import *
from torchutils import *
from models import *
from torch.utils.data import WeightedRandomSampler
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'

In [3]:
seq_length = 15
agent_horizon = 7
crossover_horizon = 3

In [4]:
minute = True

### Data Loading and Preprocessing

In [5]:
data = None
if minute:
    data = pd.read_csv("BTCUSDT_15m.csv")
    data.drop(["Unnamed: 0", "Close Unix"], axis=1, inplace=True)

else:
    if not os.path.exists('./BTCUSDT_DB.csv'):
        data = pd.read_csv("BTCUSDT_1m.csv")
        data = get_dollar_bars(data)
        data.to_csv('./BTCUSDT_DB.csv', sep=',')
    else:
        data = pd.read_csv('./BTCUSDT_DB.csv', sep=',')

handler = NewDataHandler(dataset=data)

In [6]:
ma1, ma2 = 5, 10
k1, k2 = 10, 30
handler.data = ma_crossover_labelling(handler.data, ma1, ma2, crossover_horizon)
handler.data.rename({'Label': 'Crossover'}, axis=1, inplace=True)
dummies = pd.get_dummies(handler.data['Crossover'], prefix='Crossover') * 1
# handler.data.drop('Crossover', axis=1, inplace=True)
handler.data = pd.concat([handler.data, dummies], axis=1)
handler.data = triple_barrier_labelling(handler.data, upper_barrier=1.004, lower_barrier=0.996, time_limit=agent_horizon)
handler.data = add_kendall_tau(handler.data, k1)
handler.data = add_kendall_tau(handler.data, k2)

handler.add_indicators([Indicators.RSI, Indicators.MACD,
                       Indicators.ADX, Indicators.OBV, Indicators.NTRADES])

handler.create_var_indicator([Indicators.RSI, Indicators.MACD, Indicators.ADX, Indicators.OBV, Indicators.LOG_RET, Indicators.NTRADES])


# display(handler.data[handler.data.isnull().any(axis=1)]) # Displaying all the rows that contain missing values to see if they are spread across the dataframe

handler.data.replace([np.inf, -np.inf], 0, inplace=True)

# handler.standardize_data()

handler.data.dropna(axis=0, inplace=True)

handler.create_predict_data()

predict_data = handler.predict_data
predict_data['Target'] = handler.data['Label']
predict_data['Crossover'] = handler.data['Crossover']
predict_data[[f'Kendall_{k1}', f'Kendall_{k2}']] = handler.data[[f'Kendall_{k1}', f'Kendall_{k2}']]
predict_data[f'MA{ma1} Var'], predict_data[f'MA{ma2} Var'] = handler.data[f'MA{ma1} Var'], handler.data[f'MA{ma2} Var']
predict_data[dummies.columns] = handler.data[dummies.columns]
predict_data[f'MA{ma1} UP'], predict_data[f'MA{ma2} UP'] = (handler.data[f'MA{ma1}'] > handler.data[f'MA{ma2}']) * 1, (handler.data[f'MA{ma1}'] < handler.data[f'MA{ma2}']) * 1
predict_data.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data['Target'] = handler.data['Label']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data['Crossover'] = handler.data['Crossover']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  predict_data[[f'Kendall_{k1}', f'Kendall_{k2}']] = handler.data[[f'Kendall_{k1}', f'Kendall_{k2}']]
A va

In [7]:
from sklearn.metrics import confusion_matrix



confusion_matrix(handler.data['Crossover'], handler.data['Label'], labels=[-1, 0, 1])

array([[17398,  6357,  6848],
       [52868, 27474, 51800],
       [ 7271,  6612, 16728]], dtype=int64)

### Loading pretrained models

In [8]:
crossover_lstm_model = torch.load(
    f'./models/crossover_{seq_length}_to_{crossover_horizon}.pt').to(device)
lstm_model = torch.load(f'./models/agent_{seq_length}_to_{agent_horizon}.pt').to(device)


### Creating Torch Dataset

In [9]:
trade_columns = predict_data.drop(['Crossover'], axis=1).columns
crossover_columns = predict_data.drop(dummies.columns, axis=1).columns
ma_dataset = TSDataset(
    predict_data[crossover_columns], seq_length, 'Crossover')

In [10]:
trade_columns

Index(['Unix', 'RSI Var', 'MACD Var', 'MACD_H Var', 'ADX14 Var', '-DM Var',
       '+DM Var', 'OBV Var', 'NTrades Var', 'RSI_30-', 'RSI_BTW', 'RSI_70+',
       'LOG_RET', 'Target', 'Kendall_10', 'Kendall_30', 'MA5 Var', 'MA10 Var',
       'Crossover_-1', 'Crossover_0', 'Crossover_1', 'MA5 UP', 'MA10 UP'],
      dtype='object')

In [11]:
crossover_columns

Index(['Unix', 'RSI Var', 'MACD Var', 'MACD_H Var', 'ADX14 Var', '-DM Var',
       '+DM Var', 'OBV Var', 'NTrades Var', 'RSI_30-', 'RSI_BTW', 'RSI_70+',
       'LOG_RET', 'Target', 'Crossover', 'Kendall_10', 'Kendall_30', 'MA5 Var',
       'MA10 Var', 'MA5 UP', 'MA10 UP'],
      dtype='object')

### Replacing real crossover predictions by infered crossover predictions

In [12]:
predicted_crossover_outputs, predicted_crossover_targets = eval_lstm(crossover_lstm_model, ma_dataset, 64, crossover_lstm_model.num_layers, crossover_lstm_model.hidden_size, device)

predicted_crossover_outputs = pd.Series(
    (torch.argmax(predicted_crossover_outputs, axis=-1) - 1).cpu()).shift(seq_length)

crossover_prediction_dummies = pd.get_dummies(
    predicted_crossover_outputs, prefix='Crossover') * 1

transition_predict_data = predict_data.copy()
transition_predict_data['Crossover'] = predicted_crossover_outputs
transition_predict_data[dummies.columns] = crossover_prediction_dummies
transition_predict_data.dropna(axis=0, inplace=True)

Accuracy : 0.7482168810547168 || Loss : 0.795134961605072
Confusion matrix : 
[[1.73730e+04 1.32240e+04 3.00000e+00]
 [1.23580e+04 1.09383e+05 1.03920e+04]
 [1.00000e+00 1.27020e+04 1.79050e+04]]


In [13]:
from sklearn.metrics import confusion_matrix



m = confusion_matrix(transition_predict_data['Crossover'], transition_predict_data['Target'], labels=[-1, 0, 1])
accuracy = (m.diagonal().sum()) / m.sum()
m, accuracy

(array([[11898,  6364, 11466],
        [54653, 27891, 52757],
        [10972,  6186, 11139]], dtype=int64),
 0.2634306818534496)

In [14]:
m = confusion_matrix(predict_data['Crossover'], predict_data['Target'], labels=[-1, 0, 1])
accuracy = (m.diagonal().sum()) / m.sum()
m, accuracy

(array([[17398,  6357,  6848],
        [52868, 27474, 51800],
        [ 7271,  6612, 16728]], dtype=int64),
 0.31858333850514076)

In [15]:
dataset = TSDataset(transition_predict_data[trade_columns], seq_length)

In [16]:
argmax_dummy = np.argmax(transition_predict_data[dummies.columns].values, axis=1)
argmax_values = pd.Series(argmax_dummy) - 1
(transition_predict_data['Crossover'].to_list() == argmax_values).value_counts() / argmax_values.shape[0]

True    1.0
Name: count, dtype: float64

#### Splitting regular model dataset

In [17]:
train_indices = range(round(0.7 * len(dataset)), round(0.9 * len(dataset)), int(seq_length / 5))
val_indices = range(round(0.9 * len(dataset)), len(dataset) - seq_length)

train_set = torch.utils.data.Subset(dataset, train_indices)
# Validation set will be training set for Meta Labelling
val_set = torch.utils.data.Subset(dataset, val_indices)

#### Training set label distribution

In [18]:
label_distribution = (transition_predict_data.loc[train_set.indices, 'Target'].value_counts(
) / transition_predict_data.loc[train_set.indices, 'Target'].shape[0]).sort_index().to_list()
display(transition_predict_data.loc[train_set.indices, 'Target'].value_counts())
label_distribution

Target
-1.0    5830
 1.0    5534
 0.0    1524
Name: count, dtype: int64

[0.45235878336436997, 0.11824953445065177, 0.42939168218497825]

#### Validation set label distribution

In [19]:
val_label_distribution = (transition_predict_data.loc[val_set.indices, 'Target'].value_counts(
) / transition_predict_data.loc[val_set.indices, 'Target'].shape[0]).sort_index().to_list()
display(transition_predict_data.loc[val_set.indices, 'Target'].value_counts())
val_label_distribution

Target
 0.0    7222
-1.0    6095
 1.0    5999
Name: count, dtype: int64

[0.31554151998343344, 0.37388693311244564, 0.3105715469041209]

### Creating Sampler

In [20]:
class_weights = [1 / p for p in label_distribution]
print(class_weights)
weights = [class_weights[torch.argmax(label)] for _, label in train_set]
train_sampler = WeightedRandomSampler(
    weights=weights, num_samples=len(train_set), replacement=True)
# train_sampler = None

[2.2106346483704975, 8.456692913385826, 2.3288760390314422]


### Creating LSTM Model

In [21]:
input_size = lstm_model.input_size
hidden_size = lstm_model.hidden_size
batch_size = 64
num_layers = lstm_model.num_layers
output_size = transition_predict_data['Target'].unique().size
new_lstm_model = LSTMModel(input_size, hidden_size,
                       num_layers, output_size).to(device)

### Training Model

In [22]:
# eval_lstm(lstm_model, val_set, len(val_set), num_layers, hidden_size, device)

In [23]:
weights = torch.Tensor(label_distribution).to(device)
train_lstm(lstm_model, train_set, val_set, 15, 0.0001,
           batch_size, lstm_model.num_layers, lstm_model.hidden_size, device, train_sampler, class_weights=None)

Best accuracy : 0.3923172499482295 || Best confusion matrix : 
 [[ 604. 3962. 1526.]
 [ 507. 5276. 1451.]
 [ 591. 3701. 1698.]]
Last confusion matrix : 
 [[1011. 2071. 3010.]
 [1076. 2869. 3289.]
 [1005. 1962. 3023.]]


([1.1057418838585957,
  1.099307108633589,
  1.0981216306733612,
  1.0971987200255442,
  1.0975265497028237,
  1.0961679438553233,
  1.0921372966010972,
  1.0950072832626871,
  1.090131932556039,
  1.0923701265070698,
  1.0906509639012931,
  1.0868557597150896,
  1.0865245609000178,
  1.0852835715407192,
  1.0851137118764442],
 [0.3335661080074488,
  0.33123836126629425,
  0.33511793916821847,
  0.353584729981378,
  0.353584729981378,
  0.3612662942271881,
  0.37468963376784603,
  0.3578522656734947,
  0.3787243947858473,
  0.3754655493482309,
  0.38050900062073245,
  0.39020794537554315,
  0.3861731843575419,
  0.3929236499068901,
  0.3914494103041589],
 [tensor(1.1004, device='cuda:0'),
  tensor(1.0996, device='cuda:0'),
  tensor(1.0988, device='cuda:0'),
  tensor(1.0965, device='cuda:0'),
  tensor(1.0979, device='cuda:0'),
  tensor(1.0973, device='cuda:0'),
  tensor(1.0935, device='cuda:0'),
  tensor(1.0921, device='cuda:0'),
  tensor(1.1000, device='cuda:0'),
  tensor(1.0953, devic

#### Updating sampler mid training

In [24]:
class_weights = []
print(class_weights)
weights = [class_weights[torch.argmax(label)] for _, label in train_set]
train_sampler = WeightedRandomSampler(
    weights=weights, num_samples=len(train_set), replacement=True)

[]


IndexError: list index out of range