In [47]:
import datetime
import stock_marker_binance
import model_design
import pandas as pd
from sklearn.utils import class_weight
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [48]:
EPOCHS =  2000
TIME_FRAME = [2, 3, 18, 21, 24]
TRAIN_FRACTION = 0.75

In [49]:
def dataframe_to_tensor(df, timestep_length: int):
    
    '''
    Converts a pandas dataframe to a tensor
    
    Input: Pandas dataframe
            timestep_length (int) - the length of the timestep
    Output: Tensor
    
    '''
    
    # Convert the dataframe to a numpy array
    df = np.array(df)

    n_seq = len(df) - timestep_length + 1
    return np.array([df[i:(i+timestep_length)] for i in range(n_seq)])

In [50]:
def get_train_test_sets(data, train_frac):
    n_train = int(data.shape[0] * train_frac)
    x_train = data[:n_train, :, :-4]
    y_train = data[:n_train, -1:, -4:].reshape(-1,4)
    x_test = data[n_train:, :, :-4]
    y_test = data[n_train:, -1:, -4:].reshape(-1,4)
    return x_train, y_train, x_test, y_test

In [51]:
# physical_devices = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [52]:
df = pd.read_csv('ETH_1h_history as of 09-01-2024.csv', index_col=0)

In [53]:
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades
0,2020-01-01 00:00:00,129.16,129.19,128.68,128.87,7769.17336,1577840399999,1000930.0,2504
1,2020-01-01 01:00:00,128.87,130.65,128.78,130.64,11344.65516,1577843999999,1474278.0,4885
2,2020-01-01 02:00:00,130.63,130.98,130.35,130.85,7603.35623,1577847599999,994025.6,3046
3,2020-01-01 03:00:00,130.85,130.89,129.94,130.2,4968.55433,1577851199999,647361.0,2818
4,2020-01-01 04:00:00,130.21,130.74,130.15,130.2,3397.90747,1577854799999,443006.7,2264


In [54]:
df_result = stock_marker_binance.sorting_timestamp(df)

df_result['hour_sin'] = np.sin(df_result['timestamp'].dt.hour / 24 * 2 * np.pi)
df_result['hour_cos'] = np.cos(df_result['timestamp'].dt.hour / 24 * 2 * np.pi)

df_result['day_sin'] = np.sin(df_result['timestamp'].dt.day / 30 * 2 * np.pi)
df_result['day_cos'] = np.cos(df_result['timestamp'].dt.day / 30 * 2 * np.pi)

df_result['mon_sin'] = np.sin(df_result['timestamp'].dt.month / 12 * 2 * np.pi)
df_result['mon_cos'] = np.cos(df_result['timestamp'].dt.month / 12 * 2 * np.pi)

df_result['weekday_sin'] = np.sin(df_result['timestamp'].dt.weekday / 7 * 2 * np.pi)
df_result['weekday_cos'] = np.cos(df_result['timestamp'].dt.weekday / 7 * 2 * np.pi)

df_result['year'] = df_result['timestamp'].dt.year


In [55]:
df_result.head(10)

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year
0,2020-01-01 00:00:00,129.16,129.19,128.68,128.87,7769.17336,1577840399999,1000930.0,2504,0.0,1.0,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
1,2020-01-01 01:00:00,128.87,130.65,128.78,130.64,11344.65516,1577843999999,1474278.0,4885,0.258819,0.9659258,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
2,2020-01-01 02:00:00,130.63,130.98,130.35,130.85,7603.35623,1577847599999,994025.6,3046,0.5,0.8660254,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
3,2020-01-01 03:00:00,130.85,130.89,129.94,130.2,4968.55433,1577851199999,647361.0,2818,0.707107,0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
4,2020-01-01 04:00:00,130.21,130.74,130.15,130.2,3397.90747,1577854799999,443006.7,2264,0.866025,0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
5,2020-01-01 05:00:00,130.2,130.47,130.11,130.3,4243.6064,1577858399999,552972.7,2426,0.965926,0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
6,2020-01-01 06:00:00,130.31,130.75,130.26,130.44,3668.90166,1577861999999,478944.2,2347,1.0,6.123234000000001e-17,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
7,2020-01-01 07:00:00,130.47,130.71,130.14,130.24,4147.17413,1577865599999,540770.7,2568,0.965926,-0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
8,2020-01-01 08:00:00,130.24,130.41,129.87,130.36,7541.44497,1577869199999,980894.0,3039,0.866025,-0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
9,2020-01-01 09:00:00,130.4,130.62,130.13,130.17,4808.20496,1577872799999,626819.0,2696,0.707107,-0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020


In [56]:

df_result = stock_marker_binance.std(df_result, 'close')
print("Complete adding STDs")

df_result = stock_marker_binance.adding_MAs(df_result, ['close', 'quote_asset_volume', 'num_trades'])

print("Complete adding MAs")


df_result = stock_marker_binance.adding_ratio(df_result, ['close', 'quote_asset_volume', 'num_trades'], ['MA'])
df_result = stock_marker_binance.adding_ratio(df_result, ['close'], ['STD'])

print("Complete adding ratios")

df_result = stock_marker_binance.rsi(df_result, 'close')
print("Complete adding RSI")

df_result = stock_marker_binance.marking_buy_sell_actions(df_result)
print("Complete marking buy sell actions")
df_result = stock_marker_binance.marking_adjacent_actions(df_result)
print("Complete marking adjacent actions")
df_result = stock_marker_binance.marking_hold_wait_actions(df_result)
print("Complete marking hold wait actions")

df_result = df_result.drop(columns=['timestamp','open', 'high', 'low' ,'close_time', 'volume',
                                    'close',  'quote_asset_volume', 'num_trades'
                                    ])
df_result=df_result.dropna()
df_result.reset_index(inplace=True, drop=True)
# df_result = stock_marker_binance.renaming_cols(df_result, STOCK_TICKER + "_", ['timestamp', 'action'])

Complete adding STDs
Complete adding MAs
Complete adding ratios
Complete adding RSI
Complete marking buy sell actions
Complete marking adjacent actions
Complete marking hold wait actions


In [57]:
df_result.groupby('action').count()

Unnamed: 0_level_0,close,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,...,RATIO_close_and_STD192,RATIO_close_and_STD384,RSI_3,RSI_6,RSI_12,RSI_24,RSI_48,RSI_96,RSI_192,RSI_384
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B,2341,2341,2341,2341,2341,2341,2341,2341,2341,2341,...,2341,2341,2341,2341,2341,2341,2341,2341,2341,2341
H,11121,11121,11121,11121,11121,11121,11121,11121,11121,11121,...,11121,11121,11121,11121,11121,11121,11121,11121,11121,11121
S,2650,2650,2650,2650,2650,2650,2650,2650,2650,2650,...,2650,2650,2650,2650,2650,2650,2650,2650,2650,2650
W,18832,18832,18832,18832,18832,18832,18832,18832,18832,18832,...,18832,18832,18832,18832,18832,18832,18832,18832,18832,18832


In [58]:
# Dropping cols
drop_cols_list =[]
for col in df_result.columns:
    if col[:2]=="MA" or col[:3]=="STD":
        drop_cols_list.append(col)
df_result.drop(columns=drop_cols_list, inplace=True)

In [59]:
MA_list

['MA3_close',
 'MA6_close',
 'MA12_close',
 'MA24_close',
 'MA48_close',
 'MA96_close',
 'MA192_close',
 'MA384_close',
 'MA3_quote_asset_volume',
 'MA6_quote_asset_volume',
 'MA12_quote_asset_volume',
 'MA24_quote_asset_volume',
 'MA48_quote_asset_volume',
 'MA96_quote_asset_volume',
 'MA192_quote_asset_volume',
 'MA384_quote_asset_volume',
 'MA3_num_trades',
 'MA6_num_trades',
 'MA12_num_trades',
 'MA24_num_trades',
 'MA48_num_trades',
 'MA96_num_trades',
 'MA192_num_trades',
 'MA384_num_trades']

In [60]:
df_result

Unnamed: 0,close,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,...,RATIO_close_and_STD384,RSI_3,RSI_6,RSI_12,RSI_24,RSI_48,RSI_96,RSI_192,RSI_384,action
0,163.78,3.856151e+06,8981,0.000000,1.000000e+00,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,15.659188,23.976608,53.723404,48.656163,44.550749,48.632353,57.758776,56.648599,56.004265,B
1,164.86,1.920838e+06,5321,0.258819,9.659258e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,15.703456,81.545064,61.251863,60.982659,52.007058,46.631016,58.980385,57.188922,56.167106,S
2,165.67,4.761351e+06,10545,0.500000,8.660254e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,15.708093,82.008368,64.383562,69.186047,62.968516,48.762646,59.161267,57.122261,56.022419,W
3,165.14,5.222252e+06,11685,0.707107,7.071068e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,15.591139,66.315789,46.404110,63.910506,61.916462,48.588710,60.168406,57.135441,55.888717,W
4,166.66,3.202825e+06,7788,0.866025,5.000000e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,15.659499,86.548223,81.502890,67.906574,65.888941,53.258755,60.551124,57.659672,56.242723,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34939,2309.13,2.168060e+07,20175,0.707107,7.071068e-01,0.951057,-0.309017,0.5,0.866025,0.781831,...,42.464665,16.398612,16.022925,64.655541,72.740467,57.264507,54.490455,50.460646,49.832741,W
34940,2315.20,2.015283e+07,21479,0.866025,5.000000e-01,0.951057,-0.309017,0.5,0.866025,0.781831,...,42.577817,79.143258,25.966608,64.142636,72.639208,57.628004,54.738265,51.192459,49.977866,W
34941,2308.77,2.103808e+07,20845,0.965926,2.588190e-01,0.951057,-0.309017,0.5,0.866025,0.781831,...,42.459482,48.380011,21.926070,63.956221,70.387045,56.278739,54.191957,50.841503,50.001657,W
34942,2302.00,1.988220e+07,21123,1.000000,6.123234e-17,0.951057,-0.309017,0.5,0.866025,0.781831,...,42.331585,27.293165,22.106709,48.076616,68.685078,56.153156,53.567713,50.750594,50.245837,W


In [61]:
# le = sklearn_prep.LabelEncoder()
# df_result['action'] = le.fit_transform(df_result['action'])

enc = OneHotEncoder()

OHE_array = enc.fit_transform(df_result[['action']]).toarray()

class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                 classes=np.unique(df_result['action']),
                                                 y=df_result['action'])



class_weights_dict = dict(zip(enc.categories_[0], class_weights))

print(class_weights)
print(class_weights_dict.keys())


[3.73173857 0.78554087 3.29660377 0.46389125]
dict_keys(['B', 'H', 'S', 'W'])


In [62]:
class_weights_dict=dict(zip(range(0,4), class_weights))

In [63]:
class_weights_dict

{0: 3.7317385732592907,
 1: 0.7855408686269221,
 2: 3.2966037735849056,
 3: 0.4638912489379779}

In [64]:
OHE_df = pd.DataFrame(OHE_array, columns=enc.categories_[0])

In [65]:
df_result = pd.merge(df_result, OHE_df, left_index=True, right_index=True)
df_result.drop(columns=['action'], inplace=True)

In [66]:
print(class_weights_dict)

{0: 3.7317385732592907, 1: 0.7855408686269221, 2: 3.2966037735849056, 3: 0.4638912489379779}


In [67]:
df_result.head()

Unnamed: 0,close,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,...,RSI_12,RSI_24,RSI_48,RSI_96,RSI_192,RSI_384,B,H,S,W
0,163.78,3856151.0,8981,0.0,1.0,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,48.656163,44.550749,48.632353,57.758776,56.648599,56.004265,1.0,0.0,0.0,0.0
1,164.86,1920838.0,5321,0.258819,0.965926,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,60.982659,52.007058,46.631016,58.980385,57.188922,56.167106,0.0,0.0,1.0,0.0
2,165.67,4761351.0,10545,0.5,0.866025,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,69.186047,62.968516,48.762646,59.161267,57.122261,56.022419,0.0,0.0,0.0,1.0
3,165.14,5222252.0,11685,0.707107,0.707107,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,63.910506,61.916462,48.58871,60.168406,57.135441,55.888717,0.0,0.0,0.0,1.0
4,166.66,3202825.0,7788,0.866025,0.5,-0.406737,-0.913545,0.5,0.866025,-0.433884,...,67.906574,65.888941,53.258755,60.551124,57.659672,56.242723,0.0,0.0,0.0,1.0


In [68]:
df_result.columns

Index(['close', 'quote_asset_volume', 'num_trades', 'hour_sin', 'hour_cos',
       'day_sin', 'day_cos', 'mon_sin', 'mon_cos', 'weekday_sin',
       'weekday_cos', 'year', 'RATIO_close_and_MA3', 'RATIO_close_and_MA6',
       'RATIO_close_and_MA12', 'RATIO_close_and_MA24', 'RATIO_close_and_MA48',
       'RATIO_close_and_MA96', 'RATIO_close_and_MA192',
       'RATIO_close_and_MA384', 'RATIO_quote_asset_volume_and_MA3',
       'RATIO_quote_asset_volume_and_MA6', 'RATIO_quote_asset_volume_and_MA12',
       'RATIO_quote_asset_volume_and_MA24',
       'RATIO_quote_asset_volume_and_MA48',
       'RATIO_quote_asset_volume_and_MA96',
       'RATIO_quote_asset_volume_and_MA192',
       'RATIO_quote_asset_volume_and_MA384', 'RATIO_num_trades_and_MA3',
       'RATIO_num_trades_and_MA6', 'RATIO_num_trades_and_MA12',
       'RATIO_num_trades_and_MA24', 'RATIO_num_trades_and_MA48',
       'RATIO_num_trades_and_MA96', 'RATIO_num_trades_and_MA192',
       'RATIO_num_trades_and_MA384', 'RATIO_close_and_

In [69]:
max_val_PR = 0.6

for time_frame in TIME_FRAME:
    df_tensor = dataframe_to_tensor(df_result, time_frame)
    x_train, y_train, x_test, y_test = get_train_test_sets(df_tensor, TRAIN_FRACTION)
    
    df_tensor.shape

    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)
    model, history = model_design.model_design(EPOCHS, x_train, y_train, x_test, y_test, class_weights_dict)

    val_PR = round(max(history.history['val_PR']),3)
    
    x = datetime.datetime.now()
    
    model_name = "models/" + x.strftime("%d-%m-%Y %H-%M-%S") +" val_PR " + str(val_PR) + " time frame " + str(time_frame) + ".hd5"

    if val_PR > max_val_PR:
        max_val_PR = val_PR
        model.save(model_name,  save_format = 'h5')
    
    print(model_name)
    print(val_PR)
    print(150 * "-")


(26207, 2, 52)
(26207, 4)
(8736, 2, 52)
(8736, 4)


  saving_api.save_model(


models/21-01-2024 19-20-54 val_PR 0.644 time frame 2.hd5
0.644
------------------------------------------------------------------------------------------------------------------------------------------------------
(26206, 3, 52)
(26206, 4)
(8736, 3, 52)
(8736, 4)
models/21-01-2024 19-22-18 val_PR 0.629 time frame 3.hd5
0.629
------------------------------------------------------------------------------------------------------------------------------------------------------
(26195, 18, 52)
(26195, 4)
(8732, 18, 52)
(8732, 4)
models/21-01-2024 19-25-44 val_PR 0.549 time frame 18.hd5
0.549
------------------------------------------------------------------------------------------------------------------------------------------------------
(26193, 21, 52)
(26193, 4)
(8731, 21, 52)
(8731, 4)
models/21-01-2024 19-29-28 val_PR 0.549 time frame 21.hd5
0.549
-------------------------------------------------------------------------------------------------------------------------------------------

In [70]:
model.summary()

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 24, 52)]          0         
                                                                 
 normalization_10 (Normaliz  (None, 24, 52)            105       
 ation)                                                          
                                                                 
 lstm_20 (LSTM)              (None, 24, 52)            21840     
                                                                 
 lstm_21 (LSTM)              (None, 52)                21840     
                                                                 
 dense_10 (Dense)            (None, 26)                1378      
                                                                 
 output (Dense)              (None, 4)                 108       
                                                          