In [33]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import stock_marker_binance
import pandas as pd
from sklearn import preprocessing as sklearn_prep
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [34]:
EPOCHS =  2000
TIME_FRAME = 12
TRAIN_FRACTION = 0.8

In [35]:
def dataframe_to_tensor(df, timestep_length: int):
    
    '''
    Converts a pandas dataframe to a tensor
    
    Input: Pandas dataframe
            timestep_length (int) - the length of the timestep
    Output: Tensor
    
    '''
    
    # Convert the dataframe to a numpy array
    df = np.array(df)

    n_seq = len(df) - timestep_length + 1
    return np.array([df[i:(i+timestep_length)] for i in range(n_seq)])

In [36]:
def get_train_test_sets(data, train_frac):
    n_train = int(data.shape[0] * train_frac)
    x_train = data[:n_train, :, :-4]
    y_train = data[:n_train, -1:, -4:].reshape(-1,4)
    x_test = data[n_train:, :, :-4]
    y_test = data[n_train:, -1:, -4:].reshape(-1,4)
    return x_train, y_train, x_test, y_test

In [37]:
# physical_devices = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [38]:
df = pd.read_csv('ETH_1h_history as of 09-01-2024.csv', index_col=0)

In [39]:
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades
0,2020-01-01 00:00:00,129.16,129.19,128.68,128.87,7769.17336,1577840399999,1000930.0,2504
1,2020-01-01 01:00:00,128.87,130.65,128.78,130.64,11344.65516,1577843999999,1474278.0,4885
2,2020-01-01 02:00:00,130.63,130.98,130.35,130.85,7603.35623,1577847599999,994025.6,3046
3,2020-01-01 03:00:00,130.85,130.89,129.94,130.2,4968.55433,1577851199999,647361.0,2818
4,2020-01-01 04:00:00,130.21,130.74,130.15,130.2,3397.90747,1577854799999,443006.7,2264


In [40]:
df_result = stock_marker_binance.sorting_timestamp(df)

df_result['hour_sin'] = np.sin(df_result['timestamp'].dt.hour / 24 * 2 * np.pi)
df_result['hour_cos'] = np.cos(df_result['timestamp'].dt.hour / 24 * 2 * np.pi)

df_result['day_sin'] = np.sin(df_result['timestamp'].dt.day / 30 * 2 * np.pi)
df_result['day_cos'] = np.cos(df_result['timestamp'].dt.day / 30 * 2 * np.pi)

df_result['mon_sin'] = np.sin(df_result['timestamp'].dt.month / 12 * 2 * np.pi)
df_result['mon_cos'] = np.cos(df_result['timestamp'].dt.month / 12 * 2 * np.pi)

df_result['weekday_sin'] = np.sin(df_result['timestamp'].dt.weekday / 7 * 2 * np.pi)
df_result['weekday_cos'] = np.cos(df_result['timestamp'].dt.weekday / 7 * 2 * np.pi)

df_result['year'] = df_result['timestamp'].dt.year


In [41]:
df_result.head(10)

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year
0,2020-01-01 00:00:00,129.16,129.19,128.68,128.87,7769.17336,1577840399999,1000930.0,2504,0.0,1.0,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
1,2020-01-01 01:00:00,128.87,130.65,128.78,130.64,11344.65516,1577843999999,1474278.0,4885,0.258819,0.9659258,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
2,2020-01-01 02:00:00,130.63,130.98,130.35,130.85,7603.35623,1577847599999,994025.6,3046,0.5,0.8660254,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
3,2020-01-01 03:00:00,130.85,130.89,129.94,130.2,4968.55433,1577851199999,647361.0,2818,0.707107,0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
4,2020-01-01 04:00:00,130.21,130.74,130.15,130.2,3397.90747,1577854799999,443006.7,2264,0.866025,0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
5,2020-01-01 05:00:00,130.2,130.47,130.11,130.3,4243.6064,1577858399999,552972.7,2426,0.965926,0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
6,2020-01-01 06:00:00,130.31,130.75,130.26,130.44,3668.90166,1577861999999,478944.2,2347,1.0,6.123234000000001e-17,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
7,2020-01-01 07:00:00,130.47,130.71,130.14,130.24,4147.17413,1577865599999,540770.7,2568,0.965926,-0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
8,2020-01-01 08:00:00,130.24,130.41,129.87,130.36,7541.44497,1577869199999,980894.0,3039,0.866025,-0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
9,2020-01-01 09:00:00,130.4,130.62,130.13,130.17,4808.20496,1577872799999,626819.0,2696,0.707107,-0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020


In [42]:

df_result = stock_marker_binance.adding_MAs(df_result, 'close')
df_result = stock_marker_binance.adding_MAs(df_result, 'quote_asset_volume')
df_result = stock_marker_binance.adding_MAs(df_result, 'num_trades')
print("Complete adding MAs")
df_result = stock_marker_binance.adding_ratio(df_result, 'close')
df_result = stock_marker_binance.adding_ratio(df_result, 'quote_asset_volume')
df_result = stock_marker_binance.adding_ratio(df_result, 'num_trades')
print("Complete adding ratios")
df_result = stock_marker_binance.marking_buy_sell_actions(df_result)
print("Complete marking buy sell actions")
df_result = stock_marker_binance.marking_adjacent_actions(df_result)
print("Complete marking adjacent actions")
df_result = stock_marker_binance.marking_hold_wait_actions(df_result)
print("Complete marking hold wait actions")

df_result = df_result.drop(columns=['timestamp','open', 'high', 'low' ,'close_time', 'close', 'volume', 'quote_asset_volume', 'num_trades'])
df_result=df_result.dropna()
df_result.reset_index(inplace=True, drop=True)
# df_result = stock_marker_binance.renaming_cols(df_result, STOCK_TICKER + "_", ['timestamp', 'action'])

Complete adding MAs
Complete adding ratios
Complete marking buy sell actions
Complete marking adjacent actions
Complete marking hold wait actions


In [43]:
df_result.groupby('action').count()

Unnamed: 0_level_0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,MA3_close,...,RATIO_quote_asset_volume_and_MA768,RATIO_num_trades_and_MA3,RATIO_num_trades_and_MA6,RATIO_num_trades_and_MA12,RATIO_num_trades_and_MA24,RATIO_num_trades_and_MA48,RATIO_num_trades_and_MA96,RATIO_num_trades_and_MA192,RATIO_num_trades_and_MA384,RATIO_num_trades_and_MA768
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B,2312,2312,2312,2312,2312,2312,2312,2312,2312,2312,...,2312,2312,2312,2312,2312,2312,2312,2312,2312,2312
H,11013,11013,11013,11013,11013,11013,11013,11013,11013,11013,...,11013,11013,11013,11013,11013,11013,11013,11013,11013,11013
S,2619,2619,2619,2619,2619,2619,2619,2619,2619,2619,...,2619,2619,2619,2619,2619,2619,2619,2619,2619,2619
W,18617,18617,18617,18617,18617,18617,18617,18617,18617,18617,...,18617,18617,18617,18617,18617,18617,18617,18617,18617,18617


In [44]:
# Dropping cols with MAs
MA_list =[]
for col in df_result.columns:
    if col[:2]=="MA":
        MA_list.append(col)
df_result.drop(columns=MA_list, inplace=True)

In [45]:
df_result

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RATIO_num_trades_and_MA3,RATIO_num_trades_and_MA6,RATIO_num_trades_and_MA12,RATIO_num_trades_and_MA24,RATIO_num_trades_and_MA48,RATIO_num_trades_and_MA96,RATIO_num_trades_and_MA192,RATIO_num_trades_and_MA384,RATIO_num_trades_and_MA768,action
0,-0.258819,9.659258e-01,0.207912,0.978148,0.866025,0.500000,-0.974928,-0.222521,2020,1.002457,...,1.153534,1.088149,0.894068,0.824493,0.726612,0.664388,0.703261,0.643099,0.640876,H
1,0.000000,1.000000e+00,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.993543,...,1.428888,1.601651,1.343553,1.274274,1.133245,1.043686,1.102091,1.005296,1.000096,H
2,0.258819,9.659258e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.997697,...,0.871697,1.030789,0.946256,0.880169,0.763269,0.705367,0.742252,0.675604,0.671939,H
3,0.500000,8.660254e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.995460,...,1.235835,1.558693,1.563405,1.523372,1.326959,1.226040,1.288234,1.179012,1.170263,H
4,0.707107,7.071068e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.997588,...,0.860081,0.948998,1.000084,0.960054,0.840083,0.776353,0.813204,0.747692,0.739866,H
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34556,0.707107,7.071068e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.999538,...,0.811817,0.644617,0.455961,0.478128,0.582740,0.692765,0.591776,0.602909,0.614290,W
34557,0.866025,5.000000e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,1.001322,...,1.046072,0.677817,0.500209,0.510001,0.617640,0.738253,0.630463,0.642304,0.654311,W
34558,0.965926,2.588190e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.999021,...,1.000576,0.682097,0.522844,0.495423,0.596806,0.715863,0.611617,0.624796,0.634999,W
34559,1.000000,6.123234e-17,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.997117,...,0.998771,0.918378,0.640296,0.516737,0.602169,0.727579,0.619264,0.633773,0.643478,W


In [46]:
# le = sklearn_prep.LabelEncoder()
# df_result['action'] = le.fit_transform(df_result['action'])

enc = OneHotEncoder()

OHE_array = enc.fit_transform(df_result[['action']]).toarray()

class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                 classes=np.unique(df_result['action']),
                                                 y=df_result['action'])



class_weights_dict = dict(zip(enc.categories_[0], class_weights))

print(class_weights)
print(class_weights_dict.keys())


[3.73713235 0.78455008 3.29906453 0.46410539]
dict_keys(['B', 'H', 'S', 'W'])


In [47]:
class_weights_dict=dict(zip(range(0,4), class_weights))

In [48]:
class_weights_dict

{0: 3.7371323529411766,
 1: 0.7845500771815127,
 2: 3.299064528445972,
 3: 0.4641053875490143}

In [49]:
OHE_df = pd.DataFrame(OHE_array, columns=enc.categories_[0])

In [50]:
df_result = pd.merge(df_result, OHE_df, left_index=True, right_index=True)
df_result.drop(columns=['action'], inplace=True)

In [51]:
df_result

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RATIO_num_trades_and_MA24,RATIO_num_trades_and_MA48,RATIO_num_trades_and_MA96,RATIO_num_trades_and_MA192,RATIO_num_trades_and_MA384,RATIO_num_trades_and_MA768,B,H,S,W
0,-0.258819,9.659258e-01,0.207912,0.978148,0.866025,0.500000,-0.974928,-0.222521,2020,1.002457,...,0.824493,0.726612,0.664388,0.703261,0.643099,0.640876,0.0,1.0,0.0,0.0
1,0.000000,1.000000e+00,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.993543,...,1.274274,1.133245,1.043686,1.102091,1.005296,1.000096,0.0,1.0,0.0,0.0
2,0.258819,9.659258e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.997697,...,0.880169,0.763269,0.705367,0.742252,0.675604,0.671939,0.0,1.0,0.0,0.0
3,0.500000,8.660254e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.995460,...,1.523372,1.326959,1.226040,1.288234,1.179012,1.170263,0.0,1.0,0.0,0.0
4,0.707107,7.071068e-01,0.406737,0.913545,0.866025,0.500000,-0.781831,0.623490,2020,0.997588,...,0.960054,0.840083,0.776353,0.813204,0.747692,0.739866,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34556,0.707107,7.071068e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.999538,...,0.478128,0.582740,0.692765,0.591776,0.602909,0.614290,0.0,0.0,0.0,1.0
34557,0.866025,5.000000e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,1.001322,...,0.510001,0.617640,0.738253,0.630463,0.642304,0.654311,0.0,0.0,0.0,1.0
34558,0.965926,2.588190e-01,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.999021,...,0.495423,0.596806,0.715863,0.611617,0.624796,0.634999,0.0,0.0,0.0,1.0
34559,1.000000,6.123234e-17,0.951057,-0.309017,0.500000,0.866025,0.781831,0.623490,2024,0.997117,...,0.516737,0.602169,0.727579,0.619264,0.633773,0.643478,0.0,0.0,0.0,1.0


In [52]:
print(class_weights_dict)

{0: 3.7371323529411766, 1: 0.7845500771815127, 2: 3.299064528445972, 3: 0.4641053875490143}


In [53]:
df_result.head()

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RATIO_num_trades_and_MA24,RATIO_num_trades_and_MA48,RATIO_num_trades_and_MA96,RATIO_num_trades_and_MA192,RATIO_num_trades_and_MA384,RATIO_num_trades_and_MA768,B,H,S,W
0,-0.258819,0.965926,0.207912,0.978148,0.866025,0.5,-0.974928,-0.222521,2020,1.002457,...,0.824493,0.726612,0.664388,0.703261,0.643099,0.640876,0.0,1.0,0.0,0.0
1,0.0,1.0,0.406737,0.913545,0.866025,0.5,-0.781831,0.62349,2020,0.993543,...,1.274274,1.133245,1.043686,1.102091,1.005296,1.000096,0.0,1.0,0.0,0.0
2,0.258819,0.965926,0.406737,0.913545,0.866025,0.5,-0.781831,0.62349,2020,0.997697,...,0.880169,0.763269,0.705367,0.742252,0.675604,0.671939,0.0,1.0,0.0,0.0
3,0.5,0.866025,0.406737,0.913545,0.866025,0.5,-0.781831,0.62349,2020,0.99546,...,1.523372,1.326959,1.22604,1.288234,1.179012,1.170263,0.0,1.0,0.0,0.0
4,0.707107,0.707107,0.406737,0.913545,0.866025,0.5,-0.781831,0.62349,2020,0.997588,...,0.960054,0.840083,0.776353,0.813204,0.747692,0.739866,0.0,1.0,0.0,0.0


In [54]:
df_tensor = dataframe_to_tensor(df_result, TIME_FRAME)

In [55]:
df_tensor.shape

(34550, 12, 40)

In [56]:
x_train, y_train, x_test, y_test = get_train_test_sets(df_tensor, TRAIN_FRACTION)

In [57]:
print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

(27640, 12, 36)
(27640, 4)
(6910, 12, 36)
(6910, 4)


In [58]:
normal_layer = preprocessing.Normalization()
normal_layer.adapt(x_train)
    
inputs = keras.Input(shape=(x_train.shape[1],x_train.shape[2]), name='Input')

In [59]:
x = normal_layer(inputs)
x = layers.LSTM(72, activation="tanh", return_sequences=True)(x)
x = layers.LSTM(36, activation="tanh")(x)
output_layer = layers.Dense(len(class_weights_dict), name='output', activation='softmax' ) (x)
# output_layer = layers.Dense(1, name='output', activation='softmax') (x)

In [60]:
model = keras.Model(
                    inputs=inputs,
                    outputs=output_layer
                    )

In [64]:
model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.000001),
        loss={
            "output": keras.losses.CategoricalCrossentropy(name="loss")
            },
        weighted_metrics = [keras.metrics.CategoricalAccuracy(name="cat_acc", dtype=None),
                  keras.metrics.AUC(name="PR", curve='PR')],
        # metrics=[keras.metrics.CategoricalAccuracy(name="cat_acc", dtype=None),
        #           keras.metrics.AUC(name="auc")]
            
                )
 

In [65]:
history = model.fit(
        {"Input": x_train},
        {"output":y_train},
        # class_weight=class_weights_dict,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_PR', patience=20, mode='max', restore_best_weights=True)],
        epochs=EPOCHS,
        validation_data = (x_test, y_test)
            )

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000

In [63]:
import datetime

x = datetime.datetime.now()
model.save("models/" + STOCK_TICKER + " " + x.strftime("%d-%m-%Y") + ".hd5",  save_format = 'h5')

NameError: name 'STOCK_TICKER' is not defined

: 

: 

: 

: 