In [51]:
import datetime
import stock_marker_binance
import model_design
import pandas as pd
from sklearn.utils import class_weight
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from imblearn.over_sampling import SMOTE
import plotly.graph_objects as go
import winsound


In [2]:
# Number of training epochs handed to model_design.model_design for every run
EPOCHS =  2000
# Window lengths (consecutive rows per input sequence) tried one after another
# by the training loop
TIME_FRAME_LIST = [2, 3, 6, 12]
# Leading share of the windows used for training; the rest becomes the test split
TRAIN_FRACTION = 0.8

In [37]:
def plot_model_history(model_history):
    '''
    Show the training and validation 'sens_at_spec' curves in one figure.

    Input: model_history - mapping that holds a sequence of values under
            'sens_at_spec' and under 'val_sens_at_spec'
            (the training cell passes ``history.history``)
    Output: None (the Plotly figure is displayed)
    '''
    fig = go.Figure()

    # One line trace per metric, training curve first
    for metric_name in ('sens_at_spec', 'val_sens_at_spec'):
        curve = go.Scatter(y=model_history[metric_name],
                           mode='lines',
                           name=metric_name)
        fig.add_trace(curve)

    fig.show()

In [3]:
def dataframe_to_tensor(df, timestep_length: int):

    '''
    Converts a pandas dataframe to a tensor of overlapping sliding windows

    Window i holds rows i .. i + timestep_length - 1, so consecutive windows
    are shifted by exactly one row.

    Input: Pandas dataframe (anything np.asarray accepts works as well)
            timestep_length (int) - the length of the timestep, must be >= 1
    Output: Tensor of shape (len(df) - timestep_length + 1, timestep_length, ...)
            where "..." are the trailing dimensions of the input.
            When the input has fewer rows than timestep_length, an empty tensor
            of shape (0, timestep_length, ...) is returned so that downstream
            slicing still sees the expected number of dimensions.
    Raises: ValueError if timestep_length is smaller than 1

    '''

    if timestep_length < 1:
        raise ValueError("timestep_length must be a positive integer, got "
                         + str(timestep_length))

    # Convert the dataframe to a numpy array
    values = np.asarray(df)

    n_seq = len(values) - timestep_length + 1
    if n_seq <= 0:
        # Not enough rows for a single window: keep rank and dtype consistent
        return np.empty((0, timestep_length) + values.shape[1:], dtype=values.dtype)

    return np.stack([values[start:start + timestep_length] for start in range(n_seq)])

In [4]:
def smote(df, train_frac, random_state=None):
    '''
    Oversample the minority class using SMOTE prior to OHE.

    Only the leading ``train_frac`` share of the rows is resampled; the
    remaining rows (the test part) are appended unchanged after it.

    Input: df - dataframe with the feature columns and an 'action' label column
            train_frac - fraction of the rows (from the top) treated as training data
            random_state - optional seed forwarded to SMOTE so that a run can be
                           reproduced; None keeps the previous unseeded behaviour
    Output: (dataframe, n_test) - the resampled training rows followed by the
            untouched test rows, with 'action' as the last column and a fresh
            0..n-1 index, plus the number of test rows at the end of the frame
    '''

    n_train = int(df.shape[0] * train_frac)
    n_test = df.shape[0] - n_train

    train_data = df.iloc[:n_train, :]
    test_data = df.iloc[n_train:, :]

    x_train = train_data.drop(columns=['action'])
    y_train = train_data['action']

    x_test = test_data.drop(columns=['action'])
    y_test = test_data['action']

    smote_inst = SMOTE(sampling_strategy='auto', random_state=random_state)
    x_train, y_train = smote_inst.fit_resample(x_train, y_train)

    train_data = pd.concat([x_train, y_train], axis=1)
    test_data = pd.concat([x_test, y_test], axis=1)

    # The resampled training part has more rows than before, so its index labels
    # can collide with the labels the test rows still carry (presumably
    # imbalanced-learn numbers its output from 0 - verify). Renumbering keeps
    # the index unique, which the index-based merge in OneHotEncoding relies on.
    df = pd.concat([train_data, test_data], axis=0, ignore_index=True)

    return df, n_test

In [5]:
def OneHotEncoding(df_result):
    '''
    Replace the 'action' label column by one-hot columns and compute class weights.

    Input: df_result - dataframe containing an 'action' column; its index is
            expected to be unique
    Output: (dataframe, class_weights_dict)
            dataframe - df_result without 'action' and with one 0/1 column per
                        category appended on the right (named after the category)
            class_weights_dict - maps each category label to its 'balanced'
                        class weight computed over all rows of df_result
    Side effects: prints the weight array and the dictionary keys.
    The caller's dataframe is not modified.
    '''
    enc = OneHotEncoder()

    OHE_array = enc.fit_transform(df_result[['action']]).toarray()
    categories = enc.categories_[0]

    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                    classes=np.unique(df_result['action']),
                                                    y=df_result['action'])

    # np.unique and the encoder both list the labels in sorted order, so the
    # i-th weight belongs to the i-th category
    class_weights_dict = dict(zip(categories, class_weights))

    print(class_weights)
    print(class_weights_dict.keys())

    # Build the one-hot frame on the caller's index: with a default 0..n-1 index
    # the index-based merge below would drop or misalign rows whenever
    # df_result is not indexed 0..n-1 (e.g. after filtering without a reset)
    OHE_df = pd.DataFrame(OHE_array, columns=categories, index=df_result.index)
    df_result = pd.merge(df_result, OHE_df, left_index=True, right_index=True)
    df_result = df_result.drop(columns=['action'])
    return df_result, class_weights_dict

In [6]:
def get_train_test_sets(data, train_frac, n_classes: int = 4):
    '''
    Split a window tensor into train/test features and labels.

    The last ``n_classes`` entries along the final axis are the one-hot label
    columns; everything before them is a feature. The label of a window is
    taken from its last timestep. The split is positional: the first
    ``train_frac`` share of the windows is the training set.

    Input: data - array of shape (n_windows, timestep_length, n_features + n_classes)
            train_frac - fraction of the windows (from the start) used for training
            n_classes (int) - number of trailing label columns, default 4
    Output: x_train, y_train, x_test, y_test where
            x_* has shape (n, timestep_length, n_features) and
            y_* has shape (n, n_classes)
    Raises: ValueError if n_classes is smaller than 1
    '''

    if n_classes < 1:
        # -0 would slice from the start and silently turn every column into a label
        raise ValueError("n_classes must be a positive integer, got " + str(n_classes))

    n_train = int(data.shape[0] * train_frac)

    x_train = data[:n_train, :, :-n_classes]
    y_train = data[:n_train, -1:, -n_classes:].reshape(-1, n_classes)
    x_test = data[n_train:, :, :-n_classes]
    y_test = data[n_train:, -1:, -n_classes:].reshape(-1, n_classes)
    return x_train, y_train, x_test, y_test

In [7]:
# import tensorflow as tf
# physical_devices = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [8]:
# Load the raw ETH price history; the first CSV column becomes the row index
df = pd.read_csv('ETH_1h_history as of 09-01-2024.csv', index_col=0)

In [9]:
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades
0,01/01/2020 00:00,129.16,129.19,128.68,128.87,7769.17336,1577840000000.0,1000930.0,2504
1,01/01/2020 01:00,128.87,130.65,128.78,130.64,11344.65516,1577840000000.0,1474278.0,4885
2,01/01/2020 02:00,130.63,130.98,130.35,130.85,7603.35623,1577850000000.0,994025.6,3046
3,01/01/2020 03:00,130.85,130.89,129.94,130.2,4968.55433,1577850000000.0,647361.0,2818
4,01/01/2020 04:00,130.21,130.74,130.15,130.2,3397.90747,1577850000000.0,443006.7,2264


In [10]:
df.dtypes

timestamp              object
open                  float64
high                  float64
low                   float64
close                 float64
volume                float64
close_time            float64
quote_asset_volume    float64
num_trades              int64
dtype: object

In [11]:
# The displayed result shows 'timestamp' as datetimes after this call
# (presumably it also orders the rows by time - verify in stock_marker_binance)
df_result = stock_marker_binance.sorting_timestamp(df)

# Cyclical encoding of the calendar parts: value / period * 2*pi, then sin and cos.
# NOTE(review): 'day' uses a period of 30 although the day of month reaches 31,
# so day 31 receives the same encoding as day 1 - confirm this is intended
# before changing it, since trained models depend on the current values.
for sin_col, cos_col, component, period in (
    ('hour_sin', 'hour_cos', df_result['timestamp'].dt.hour, 24),
    ('day_sin', 'day_cos', df_result['timestamp'].dt.day, 30),
    ('mon_sin', 'mon_cos', df_result['timestamp'].dt.month, 12),
    ('weekday_sin', 'weekday_cos', df_result['timestamp'].dt.weekday, 7),
):
    angle = component / period * 2 * np.pi
    df_result[sin_col] = np.sin(angle)
    df_result[cos_col] = np.cos(angle)

# The year is kept as a plain (non-cyclical) number
df_result['year'] = df_result['timestamp'].dt.year


In [12]:
df_result.head(10)

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year
0,2020-01-01 00:00:00,129.16,129.19,128.68,128.87,7769.17336,1577840000000.0,1000930.0,2504,0.0,1.0,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
1,2020-01-01 01:00:00,128.87,130.65,128.78,130.64,11344.65516,1577840000000.0,1474278.0,4885,0.258819,0.9659258,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
2,2020-01-01 02:00:00,130.63,130.98,130.35,130.85,7603.35623,1577850000000.0,994025.6,3046,0.5,0.8660254,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
3,2020-01-01 03:00:00,130.85,130.89,129.94,130.2,4968.55433,1577850000000.0,647361.0,2818,0.707107,0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
4,2020-01-01 04:00:00,130.21,130.74,130.15,130.2,3397.90747,1577850000000.0,443006.7,2264,0.866025,0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
5,2020-01-01 05:00:00,130.2,130.47,130.11,130.3,4243.6064,1577860000000.0,552972.7,2426,0.965926,0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
6,2020-01-01 06:00:00,130.31,130.75,130.26,130.44,3668.90166,1577860000000.0,478944.2,2347,1.0,6.123234000000001e-17,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
7,2020-01-01 07:00:00,130.47,130.71,130.14,130.24,4147.17413,1577870000000.0,540770.7,2568,0.965926,-0.258819,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
8,2020-01-01 08:00:00,130.24,130.41,129.87,130.36,7541.44497,1577870000000.0,980894.0,3039,0.866025,-0.5,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020
9,2020-01-01 09:00:00,130.4,130.62,130.13,130.17,4808.20496,1577870000000.0,626819.0,2696,0.707107,-0.7071068,0.207912,0.978148,0.5,0.866025,0.974928,-0.222521,2020


In [13]:

# Feature/label pipeline. Every step takes df_result and returns the frame used
# by the next step, so the statement order matters. The helpers live in
# stock_marker_binance; the column names mentioned below are the ones visible
# in the outputs of later cells.

# Presumably adds the STD<n>_<column> columns (e.g. STD3_close) - verify in the helper
df_result = stock_marker_binance.std(df_result, ['close', 'quote_asset_volume', 'num_trades'])
print("Complete adding STDs")

# Presumably adds the MA<n>... columns that a later cell drops by their "MA" prefix
df_result = stock_marker_binance.adding_MAs(df_result, ['close', 'quote_asset_volume', 'num_trades'])
print("Complete adding MAs")


# Ratio columns such as RATIO_close_and_MA3 and RATIO_close_and_STD3;
# only 'close' gets STD ratios
df_result = stock_marker_binance.adding_ratio(df_result, ['close', 'quote_asset_volume', 'num_trades'], ['MA'])
df_result = stock_marker_binance.adding_ratio(df_result, ['close'], ['STD'])
print("Complete adding ratios")

# RSI_<period>_<column> columns (e.g. RSI_3_num_trades)
df_result = stock_marker_binance.rsi(df_result, ['close', 
                                                  'quote_asset_volume', 'num_trades'
                                                 ])
print("Complete adding RSI")

# The three marking steps together yield the 'action' label column, whose values
# are B, H, S and W in the groupby output (which step assigns which label is
# not visible here - see stock_marker_binance)
df_result = stock_marker_binance.marking_buy_sell_actions(df_result)
print("Complete marking buy sell actions")

df_result = stock_marker_binance.marking_adjacent_actions(df_result)
print("Complete marking adjacent actions")

df_result = stock_marker_binance.marking_hold_wait_actions(df_result)
print("Complete marking hold wait actions")

# Drop the raw input columns so that only derived features and 'action' remain
df_result = df_result.drop(columns=[
                                    'timestamp',
                                    'open', 'high', 'low' ,'close_time', 'volume',
                                    'close',  
                                    'quote_asset_volume', 'num_trades'
                                    ])
# Remove rows with missing values, then renumber the rows from 0
df_result=df_result.dropna()
df_result.reset_index(inplace=True, drop=True)
# df_result = stock_marker_binance.renaming_cols(df_result, STOCK_TICKER + "_", ['timestamp', 'action'])

Complete adding STDs
Complete adding MAs
Complete adding ratios


  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) + '_' + col_label] = 100.0 - (100.0 / (1.0 + RS))
  df['RSI_'+ str(period) 

Complete adding RSI


Complete marking buy sell actions
Complete marking adjacent actions
Complete marking hold wait actions


In [14]:
df_result.groupby('action').count()

Unnamed: 0_level_0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,STD3_close,...,RSI_192_quote_asset_volume,RSI_384_quote_asset_volume,RSI_3_num_trades,RSI_6_num_trades,RSI_12_num_trades,RSI_24_num_trades,RSI_48_num_trades,RSI_96_num_trades,RSI_192_num_trades,RSI_384_num_trades
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B,2332,2332,2332,2332,2332,2332,2332,2332,2332,2332,...,2332,2332,2332,2332,2332,2332,2332,2332,2332,2332
H,11110,11110,11110,11110,11110,11110,11110,11110,11110,11110,...,11110,11110,11110,11110,11110,11110,11110,11110,11110,11110
S,2628,2628,2628,2628,2628,2628,2628,2628,2628,2628,...,2628,2628,2628,2628,2628,2628,2628,2628,2628,2628
W,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778,...,18778,18778,18778,18778,18778,18778,18778,18778,18778,18778


In [15]:
# Remove every column whose name starts with "MA" or "STD";
# columns that merely contain those tags (e.g. RATIO_close_and_MA3) are kept
drop_cols_list = [col for col in df_result.columns if col.startswith(("MA", "STD"))]
df_result.drop(columns=drop_cols_list, inplace=True)

In [16]:
df_result

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RSI_384_quote_asset_volume,RSI_3_num_trades,RSI_6_num_trades,RSI_12_num_trades,RSI_24_num_trades,RSI_48_num_trades,RSI_96_num_trades,RSI_192_num_trades,RSI_384_num_trades,action
0,0.000000,1.000000e+00,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.998395,...,50.285725,55.233549,62.375595,44.246383,47.372031,41.390263,50.373816,50.212888,50.352010,B
1,0.258819,9.659258e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.003510,...,50.044557,57.042254,53.231167,43.468397,40.757926,44.977155,50.329082,49.961415,50.023663,S
2,0.500000,8.660254e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005462,...,50.374138,71.936820,60.894401,54.737831,53.922093,51.046656,50.508626,50.688208,50.405497,W
3,0.707107,7.071068e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.999496,...,50.454235,63.487630,59.629177,57.361954,54.634917,49.887067,50.916972,50.645574,50.478997,W
4,0.866025,5.000000e-01,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005046,...,50.273035,62.021245,59.762526,46.674281,52.189555,47.257474,50.685501,50.334074,50.297334,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34843,0.707107,7.071068e-01,0.951057,-0.309017,0.5,0.866025,0.781831,0.623490,2024,0.999538,...,49.917523,0.491800,45.842406,44.909560,46.166448,50.426042,50.048528,49.884557,49.932173,W
34844,0.866025,5.000000e-01,0.951057,-0.309017,0.5,0.866025,0.781831,0.623490,2024,1.001322,...,49.923296,9.573140,51.228791,46.818007,49.772624,50.584997,49.856044,49.908062,49.903714,W
34845,0.965926,2.588190e-01,0.951057,-0.309017,0.5,0.866025,0.781831,0.623490,2024,0.999021,...,49.684454,70.756458,46.134609,41.865474,49.885167,50.572507,50.125561,50.051341,49.662115,W
34846,1.000000,6.123234e-17,0.951057,-0.309017,0.5,0.866025,0.781831,0.623490,2024,0.997117,...,49.859221,71.389892,3.699243,27.179893,46.358171,50.568465,49.547127,50.109445,49.851729,W


In [17]:
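# Do NOT use SMOTE for the LSTM: resampling adds synthetic rows, which breaks the consecutive-row order that the sliding windows are built from.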
# df_result, n_test = smote(df_result, TRAIN_FRACTION)

In [18]:
df_result.head()

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RSI_384_quote_asset_volume,RSI_3_num_trades,RSI_6_num_trades,RSI_12_num_trades,RSI_24_num_trades,RSI_48_num_trades,RSI_96_num_trades,RSI_192_num_trades,RSI_384_num_trades,action
0,0.0,1.0,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.998395,...,50.285725,55.233549,62.375595,44.246383,47.372031,41.390263,50.373816,50.212888,50.35201,B
1,0.258819,0.965926,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.00351,...,50.044557,57.042254,53.231167,43.468397,40.757926,44.977155,50.329082,49.961415,50.023663,S
2,0.5,0.866025,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005462,...,50.374138,71.93682,60.894401,54.737831,53.922093,51.046656,50.508626,50.688208,50.405497,W
3,0.707107,0.707107,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.999496,...,50.454235,63.48763,59.629177,57.361954,54.634917,49.887067,50.916972,50.645574,50.478997,W
4,0.866025,0.5,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005046,...,50.273035,62.021245,59.762526,46.674281,52.189555,47.257474,50.685501,50.334074,50.297334,W


In [19]:
df_result, class_weights_dict = OneHotEncoding(df_result)

[3.73584906 0.78415842 3.31506849 0.46394717]
dict_keys(['B', 'H', 'S', 'W'])


In [20]:
df_result.head()

Unnamed: 0,hour_sin,hour_cos,day_sin,day_cos,mon_sin,mon_cos,weekday_sin,weekday_cos,year,RATIO_close_and_MA3,...,RSI_12_num_trades,RSI_24_num_trades,RSI_48_num_trades,RSI_96_num_trades,RSI_192_num_trades,RSI_384_num_trades,B,H,S,W
0,0.0,1.0,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.998395,...,44.246383,47.372031,41.390263,50.373816,50.212888,50.35201,1.0,0.0,0.0,0.0
1,0.258819,0.965926,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.00351,...,43.468397,40.757926,44.977155,50.329082,49.961415,50.023663,0.0,0.0,1.0,0.0
2,0.5,0.866025,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005462,...,54.737831,53.922093,51.046656,50.508626,50.688208,50.405497,0.0,0.0,0.0,1.0
3,0.707107,0.707107,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,0.999496,...,57.361954,54.634917,49.887067,50.916972,50.645574,50.478997,0.0,0.0,0.0,1.0
4,0.866025,0.5,-0.406737,-0.913545,0.5,0.866025,-0.433884,-0.900969,2020,1.005046,...,46.674281,52.189555,47.257474,50.685501,50.334074,50.297334,0.0,0.0,0.0,1.0


In [21]:
df_result.columns

Index(['hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'mon_sin', 'mon_cos',
       'weekday_sin', 'weekday_cos', 'year', 'RATIO_close_and_MA3',
       'RATIO_close_and_MA6', 'RATIO_close_and_MA12', 'RATIO_close_and_MA24',
       'RATIO_close_and_MA48', 'RATIO_close_and_MA96', 'RATIO_close_and_MA192',
       'RATIO_close_and_MA384', 'RATIO_quote_asset_volume_and_MA3',
       'RATIO_quote_asset_volume_and_MA6', 'RATIO_quote_asset_volume_and_MA12',
       'RATIO_quote_asset_volume_and_MA24',
       'RATIO_quote_asset_volume_and_MA48',
       'RATIO_quote_asset_volume_and_MA96',
       'RATIO_quote_asset_volume_and_MA192',
       'RATIO_quote_asset_volume_and_MA384', 'RATIO_num_trades_and_MA3',
       'RATIO_num_trades_and_MA6', 'RATIO_num_trades_and_MA12',
       'RATIO_num_trades_and_MA24', 'RATIO_num_trades_and_MA48',
       'RATIO_num_trades_and_MA96', 'RATIO_num_trades_and_MA192',
       'RATIO_num_trades_and_MA384', 'RATIO_close_and_STD3',
       'RATIO_close_and_STD6', 'RATIO_close_a

In [50]:
import importlib

# Re-import model_design so that edits to the module are used by this run
importlib.reload(model_design)

# A model is written to disk only if its score exceeds this value; the bar is
# raised to the score of every model that gets saved
max_val_PR = 0.6

for time_step in TIME_FRAME_LIST:
    # Build the sliding windows for this length, then split them positionally
    df_tensor = dataframe_to_tensor(df_result, time_step)
    x_train, y_train, x_test, y_test = get_train_test_sets(df_tensor, TRAIN_FRACTION)

    for split in (x_train, y_train, x_test, y_test):
        print(split.shape)

    model, history = model_design.model_design(EPOCHS,  x_train, y_train, x_test, y_test, class_weights_dict)
    plot_model_history(history.history)

    # Best validation score seen during this run, rounded for the file name
    val_PR = round(max(history.history['val_sens_at_spec']), 3)

    x = datetime.datetime.now()
    model_name = "".join([
        "models/",
        x.strftime("%Y-%m-%d %H-%M-%S"),
        " val_sens_at_spec ",
        str(val_PR),
        " time frame ",
        str(time_step),
        ".hd5",
    ])

    if max_val_PR < val_PR:
        max_val_PR = val_PR
        model.save(model_name,  save_format = 'h5')

    # The name is printed for every run, including runs whose model was not saved
    print(model_name)
    print(val_PR)
    print(150 * "-")


(26135, 2, 65)
(26135, 4)
(8712, 2, 65)
(8712, 4)


models/2024-02-12 12-37-02 val_sens_at_spec 0.531 time frame 2.hd5
0.531
------------------------------------------------------------------------------------------------------------------------------------------------------
(26134, 3, 65)
(26134, 4)
(8712, 3, 65)
(8712, 4)


models/2024-02-12 12-56-23 val_sens_at_spec 0.51 time frame 3.hd5
0.51
------------------------------------------------------------------------------------------------------------------------------------------------------
(26132, 6, 65)
(26132, 4)
(8711, 6, 65)
(8711, 4)


models/2024-02-12 13-05-17 val_sens_at_spec 0.485 time frame 6.hd5
0.485
------------------------------------------------------------------------------------------------------------------------------------------------------
(26127, 12, 65)
(26127, 4)
(8710, 12, 65)
(8710, 4)


models/2024-02-12 13-18-58 val_sens_at_spec 0.476 time frame 12.hd5
0.476
------------------------------------------------------------------------------------------------------------------------------------------------------


In [None]:
model.summary()

Model: "model_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 12, 65)]          0         
                                                                 
 normalization_40 (Normaliz  (None, 12, 65)            131       
 ation)                                                          
                                                                 
 dropout_35 (Dropout)        (None, 12, 65)            0         
                                                                 
 lstm_80 (LSTM)              (None, 12, 260)           339040    
                                                                 
 lstm_81 (LSTM)              (None, 195)               355680    
                                                                 
 output (Dense)              (None, 4)                 784       
                                                          

In [None]:
# Play a single tone through winsound (a standard-library module that exists
# only on Windows builds of Python)
frequency = 2500  # Set Frequency To 2500 Hertz
duration = 1000  # Set Duration To 1000 ms == 1 second
winsound.Beep(frequency, duration)