In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime as dt, timedelta
from keras.models import load_model

import sqlite_io as sio
import model_mngr as modmgr
import add_indicators as indic
import split_merge as sm
import joblib


SET VARIABLES

In [2]:
# --- Paths -------------------------------------------------------------------
PATH_MODEL = "../../Data/Models/"
# Raw strings: Windows paths contain backslashes, and sequences such as "\P" or
# "\s" are invalid escapes in a normal literal (warning today, error in future Python).
PATH_DB_CW8 = r"C:\Projets\Data\sqlite\candle_CW8.db"
PATH_DB_FWK = r"C:\Projets\Data\sqlite\dataset_market.db"

# --- Model / dataset selection -------------------------------------------------
symb = "CW8"
model_type = "LSTM_CLASS"
dts_name = "DCA_CLOSE_1D_21D_V1"
lab_studied = "lab_perf_21d"
update = "202310"
timeframe = 1440  # 1D=1440
delta_date = 3  # nb days added to the last_date
seq_len = 20

# Model and scaler artefacts share the same "<symbol>_<model>_<dataset>_<update>" prefix.
_artefact_prefix = f"{symb}_{model_type}_{dts_name}_{update}"
file_name = _artefact_prefix + ".h5"
scaler_name = _artefact_prefix + "_scaler.save"

Connections and symbol info

In [3]:
# Candle database: when this cell is re-run, close the handle left by the
# previous run before opening a new one.
if "con_cw8" in locals():
    sio.close_connection(con_cw8)
con_cw8 = sio.get_connection(str_db_path=PATH_DB_CW8)

# Framework / dataset database: same close-then-reopen handling.
if "con_fwk" in locals():
    sio.close_connection(con_fwk)
con_fwk = sio.get_connection(str_db_path=PATH_DB_FWK)

# Yahoo ticker code is read from the first row of the symbol info.
symb_info = sio.get_symbol_info(symbol=symb)
first_code = symb_info.CODE_YAHOO.iloc[0]
symb_yf = first_code

# Most recent stored candle, then step back `delta_date` days from it.
dt_last_candle = sio.get_last_candle_date(
    con=con_cw8,
    symbol=symb,
    timeframe=timeframe,
)
dt_delta = dt_last_candle - timedelta(days=delta_date)
print(f" {symb_yf=} {dt_last_candle=} {dt_delta=}")

type(dt_last_candle)=<class 'pandas._libs.tslibs.timestamps.Timestamp'> -- dt_last_candle=Timestamp('2023-12-04 00:00:00')
 symb_yf='CW8.PA' dt_last_candle=Timestamp('2023-12-04 00:00:00') dt_delta=Timestamp('2023-12-01 00:00:00')


Get Yahoo Data and check last months

In [4]:
# Download daily candles from Yahoo, starting at dt_delta.
# `interval` is the candle size. The original passed period="1d" here, but in
# yfinance `period` is the total span to fetch and is documented as an
# alternative to start/end, so combining it with `start` is contradictory
# (NOTE(review): depending on the yfinance version it is either ignored or may
# restrict the download window — confirm against the installed version).
ticker_yf = yf.Ticker(symb_yf)
hist_yf = ticker_yf.history(start=dt_delta, interval="1d")

# Duplicate 'Close' as 'Adj Close' — presumably the SQL loader expects that
# column; verify against sio.load_yahoo_df_into_sql.
hist_yf['Adj Close'] = hist_yf['Close']
res = sio.load_yahoo_df_into_sql(
    con=con_cw8,
    df_yahoo=hist_yf,
    symbol=symb,
    timeframe=timeframe,
    del_duplicate=True,
)

# Sanity check: per-month candle summary for the most recent months.
print(sio.check_candles_last_months(con=con_cw8, symbol=symb, timeframe=timeframe))

         NB          MIN(OPEN_DATETIME)          MAX(OPEN_DATETIME)  \
MONTH                                                                 
2023-12   2  2023-12-01 00:00:00.000000  2023-12-04 00:00:00.000000   
2023-11  26         2023-11-01 00:00:00  2023-11-30 00:00:00.000000   
2023-10  22         2023-10-02 00:00:00         2023-10-31 00:00:00   
2023-09  21         2023-09-01 00:00:00         2023-09-29 00:00:00   
2023-08  23         2023-08-01 00:00:00         2023-08-31 00:00:00   

         MIN(CLOSE)  MAX(CLOSE)  
MONTH                            
2023-12  437.289795  438.069397  
2023-11  414.119385  433.976501  
2023-10  405.379913  429.454407  
2023-09  421.415100  436.457703  
2023-08  415.574707  433.315796  


Get last year of data and add indicators

In [7]:
# Load close prices starting 365 days before dt_delta.
# NOTE(review): the prediction printout in this notebook lists some dates twice
# (e.g. 2023-11-27 with and without fractional seconds), which suggests
# duplicate candles reach this frame — TODO confirm and de-duplicate upstream.
df = sio.get_candles_to_df(
    con=con_cw8,
    symbol=symb,
    timeframe=timeframe,
    only_close=True,
    date_start=dt_delta - timedelta(days=365),
)

# Add the indicators configured for this dataset, then order rows by index.
# Reassigning (instead of inplace=True) keeps the cell idempotent and avoids
# mutating a frame that a helper may share with its input.
df_indic = indic.add_indicators_to_df(con=con_fwk, df_in=df, dts_name=dts_name)
df_indic = df_indic.sort_index()

# Keep only the indicators selected for this symbol / label / algorithm.
df_clean = indic.drop_indicators_not_selected(
    con=con_fwk,
    df_in=df_indic,
    dts_name=dts_name,
    symbol=symb,
    label=lab_studied,
    algo=model_type,
    organize=True,
)

df_clean = indic.drop_indicators_by_type(
    con=con_fwk, df_in=df_clean, dts_name=dts_name, symbol=symb, ind_type=2)  # drop of labels

# Remove every row that still contains at least one NaN.
df_clean = df_clean.dropna(axis=0, how='any')

print(f"{df.shape=} {df_indic.shape=} {df_clean.shape=}")

df.shape=(263, 2) df_indic.shape=(263, 48) df_clean.shape=(64, 23)


Normalisation and preparation

In [8]:
# Load the fitted scaler from PATH_MODEL and apply it to the cleaned features.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load scaler files from a trusted location.
scaler = joblib.load(filename=PATH_MODEL + scaler_name)
df_norm = pd.DataFrame(
    scaler.transform(df_clean),
    columns=df_clean.columns,
    index=df_clean.index,
)

print(df_norm.describe())

# Build the model input from the normalised frame using sequences of seq_len rows.
x_eval_lstm, df_predict = sm.prepare_sequences_with_df(df_in=df_norm, sequence_length=seq_len)
# Print shapes only: the original `{df_norm=}` dumped the entire DataFrame,
# unlike the other two fields and the shape summary of the indicator cell.
print(f"{df_norm.shape=} {x_eval_lstm.shape=} {df_predict.shape=}")

       pos_top20  pos_bb20_lo  pos_rsi14_sma20     ret_5d  pos_sma20_50  \
count  64.000000    64.000000        64.000000  64.000000     64.000000   
mean    0.881975    -0.343071        -0.022330   0.186633      0.403394   
std     0.105329     0.175912         0.288317   0.113406      0.085371   
min     0.635901    -0.610885        -0.519386  -0.071388      0.287227   
25%     0.796994    -0.515000        -0.302758   0.113507      0.323878   
50%     0.915744    -0.347621         0.040841   0.195051      0.406853   
75%     0.972515    -0.203910         0.234720   0.258552      0.473186   
max     1.000000    -0.011783         0.451969   0.449114      0.571006   

       pos_top_200  stdev20_sma20  pos_sma200  pos_bot20  pos_bot_200  ...  \
count    64.000000      64.000000   64.000000  64.000000    64.000000  ...   
mean      0.845375      -0.596411    0.251315  -0.773029    -0.505495  ...   
std       0.105081       0.082262    0.084874   0.177318     0.093817  ...   
min       0.

Load model, score and print last N days

In [9]:
# Restore the saved Keras model and score the prepared sequences.
saved_model = load_model(PATH_MODEL + file_name)
arr_res = saved_model.predict(x_eval_lstm)

# Index of the highest score along axis 1 for each prediction row.
predicted_class = np.argmax(arr_res, axis=1)
# df_predict can have more rows than there are predictions; the leading rows
# are padded with NaN so the column lines up with the end of the frame.
nan_padding = np.array([np.nan] * (df_predict.shape[0] - len(arr_res)))
df_predict["predict_score"] = np.concatenate([nan_padding, predicted_class])

# Show the 20 most recent scores.
latest_scores = df_predict["predict_score"][-20:]
print(latest_scores)

2023-11-13 00:00:00           1.0
2023-11-14 00:00:00           1.0
2023-11-15 00:00:00           1.0
2023-11-16 00:00:00           4.0
2023-11-17 00:00:00           4.0
2023-11-20 00:00:00           0.0
2023-11-21 00:00:00           0.0
2023-11-22 00:00:00           1.0
2023-11-23 00:00:00           3.0
2023-11-24 00:00:00           3.0
2023-11-27 00:00:00           1.0
2023-11-27 00:00:00.000000    1.0
2023-11-28 00:00:00           2.0
2023-11-28 00:00:00.000000    3.0
2023-11-29 00:00:00           3.0
2023-11-29 00:00:00.000000    3.0
2023-11-30 00:00:00           3.0
2023-11-30 00:00:00.000000    3.0
2023-12-01 00:00:00.000000    3.0
2023-12-04 00:00:00.000000    3.0
Name: predict_score, dtype: float64
