In [1]:
import pandas as pd
from sqlalchemy import create_engine


In [2]:
def select_historical_model_data(strat: int) -> pd.DataFrame:
    """Load candle data joined with the indicator table for one strategy.

    Each supported strategy reads a candle table (aliased ``h``) and LEFT JOINs
    a strategy-specific indicator table (aliased ``s``) on ``datetime`` and
    ``ticker_id``, ordered by ``h.datetime``:

    * ``1`` -> ``historical_data_30_table`` + ``psar_macd_ema_table``
    * ``2`` -> ``historical_data_15_table`` + ``st_rsi_ema_table``
    * ``4`` -> ``historical_data_15_table`` + ``hoffman_table``

    The query is not filtered by ticker; rows for every ticker in the candle
    table are returned.

    Args:
        strat (int): strategy identifier; one of 1, 2 or 4.

    Returns:
        pd.DataFrame: candle columns (datetime, ticker_id, open, high, low,
        close, volume) followed by the strategy's indicator columns. Because
        the join is a LEFT JOIN, indicator columns are null for candle rows
        that have no matching indicator row.

    Raises:
        ValueError: if ``strat`` is not a supported strategy. Raised before
            any database connection is opened.
    """
    if strat == 1:
        table_name1 = 'historical_data_30_table'
        table_name2 = 'psar_macd_ema_table'
        query = f"""
            SELECT 
                h.datetime, h.ticker_id, h.open, 
                h.high, h.low, h.close,
                h.volume, s.psarl, s.psars,
                s.psaraf, s.psarr, s.macd,
                s.macdh, s.macds, s.ema_200
            FROM 
                {table_name1} AS h
            LEFT JOIN 
                {table_name2} AS s
            ON 
                h.datetime = s.datetime AND
                h.ticker_id = s.ticker_id
            ORDER BY 
                h.datetime;
        """
    elif strat == 2:
        table_name1 = 'historical_data_15_table'
        table_name2 = 'st_rsi_ema_table'
        query = f"""
            SELECT 
                h.datetime, h.ticker_id, h.open, 
                h.high, h.low, h.close,
                h.volume, s.supert, s.supertd, s.supertl,
                s.superts, s.rsi_14, s.rsi_14_a_70, s.rsi_14_b_30,
                s.ema_200
            FROM 
                {table_name1} AS h
            LEFT JOIN 
                {table_name2} AS s
            ON 
                h.datetime = s.datetime AND
                h.ticker_id = s.ticker_id
            ORDER BY 
                h.datetime;
        """
    elif strat == 4:
        table_name1 = 'historical_data_15_table'
        table_name2 = 'hoffman_table'
        query = f"""
            SELECT 
                h.datetime, h.ticker_id, h.open, 
                h.high, h.low, h.close,
                h.volume, s.sma_5, s.ema_18, s.ema_20, 
                s.sma_50, s.sma_89, s.ema_144, s.ema_35, 
                s.ku, s.a, s.b, s.c, s.rv, s.y, s.x, s.sl,
                s.ss
            FROM 
                {table_name1} AS h
            LEFT JOIN 
                {table_name2} AS s
            ON 
                h.datetime = s.datetime AND
                h.ticker_id = s.ticker_id
            ORDER BY 
                h.datetime;
        """
    else:
        # Fail fast: previously an unsupported value fell through, opened a
        # connection and then crashed on the unbound `query` variable.
        raise ValueError(
            f"Unknown strategy {strat!r}; expected one of 1, 2 or 4"
        )

    # NOTE(review): DB_USERNAME, DB_PWD, DB_HOSTNAME and DB_PORT_ID must already
    # exist in the kernel namespace; they are not defined in the visible cells.
    myeng = create_engine("postgresql://%s:%s@%s:%s/stock_crypto" % (DB_USERNAME, DB_PWD, DB_HOSTNAME, DB_PORT_ID))
    try:
        # The context manager closes the connection even if the read fails.
        with myeng.connect() as db_connection:
            return pd.read_sql(query, db_connection)
    finally:
        # A fresh engine is created per call, so release its pool as well.
        myeng.dispose()

In [3]:
# Pull one joined candle + indicator frame per strategy (ids 1, 2 and 4).
df_30_strat1 = select_historical_model_data(strat=1)
df_15_strat2 = select_historical_model_data(strat=2)
df_15_strat4 = select_historical_model_data(strat=4)


In [4]:
# Display the strategy-2 frame (historical_data_15_table LEFT JOIN
# st_rsi_ema_table) to eyeball the load; in the rendered rows the indicator
# columns are empty, i.e. no indicator row matched those candles.
df_15_strat2

Unnamed: 0,datetime,ticker_id,open,high,low,close,volume,supert,supertd,supertl,superts,rsi_14,rsi_14_a_70,rsi_14_b_30,ema_200
0,2022-03-15 17:00:00+00:00,BTCUSDT,3.928033e+04,3.943000e+04,3.920545e+04,3.935601e+04,4.612325e+02,,,,,,,,
1,2022-03-15 17:00:00+00:00,VETUSDT,4.586000e-02,4.604000e-02,4.575000e-02,4.600000e-02,6.449062e+06,,,,,,,,
2,2022-03-15 17:00:00+00:00,ONEUSDT,1.240300e-01,1.249700e-01,1.237100e-01,1.249400e-01,9.499131e+05,,,,,,,,
3,2022-03-15 17:00:00+00:00,ADAUSDT,8.000000e-01,8.020000e-01,7.980000e-01,8.010000e-01,9.833043e+05,,,,,,,,
4,2022-03-15 17:15:00+00:00,BTCUSDT,3.935601e+04,3.935601e+04,3.916000e+04,3.928670e+04,4.151408e+02,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62860,2022-06-07 14:00:00+00:00,LTCBTC,2.074000e-03,2.074000e-03,2.074000e-03,2.074000e-03,9.239000e+00,,,,,,,,
62861,2022-06-07 14:00:00+00:00,VETBTC,9.900000e-07,9.900000e-07,9.900000e-07,9.900000e-07,1.021030e+05,,,,,,,,
62862,2022-06-07 14:00:00+00:00,HBARBTC,2.960000e-06,2.960000e-06,2.960000e-06,2.960000e-06,0.000000e+00,,,,,,,,
62863,2022-06-07 14:00:00+00:00,ONEBTC,1.290000e-06,1.290000e-06,1.290000e-06,1.290000e-06,0.000000e+00,,,,,,,,


# EDA CLEANING

In [5]:
# Replace every missing value with 0, mutating each strategy frame in place.
for frame in (df_30_strat1, df_15_strat2, df_15_strat4):
    frame.fillna(0, inplace=True)

# To CSV

In [6]:
# df_30_strat1.to_csv('../Models/Data/df_30_strat1.csv', index=False)
# df_15_strat2.to_csv('../Models/Data/df_15_strat2.csv', index=False)
# df_15_strat4.to_csv('../Models/Data/df_15_strat4.csv', index=False)