In [1]:
import os 
import pickle
import json
import pandas as pd
import numpy as np

from utils.optimize import test_and_save_xgb
from data.data_utils import get_vn30f, add_features, add_finance_features


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def test(data, cwd, top_10_features_per_cluster, selected_columns_cluster, best_params_list):
    ### Testing and saving
    if 'Unnamed: 0' in data.columns:
        drop_list = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'Unnamed: 0']
    else:
        drop_list = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return']
    df = test_and_save_xgb(data, cwd, top_10_features_per_cluster, selected_columns_cluster, best_params_list, drop_list)
    return df

In [3]:
### Prepare variables
# Directory holding the saved XGB artefacts (also passed to test() as the save path).
# Built with os.path.join so the separators are correct on every OS and no invalid
# escape sequences ('\X', '\d') appear in a string literal. The trailing '' keeps
# the trailing separator, because file names are appended by plain concatenation.
cwd = os.path.join(os.getcwd(), 'model', 'XGB', 'day', '')  # Save path


def _load_pickle(file_name):
    """Return the object unpickled from `file_name` inside the `cwd` directory."""
    # NOTE: pickle.load can execute arbitrary code while deserialising;
    # only point this at artefact files produced by a trusted run.
    with open(cwd + file_name, 'rb') as f:
        return pickle.load(f)


# Note the file/variable pairing: 'top_10_list.pkl' feeds selected_columns_cluster.
selected_columns_cluster = _load_pickle('top_10_list.pkl')
top_10_features_per_cluster = _load_pickle('top_10_features_per_cluster.pkl')
best_params_list = _load_pickle('best_params_list.pkl')

In [4]:
# Arguments for the price download, grouped as "what" then "when".
symbol = 'VN30F1M'
resolution = '1D'

start_time = '2024-08-31'
now_time = '2024-10-31'

# Fetch the bars for the requested window; get_vn30f prints a status line when it runs.
new_data = get_vn30f(
    symbol,
    resolution,
    start_time,
    now_time,
)

===> Data VN30F1M from 2024-08-30 17:00:00 to 2024-10-30 17:00:00 has been appended 


In [5]:
# Preview the first rows of the downloaded data to sanity-check its columns.
new_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2024-09-04 09:00:00,1320.8,1321.5,1307.3,1314.0,174577
1,2024-09-05 09:00:00,1316.1,1318.6,1304.7,1307.7,168132
2,2024-09-06 09:00:00,1307.6,1311.3,1305.6,1309.0,150932
3,2024-09-09 09:00:00,1299.2,1308.9,1298.6,1307.7,154452
4,2024-09-10 09:00:00,1310.8,1313.6,1289.4,1294.1,201089


In [6]:
# Read the indicator parameter files; the order here fixes the order of `params`.
param_paths = [
    'data/momentum_params.json',
    'data/volume_params.json',
    'data/volatility_params.json',
    'data/trend_params.json',
]

params = []
for param_path in param_paths:
    with open(param_path, 'r') as f:
        params.append(json.load(f))

# Keep the individual names available alongside the combined list.
momentum_params, volume_params, volatility_params, trend_params = params

In [7]:
# Build the indicator feature set from the downloaded data using the four
# parameter groups loaded above. "day" is the dataset label that shows up in the
# progress log. NOTE(review): the exact effect of drop=False is defined in
# data.data_utils.add_features and is not visible here - confirm before changing.
ti_features = add_features(new_data, params, "day", drop=False)

Applying ADX to day dataset...
Applying Aroon Indicator to day dataset...
Applying CCI to day dataset...
Applying DPO to day dataset...
Applying EMA to day dataset...
Applying SMA to day dataset...
Applying PSAR to day dataset...
Applying TRIX to day dataset...
Applying Ichimoku to day dataset...
Applying Vortex Indicator to day dataset...
Applying KST to day dataset...
Applying Mass Index to day dataset...
Applying WMA to day dataset...
Applying Accumulation/Distribution Index (ADI) to day dataset...
Applying Chaikin Money Flow (CMF) to day dataset...
Applying Ease of Movement (EoM) to day dataset...
Applying Force Index (FI) to day dataset...
Applying Money Flow Index (MFI) to day dataset...
Applying Negative Volume Index (NVI) to day dataset...
Applying On-Balance Volume (OBV) to day dataset...
Applying Volume-Price Trend (VPT) to day dataset...
Applying Volume Weighted Average Price (VWAP) to day dataset...
Applying Average True Range (ATR) to day dataset...
Applying Bollinger Band

  data_model = data.pivot(index = 'Date', columns = 'time', values = ['Open','High','Low','Close','Volume']).ffill(axis = 1).stack().reset_index() # Handling missing values


In [8]:
# Load the financial indicators, expose 'period' under the name 'quarter_label',
# and store that label as strings.
financial_statements = (
    pd.read_csv('financial_indicators.csv')
    .rename(columns={'period': 'quarter_label'})
    .assign(quarter_label=lambda frame: frame['quarter_label'].astype(str))
)

In [9]:
# Combine the indicator features with the financial statements table.
# NOTE(review): how the two are joined is decided inside
# data.data_utils.add_finance_features - presumably on 'quarter_label'; confirm.
data_day_combined = add_finance_features(ti_features, financial_statements)

  merged_df = merged_df.fillna(method='ffill')


In [10]:
# Replace every missing value with 0 before the data is handed to the model.
data_day_combined = data_day_combined.fillna(0)

try:
    # Best effort: build a single timestamp column from the 'Date' and 'time'
    # text columns. This fails when either column is missing or is not a string.
    data_day_combined['Unnamed: 0'] = pd.to_datetime(
        data_day_combined['Date'] + ' ' + data_day_combined['time']
    )
except Exception:
    # Deliberate fallback: continue without the combined timestamp column.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently swallowed.
    pass

# The source columns are dropped whether or not the timestamp could be built;
# a missing column still raises KeyError here, as it did before.
data_day_combined = data_day_combined.drop(columns=['Date', 'time'])

In [11]:
# Preview the first rows of the combined dataset after filling and column cleanup.
data_day_combined.head()

Unnamed: 0,Open,High,Low,Close,Volume,ADX_window_14_0,ADX_window_14_1,ADX_window_14_2,ADX_window_20_0,ADX_window_20_1,...,from_financial,from_sale,invest_cost,debt_to_equity_ratio,net_profit_margin,operating_margin,asset_turnover_ratio,cash_ratio,earnings_yield,roce
2024-09-04,1320.8,1321.5,1307.3,1314.0,174577.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-09-05,1316.1,1318.6,1304.7,1307.7,168132.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-09-06,1307.6,1311.3,1305.6,1309.0,150932.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-09-09,1299.2,1308.9,1298.6,1307.7,154452.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-09-10,1310.8,1313.6,1289.4,1294.1,201089.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Run the test() wrapper defined above on the prepared dataset, using the
# artefacts loaded from the save directory; the returned table is kept in `df`.
df = test(data_day_combined, cwd, top_10_features_per_cluster, selected_columns_cluster, best_params_list)

Start                     2024-10-14 00:00:00
End                       2024-10-30 00:00:00
Duration                     16 days 00:00:00
Exposure Time [%]                   84.615385
Equity Final [$]                  1026.830902
Equity Peak [$]                        1057.0
Return [%]                            2.68309
Buy & Hold Return [%]                -1.25092
Return (Ann.) [%]                   79.320355
Volatility (Ann.) [%]               24.265354
Sharpe Ratio                         3.268873
Sortino Ratio                       10.189138
Calmar Ratio                         2.754179
Max. Drawdown [%]                   -2.724693
Avg. Drawdown [%]                   -1.779076
Max. Drawdown Duration        6 days 00:00:00
Avg. Drawdown Duration        5 days 00:00:00
# Trades                                    3
Win Rate [%]                        66.666667
Best Trade [%]                       0.842572
Worst Trade [%]                     -0.317179
Avg. Trade [%]                    

  return bound(*args, **kwds)
  super().__init__(*args, cash=cash, margin=margin, **kwargs)
  return bound(*args, **kwds)
  super().__init__(*args, cash=cash, margin=margin, **kwargs)


In [14]:
# Display the result table returned by test().
df

Unnamed: 0,Top 10 Feature,Best params,Best sharpe,Return (Ann.) [%],Volatility
0,9 11 17 21 23 26 28 0 8 10,"{'max_depth': 6, 'learning_rate': 0.0889274058...",3.268873,79.320355,24.265354
1,4 9 12 13 14 16 18 19 20 21,"{'max_depth': 8, 'learning_rate': 0.0247068046...",-0.95117,-21.063546,22.14487
