In [78]:
import pandas as pd

from copy import deepcopy
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.base import BaseEstimator
from sklearn.preprocessing import StandardScaler

import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from keras.regularizers import l1, l2

import tensorflow as tf
from tensorflow.keras import backend as K

import yfinance as yf

import warnings
warnings.filterwarnings("ignore")


In [79]:
def construct_features_single_asset(df,k,h, linear = False):
    """Build lagged-return features and forward-window targets for one asset.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Return Daily'`` column. Rows containing any NaN are
        dropped first; the caller's frame is not modified.
    k : int
        Number of lag columns to create (``'Before k Day'`` ... ``'Before 1 Day'``).
    h : int
        Length of the window used for the target columns.
    linear : bool, default False
        When true, also add a ``'Signal'`` column that is ``1`` where
        ``'Mean H Return'`` is positive and ``-1`` otherwise.

    Returns
    -------
    pandas.DataFrame
        The input rows plus the derived columns, with every row containing a
        NaN removed and then the first remaining row dropped.
    """
    # Work on an explicit copy so adding columns never touches (or warns
    # about) the frame that was passed in.
    df = df.dropna(how='any', axis=0).copy()
    returns = df['Return Daily']

    df['Cummulative Return'] = (1 + returns).cumprod(axis=0)
    # Compounded return from row t to row t + h.
    df['Next H Return'] = df['Cummulative Return'].pct_change(h).shift(-h)

    # Statistics of the h returns in rows t .. t + h - 1: a trailing window
    # of length h shifted back by h - 1 rows. The built-in rolling reducers
    # replace the former rolling(...).apply(lambda ...) calls, whose lambda
    # simply re-selected the whole window.
    # NOTE(review): this window starts at row t, i.e. it contains the same
    # 'Return Daily' value that is also exposed as a feature, whereas
    # 'Next H Return' starts at t + 1 -- confirm this overlap is intended.
    window = returns.rolling(h)
    df['Mean H Return'] = window.mean().shift(-h + 1)
    df['Square Sum Return'] = returns.pow(2).rolling(h).sum().shift(-h + 1)
    df['STD H Return'] = window.std(ddof=1).shift(-h + 1)

    # Lagged returns, oldest lag first.
    for lag in range(k, 0, -1):
        df["Before " + str(lag) + " Day"] = returns.shift(periods=int(lag))

    if linear:
        # NaN means compare as "not > 0" and map to -1; those rows are
        # removed by the dropna below anyway.
        df['Signal'] = df['Mean H Return'].gt(0).map({True: 1, False: -1})

    df = df.dropna(how='any', axis=0)
    # The first fully populated row is discarded as well (kept from the
    # original implementation; the reason is not documented).
    df = df[1:]

    return df

In [80]:
def sharpe_loss(h):
    """Return a Keras loss that is the negated, annualised Sharpe-style ratio.

    The returned callable reads column 0 of ``y_target`` as a per-sample
    window mean and column 1 as a per-sample sum of squares; ``h`` is the
    window length used to combine them.
    """
    annualisation = np.sqrt(252)

    def calculation(y_target, y_pred):
        # Split the two target columns into (batch, 1) tensors.
        window_mean = K.reshape(y_target[:, 0], (-1, 1))
        window_sq_sum = K.reshape(y_target[:, 1], (-1, 1))
        direction = tf.math.sign(y_pred)

        signed_window_sum = window_mean * h * direction
        # NOTE(review): y_pred * sign(y_pred) equals |y_pred|, so the sign of
        # the prediction does not enter this mean -- confirm this is intended.
        portfolio_mean = K.mean(window_mean * y_pred * direction)

        # NOTE(review): a textbook expansion of sum((r - mu)^2) over h terms
        # would carry h * mu^2 here rather than mu^2 -- confirm.
        variance_terms = (
            window_sq_sum
            - 2 * signed_window_sum * portfolio_mean
            + portfolio_mean ** 2
        )
        portfolio_std = tf.math.sqrt(K.mean(variance_terms) / h)

        # Negated so that minimising the loss maximises the ratio.
        return -(portfolio_mean / portfolio_std) * annualisation

    return calculation



In [98]:
def train_MLP_Loss(data, k, h):
    """Train a small tanh MLP on all assets pooled together with the Sharpe loss.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per asset. Each column is renamed to ``'Return Daily'`` and
        passed to ``construct_features_single_asset``.
    k : int
        Number of lagged-return features; the network input has ``k + 1``
        columns (the lags plus ``'Return Daily'``).
    h : int
        Window length forwarded to the feature builder and to ``sharpe_loss``.

    Returns
    -------
    The fitted Keras ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``data`` has no columns, so there is nothing to train on.
    """
    model = Sequential([
        # Dropout with rate 0 is a pass-through that only declares the input shape.
        Dropout(0, input_shape=(k+1,)),
        Dense(20, activation='tanh'),
        Dense(1, activation='tanh'),
    ])

    model.compile(optimizer='adam', loss=sharpe_loss(h=h))

    features = ["Before " + str(i) + " Day" for i in range(k, 0, -1)]
    features.append("Return Daily")
    targets = ["Mean H Return", "Square Sum Return"]

    # Collect the per-asset frames and concatenate once. Growing a frame with
    # pd.concat inside the loop re-copies everything on each pass and starts
    # from an empty object-dtype frame.
    feature_frames = []
    target_frames = []
    for oo in data.columns:
        df = data[[oo]].copy()
        df.columns = ["Return Daily"]
        df = construct_features_single_asset(df, k, h, linear=False)

        feature_frames.append(df[features])
        target_frames.append(df[targets])

    if not feature_frames:
        raise ValueError("train_MLP_Loss: `data` has no columns to train on")

    X_train = pd.concat(feature_frames, axis=0)
    y_train = pd.concat(target_frames, axis=0)

    model.fit(X_train, y_train, epochs=100, batch_size=128, verbose=1)

    return model

In [99]:
# Fit the MLP on the CSV data with k=10 lagged returns and an h=15 window.
model = train_MLP_Loss(
    data=pd.read_csv('Data/data_close.csv', index_col='Date'),
    k=10,
    h=15,
)


Epoch 1/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: -0.1880
Epoch 2/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4256
Epoch 3/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4413
Epoch 4/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4564
Epoch 5/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4420
Epoch 6/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4522
Epoch 7/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4559
Epoch 8/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: -0.4477
Epoch 9/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: -0.4561
Epoch 10/100
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1

In [100]:
time_range = '6mo' # nominal length of the backtest window (start_time/end_time below pin the actual dates)
start_time = '2024-01-01'
end_time = '2024-07-01'

def EU_Stock_data():
    """Download daily close-to-close returns for the EURO STOXX 50 tickers.

    Tickers are read from the fifth table of the Wikipedia page (skipping its
    first row) and price history is fetched between the module-level
    ``start_time`` and ``end_time``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, indexed by the trading dates of the first
        ticker (index name ``'Date'``). The first return of every ticker is 0.
        Dates on which a ticker has no price are NaN.
    """
    stock_list = pd.read_html( 'https://en.wikipedia.org/wiki/EURO_STOXX_50')[4]['Ticker'][1:].to_list()

    calendar = None   # trading dates of the first ticker; every column is aligned to it
    columns = {}

    for symbol in stock_list:
        # start/end already define the window, so `period` is not passed as well
        # (NOTE(review): some yfinance releases reportedly reject all three
        # together -- confirm against the installed version).
        close = yf.Ticker(symbol).history(start=start_time, end=end_time)['Close']

        # Daily return (close_t - close_{t-1}) / close_{t-1}; 0 on the first day.
        previous_close = close.shift(1)
        returns = (close - previous_close) / previous_close
        if len(returns) > 0:
            returns.iloc[0] = 0.0
        # reindex() needs unique labels.
        returns = returns[~returns.index.duplicated(keep='last')]

        if calendar is None:
            calendar = returns.index
        else:
            # Exchanges report in their own time zone; express the dates in the
            # calendar's zone before aligning.
            if (isinstance(calendar, pd.DatetimeIndex) and calendar.tz is not None
                    and isinstance(returns.index, pd.DatetimeIndex)
                    and returns.index.tz is not None):
                returns = returns.tz_convert(calendar.tz)
            # Align by date. The previous version padded shorter series at the
            # front by position inside a bare `except`, which mislabels dates
            # whenever calendars differ and fails if a ticker has more rows.
            returns = returns.reindex(calendar)

        columns[symbol] = returns

    futures = pd.DataFrame(columns, index=calendar, columns=stock_list)

    futures.index = pd.to_datetime(futures.index)
    futures.index.name = 'Date'

    return futures

In [101]:
def test_model_TSMOM(data, model,k,h):
    """Turn model predictions into a per-asset trading-signal table.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per asset. Each column is renamed to ``'Return Daily'`` and
        passed to ``construct_features_single_asset``.
    model : object with ``predict(X)``
        Fitted model; the sign of its prediction is the signal.
    k : int
        Number of lagged-return features the model was trained with.
    h : int
        Window length forwarded to the feature builder.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``data`` with float values in {-1, 0, 1}.
        Dates without a prediction take the previous signal, or 0 when there
        is none yet.
    """
    company = data.columns

    features = ["Before " + str(i) + " Day" for i in range(k, 0, -1)]
    features.append("Return Daily")

    signal_columns = {}
    for oo in company:
        df = data[[oo]].copy()
        df.columns = ["Return Daily"]
        # NOTE(review): the feature builder also drops rows whose forward
        # targets are missing, so the most recent rows get no prediction of
        # their own and inherit the last signal via ffill -- confirm intended.
        df = construct_features_single_asset(df, k, h, linear=False)

        X_test = df[features]

        if len(X_test) == 0:
            # Nothing to predict for this asset: flat signal throughout.
            signal_columns[oo] = pd.Series(0.0, index=data.index)
            continue

        raw_prediction = np.asarray(model.predict(X_test)).reshape(-1)
        predicted = pd.Series(np.sign(raw_prediction), index=X_test.index, dtype=float)

        # One aligned assignment instead of a per-row .loc write, then carry
        # the last signal forward and use 0 before the first prediction.
        signal_columns[oo] = predicted.reindex(data.index).ffill().fillna(0)

    signal = pd.DataFrame(signal_columns, index=data.index, columns=company)

    return signal

In [102]:
# Score the same CSV with the fitted model (k=10 lags, h=15 window).
signal = test_model_TSMOM(
    data=pd.read_csv('Data/data_close.csv', index_col='Date'),
    model=model,
    k=10,
    h=15,
)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [103]:
# Persist the signal table (index included) to the working directory.
signal.to_csv("test_signal.csv")

In [104]:
def Volatility_scale(data, ignore_na=False, adjust = True, com = 60, min_periods=0):
    """Build a cumulative-return index and an ex-ante volatility per asset.

    Parameters
    ----------
    data : pandas.DataFrame
        One column of returns per asset. The frame is not modified.
    ignore_na, adjust, com, min_periods
        Forwarded unchanged to ``Series.ewm``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data``. For every asset ``X`` there are two columns:
        ``X`` (the cumulative product of ``1 + return``) and ``X_Vol`` (the
        exponentially weighted standard deviation times ``sqrt(252)``). Rows
        where the asset's return is missing are NaN in both columns.
    """
    # Start from an empty frame carrying the full date index so that every
    # asset is aligned to it, then concatenate all pieces in a single pass.
    pieces = [pd.DataFrame(index=data.index)]

    # Process each asset
    for oo in data.columns:
        # Drop missing returns WITHOUT touching the caller's frame
        # (the previous inplace dropna acted on a column taken from `data`).
        returns = data[oo].dropna()

        # Cumulative return index, without the trailing "- 1"
        ret_index = (1 + returns).cumprod()

        # Daily volatility
        day_vol = returns.ewm(ignore_na=ignore_na,
                              adjust=adjust,
                              com=com,
                              min_periods=min_periods).std(bias=False)

        vol = day_vol * np.sqrt(252)  # annualise using 252 trading days

        # Cumulative return is named after the asset, its volatility sits next to it
        asset_frame = pd.concat([ret_index, vol], axis=1)
        asset_frame.columns = [oo, oo + '_Vol']
        pieces.append(asset_frame)

    return pd.concat(pieces, axis=1)


In [105]:
def _backtest_single_asset(prices, vols, positions, k, h, vol_flag, target_vol):
    """Simulate one asset and return its ``(pnl, leverage)`` arrays.

    ``prices`` is the cumulative-return index, ``vols`` the matching
    volatility and ``positions`` the signal; all three are read by row position.
    """
    n_rows = len(prices)
    pnl = np.zeros(n_rows)
    leverage = np.zeros(n_rows)

    hold_left = 0        # rows still covered by the trade currently held
    warmup_left = k + 1  # valid rows to skip before the first trade

    for x in range(n_rows):
        if hold_left != 0:
            # Inside a holding period: already filled in when the trade opened.
            hold_left = hold_left - 1
            continue
        # Skip dates on which the asset has no data (e.g. not yet listed).
        if pd.isna(prices[x]):
            continue
        # Skip the first k + 1 valid rows: not enough look-back yet.
        if warmup_left != 0:
            warmup_left = warmup_left - 1
            continue

        position = positions[x]
        if position == -1:
            go_long = False
        elif position == 1:
            go_long = True
        else:
            # No position and no holding period for any other signal value.
            continue

        # Fill the h rows of the holding period, stopping at the end of the
        # data (the previous version relied on a swallowed IndexError here).
        # NOTE(review): the trade opened from the signal on row x also earns
        # the return realised on row x itself; if that signal already uses
        # row x's return this is look-ahead -- confirm.
        for t in range(x, min(x + h, n_rows)):
            ratio = prices[t] / prices[t - 1]
            daily = (ratio - 1) if go_long else (1 - ratio)
            if vol_flag == 1:
                # Scale by the volatility known on the row before entry.
                pnl[t] = daily * target_vol / vols[x - 1]
                leverage[t] = target_vol / vols[x - 1]
            else:
                pnl[t] = daily
                leverage[t] = 1

        hold_left = h - 1

    return pnl, leverage


def backtest(data,signal,k,h,  vol_flag = 1, target_vol = 0.2, ignore_na = False, adjust = True, com = 60, min_periods = 0):
    """Backtest a +/-1 signal with an h-day holding period per asset.

    Parameters
    ----------
    data : pandas.DataFrame
        One column of returns per asset; passed to ``Volatility_scale``.
    signal : pandas.DataFrame
        Must have a column for every column of ``data``. Values of ``1`` open
        a long, ``-1`` a short, anything else no trade. Rows are matched to
        ``data`` by position, not by label.
    k : int
        Look-back length; the first ``k + 1`` valid rows of each asset are
        never traded.
    h : int
        Holding period in rows. While a trade is held, no new signal is read.
    vol_flag : int, default 1
        ``1`` scales each trade by ``target_vol`` divided by the volatility on
        the row before entry; any other value trades unscaled with leverage 1.
    target_vol : float, default 0.2
        Target used when ``vol_flag == 1``.
    ignore_na, adjust, com, min_periods
        Forwarded to ``Volatility_scale``.

    Returns
    -------
    list
        ``[pnl, leverage]``: two DataFrames indexed like ``data`` with the
        same columns; rows that are not traded hold 0.

    Raises
    ------
    KeyError
        If ``signal`` lacks a column present in ``data``.
    """
    # Cumulative-return index and volatility for every asset.
    daily_index = Volatility_scale(data,ignore_na=ignore_na,
                          adjust=adjust,
                          com=com,
                          min_periods = min_periods)

    pnl_columns = []
    leverage_columns = []
    for oo in data.columns:
        # Plain arrays are filled in place; the old chained
        # df['pnl'].iloc[...] = ... writes do not reach the frame under
        # pandas Copy-on-Write.
        asset_pnl, asset_leverage = _backtest_single_asset(
            daily_index[oo].to_numpy(dtype=float),
            daily_index[oo + "_Vol"].to_numpy(dtype=float),
            signal[oo].to_numpy(),
            k, h, vol_flag, target_vol,
        )
        pnl_columns.append(asset_pnl)
        leverage_columns.append(asset_leverage)

    def _as_frame(columns):
        values = np.column_stack(columns) if columns else np.empty((len(data.index), 0))
        return pd.DataFrame(values, index=data.index, columns=data.columns)

    pnl = _as_frame(pnl_columns)
    leverage = _as_frame(leverage_columns)

    return [pnl,leverage]

In [106]:
# backtest returns [pnl, leverage]: df1 is the PnL table, df2 the leverage table.
df1, df2 = backtest(
    data=pd.read_csv('Data/data_close.csv', index_col='Date'),
    signal=signal,
    k=10,
    h=15,
)

In [107]:
# Show the first table returned by backtest (per-asset daily PnL).
df1

Unnamed: 0_level_0,ADYEN.AS,AD.AS,AI.PA,AIR.PA,ALV.DE,ABI.BR,ASML.AS,CS.PA,BAS.DE,BAYN.DE,...,SGO.PA,SAN.PA,SAP.DE,SU.PA,SIE.DE,STLAM.MI,TTE.PA,DG.PA,UCG.MI,VOW.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-31 00:00:00+01:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-02 00:00:00+01:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-03 00:00:00+01:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-06 00:00:00+01:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-07 00:00:00+01:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-21 00:00:00+01:00,-0.001070,-0.003247,-0.005441,0.004980,-0.003271,-0.008909,0.003614,0.006348,0.000336,0.000965,...,-0.003474,-0.001351,-0.012272,-0.002656,-0.003236,0.000190,-0.002317,0.000240,-0.000151,-0.017147
2023-12-22 00:00:00+01:00,0.000279,-0.005174,0.004704,0.007126,0.005191,0.014786,0.001124,0.004842,0.001174,0.001444,...,0.006976,0.002451,0.005059,0.002389,-0.001969,-0.000759,0.004645,0.008876,0.002412,-0.001030
2023-12-27 00:00:00+01:00,0.002069,-0.003275,-0.004083,-0.000154,-0.004899,-0.001477,0.001459,-0.002516,0.001842,0.012162,...,0.000932,-0.001092,0.012205,0.004306,0.003651,0.000190,0.002619,0.000715,0.000902,-0.000688
2023-12-28 00:00:00+01:00,-0.001735,0.000000,-0.004702,0.002001,-0.005737,0.001109,0.001120,-0.006304,-0.001169,0.000391,...,-0.004122,0.000579,-0.001508,-0.002461,-0.001670,-0.001708,-0.016288,-0.010482,-0.001801,-0.019614


In [109]:
def train_Lasso_Loss(data, k, h,lambda_val = .7):
    """Train an L1-regularised linear model on all assets pooled together with the Sharpe loss.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per asset. Each column is renamed to ``'Return Daily'`` and
        passed to ``construct_features_single_asset``.
    k : int
        Number of lagged-return features; the model input has ``k + 1``
        columns (the lags plus ``'Return Daily'``).
    h : int
        Window length forwarded to the feature builder and to ``sharpe_loss``.
    lambda_val : float, default 0.7
        L1 penalty applied to the kernel of the single dense layer.

    Returns
    -------
    The fitted Keras ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``data`` has no columns, so there is nothing to train on.
    """
    model = Sequential([
        # A single linear unit with an L1 kernel penalty.
        Dense(1, input_shape=(k+1,), kernel_regularizer=l1(lambda_val))
    ])

    model.compile(optimizer='adam', loss=sharpe_loss(h=h))

    features = ["Before " + str(i) + " Day" for i in range(k, 0, -1)]
    features.append("Return Daily")
    targets = ["Mean H Return", "Square Sum Return"]

    # Collect the per-asset frames and concatenate once. Growing a frame with
    # pd.concat inside the loop re-copies everything on each pass and starts
    # from an empty object-dtype frame.
    feature_frames = []
    target_frames = []
    for oo in data.columns:
        df = data[[oo]].copy()
        df.columns = ["Return Daily"]
        df = construct_features_single_asset(df, k, h, linear=False)

        feature_frames.append(df[features])
        target_frames.append(df[targets])

    if not feature_frames:
        raise ValueError("train_Lasso_Loss: `data` has no columns to train on")

    X_train = pd.concat(feature_frames, axis=0)
    y_train = pd.concat(target_frames, axis=0)

    model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=1)

    return model

In [110]:
# Fit the L1-regularised linear model (k=10 lags, h=15 window).
# This rebinds the global `model`.
model = train_Lasso_Loss(
    data=pd.read_csv('Data/data_close.csv', index_col='Date'),
    k=10,
    h=15,
)


Epoch 1/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 1.6949
Epoch 2/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -0.2861
Epoch 3/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -0.5383
Epoch 4/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: -0.7911
Epoch 5/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -1.0060
Epoch 6/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -1.1770
Epoch 7/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -1.5226
Epoch 8/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -1.6216
Epoch 9/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -1.8368
Epoch 10/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s