In [1]:
from C4I import benchmarks, clustering, evaluation, investment, risk_factors_nnpca_super_daily_only, utils
import pandas as pd
import numpy as np
import pickle

Source


In [2]:
def An_Unsupervised_Learning_Framework(X, n_pcs, len_fold, n_win,temp_path=None,
                                scaler_pre='MinMax', coll_threshold=0.5, SEED=1,
                                lasso_fold=5, min_rfs=2, max_rfs=4, n_round=3,
                                scaler='Standard',
                                opt_hyper_port={'maxiter':200, 'disp':True},
                                port_select=3, verbose=True):
    '''
    INPUT:
        X: DataFrame, contains assets daily prices
        n_pcs: number of Principal Components to consider
        len_fold: int, length of each fold
        n_win: int, number of windows for the pooled regression
        temp_path: None or str, path for temp savings. If None, there are no
            temp savings. Default=None
        scaler_pre: str representing the scaler to use in preprocessing, either
            'Standard' or 'MinMax'. Other values will result in no scaling.
            Default='Standard'
        coll_threshold: float, correlation threshold for collinearity filter
        SEED: int, seed to set random values
        lasso_fold: int, number of folds for the cross validation. Default = 5
        min_rfs: int, minimum number of risk factors to be saved. Default = 2
        max_rfs: int, maximum number of risk factors to be saved. Default = 4
        n_round: int, number of decimals considered (if <=0, no round).Default=3
        scaler: str representing the scaler to use, either 'Standard' or
            'MinMax'. Other values will result in no scaling. Default='Standard'
        opt_hyper_port: dcit, hyperparameters for the minimizer, containing two
            keys: 'maxiter' and 'disp'. Default={'maxiter':200, 'disp':True}
        port_select: int, number of portfolios to select. Default=3
        verbose: bool, manages the verbosity. Default=True
    OUTPUT:
        target_portfolios: list, containing weights of the optimal portfolio
    '''
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    
    #----- From prices to returns
    X.index = pd.to_datetime(X.index)
    # Monthly returns
    XM = utils.DailyPrice2MonthlyReturn(X)
    # Daily returns
    XD = utils.DailyPrice2DailyReturn(X)
    # Scale data
    if scaler_pre == 'Standard':
        XD = pd.DataFrame( StandardScaler().fit_transform(XD.values),
                          index = XD.index, columns = XD.columns )
        XM = pd.DataFrame( StandardScaler().fit_transform(XM.values),
                          index = XM.index, columns = XM.columns )
    elif scaler_pre == 'MinMax':
        XD = pd.DataFrame( MinMaxScaler().fit_transform(XD.values),
                          index = XD.index, columns = XD.columns )
        XM = pd.DataFrame( MinMaxScaler().fit_transform(XM.values),
                          index = XM.index, columns = XM.columns )
    
    #----- Create Risk Factors
    if temp_path != None:
        try:
            PCs = pd.read_csv(f'{temp_path}/RiskFactors.csv', index_col=0)
            PCs.index = pd.to_datetime(PCs.index)
        except:
            PCs = risk_factors_nnpca_super_daily_only.CreateRiskFactors([XD, XM], n_pcs,
                                                 gran_names=['Daily','Monthly'],
                                                 threshold=coll_threshold,
                                                 SEED=SEED)
            PCs.to_csv(f'{temp_path}/RiskFactors.csv')
    else:
        PCs = risk_factors_nnpca_super_daily_only.CreateRiskFactors([XD, XM], n_pcs,
                                             gran_names=['Daily', 'Monthly'],
                                             threshold=coll_threshold,SEED=SEED)
    if verbose:
        print('Risk Factors successfully extracted!\n')

    #----- Apply Features Selection
    #Check if there are previous savings
    if temp_path != None:
        try:
            with open(f'{temp_path}/Saved_RFs.pickle','rb') as f:
                saved_rf = pickle.load(f)
        except:
            saved_rf = dict()
    else:
        saved_rf = dict()
    #Go on with features selection
    Grid = {'alpha':np.logspace(-11, -2, num=25, base=np.e),
        'tau':np.linspace(0.5, 1.5, 11)}
    
    cnt = 0
    for col in XD.columns[len(saved_rf.keys()):]:
        cnt += 1
        if verbose:
            print(f'Optimizing hyperparameters for asset {col}')
            print(f'Optimizing hyperparemeters for asset num {cnt} remaining {len(XD.columns[len(saved_rf.keys()):])}')

        flag, res = clustering.A_Lasso_Grid_Search(series=XD[col],
                                                   Exog=PCs.values,
                                                   grid=Grid,
                                                   n_fold=lasso_fold,
                                                   len_fold=len_fold,
                                                   min_rfs=min_rfs,
                                                   max_rfs=max_rfs,
                                                   scaler=scaler)
        #If the result is positive, store the hyperparameters
        if flag:
            saved_rf[col] = res
            if verbose:
                print(f'Optimization result: {res}')
        #Otherwise, make a new, more accurate search
        else:
            if res == -1:
                temp_Grid = {'alpha':np.logspace(-16, -11,num=25,base=np.e),
                            'tau':np.linspace(0.5, 1.5, 11)}
            elif res == 1:
                temp_Grid = {'alpha':np.logspace(-3, 3, num=25, base=np.e),
                            'tau':np.linspace(0.5, 1.5, 11)}
            else:
                temp_Grid = {'alpha':np.logspace(-11, -2,num=40,base=np.e),
                            'tau':np.linspace(0.5, 2, 20)}
            flag, res = clustering.A_Lasso_Grid_Search(series=XD[col],
                                                       Exog=PCs.values,
                                                       grid=temp_Grid,
                                                       n_fold=lasso_fold,
                                                       len_fold=len_fold,
                                                       min_rfs=min_rfs,
                                                       max_rfs=max_rfs,
                                                       scaler=scaler)
            if flag:
                saved_rf[col] = res
                if verbose:
                    print(f'Optimization result: {res}')
        if temp_path != None:
            with open(f'{temp_path}/Saved_RFs.pickle','wb') as f:
                pickle.dump(saved_rf, f)

    #----- Create Clustering
    #Create clusters
    clusters = clustering.Clustering(saved_rf)
    if verbose:
        print('Total clusters:')
        for n_clust, cluster in enumerate(clusters):
            print(f'Cluster {n_clust}, Risk factors: {cluster[0]},\
            Cluster Dimension: {len(cluster[1])}')
            print(f'Assets in the cluster: {cluster[1]}')
    #Save useful clusters
    clusters = clustering.UsefulClusters(clusters, saved_rf)
    if verbose:
        print('\nUseful clusters:')
        for n_clust, cluster in enumerate(clusters):
            print(f'Cluster {n_clust}, Risk factors: {cluster[0]}')
            print(f'Assets in the cluster: {cluster[1]}')

    #----- Investment Strategy
    target_portfolios = investment.Create_Portfolios(XD=XD, Exog=PCs.values,
                                        clusters=clusters,
                                        target_function=utils.Expected_Sharpe,
                                        windows_number=n_win,
                                        scaler=scaler, opt_hyper=opt_hyper_port,
                                        port_to_select=port_select)
    #Adjust portfolios weight to obtain one output portfolio
    for port in target_portfolios:
        port[1] = port[1]/port_select
    return target_portfolios


Modified code

In [3]:
import matplotlib.pyplot as plt
import pandas as pd 

path = 'data'
base_line_data = 'Dataset'
X = pd.read_csv(f'{path}/close_price.csv')
X['Date'] = X['Date'].apply(lambda x: pd.to_datetime(x).date())

X.set_index('Date', inplace=True)
X.index = pd.to_datetime(X.index)
print(X.index)
print(X.head())
X.dropna(axis=1, how='any', inplace=True)
X = X.iloc[:, :100]
print(X.head())
test_start = '2022-12-21'
XD = utils.DailyPrice2DailyReturn(X)

DatetimeIndex(['2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07',
               '2016-01-08', '2016-01-11', '2016-01-12', '2016-01-13',
               '2016-01-14', '2016-01-15',
               ...
               '2023-08-17', '2023-08-18', '2023-08-21', '2023-08-22',
               '2023-08-23', '2023-08-24', '2023-08-25', '2023-08-28',
               '2023-08-29', '2023-08-30'],
              dtype='datetime64[ns]', name='Date', length=1928, freq=None)
                   MMM        AOS        ABT       ABBV       ABMD        ACN  \
Date                                                                            
2016-01-04  113.048523  33.202068  37.160885  41.186237  85.239998  89.974754   
2016-01-05  113.541328  33.016586  37.152229  41.014664  85.000000  90.443031   
2016-01-06  111.254501  32.362957  36.840607  41.021812  85.300003  90.266327   
2016-01-07  108.544174  30.781895  35.957676  40.900269  81.919998  87.615585   
2016-01-08  108.174561  29.686659  35.204590  39.

In [4]:
len(XD.columns)

100

In [5]:
test_start_12fold = ['2022-09-01', 
                     '2022-10-01', 
                     '2022-11-01', 
                     '2022-12-01', 
                     '2023-01-01', 
                     '2023-02-01', 
                     '2023-03-01', 
                     '2023-04-01', 
                     '2023-05-01', 
                     '2023-06-01', 
                     '2023-07-01', 
                     '2023-08-01',
                     '2023-09-01']
                     

Training and Testing

In [6]:
portfolios = []
for i in range (12):
    print("fold: ", i)
    X_test = XD[ XD.index >= test_start_12fold[i]]
    X_test_each_fold = X_test[ X_test.index < test_start_12fold[i + 1]]
    X_each_fold = X[ X.index < test_start_12fold[i] ]
    #Obtain portfolio value in the test set
    target_portfolio = An_Unsupervised_Learning_Framework(X_each_fold, n_pcs=7,
                                                            len_fold=42, n_win=50)
    portfolios.append(target_portfolio)
                                              

fold:  0
Risk Factors successfully extracted!

Optimizing hyperparameters for asset MMM
Optimizing hyperparemeters for asset num 1 remaining 100


  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [1, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [1, 5]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [5, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [1, 4, 5, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [1, 3, 5]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [1, 3, 4, 5]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [1, 3, 5]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for asse

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [2, 5, 6]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [2, 6]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [2, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [2, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [2, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [2, 5, 6]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [2, 6]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [2, 5, 6]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for asset num 9 r

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [1, 2, 4, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [4, 5]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [2, 4]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [2, 4]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [2, 4]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [1, 4]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [1, 5]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [1, 2, 4]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for asset num 9 r

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [2, 4, 5, 6]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [2, 4]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [4, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [2, 4, 5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [4, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [2, 4, 5, 6]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [2, 4, 6]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [2, 4, 5, 6]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [5, 6]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [1, 2, 5, 6]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [1, 2, 5, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [0, 2, 5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [2, 5, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [2, 5]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [0, 1, 6]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [1, 2, 5]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [0, 3, 4, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [0, 3, 4, 5]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [2, 3, 4, 5]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [3, 4]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [0, 2, 3, 4]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [2, 4, 5]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [0, 3]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [2, 4]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [3, 5, 6]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [5, 6]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [5, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [3, 5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [1, 5, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [5, 6]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [1, 3, 5, 6]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [5, 6]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for asset n

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [0, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [0, 5]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [0, 5]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [0, 5]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [0, 5]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [0, 5]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [0, 5]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [0, 5]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for asset num 9 remaining 

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [3, 4, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [3, 4, 5, 6]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [3, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [3, 4, 5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [3, 4, 5, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [0, 4, 5, 6]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [3, 4, 5]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [5, 6]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemet

  c /= stddev[:, None]
  c /= stddev[None, :]


Optimization result: [3, 5]
Optimizing hyperparameters for asset AOS
Optimizing hyperparemeters for asset num 2 remaining 99
Optimization result: [3, 4, 5]
Optimizing hyperparameters for asset ABT
Optimizing hyperparemeters for asset num 3 remaining 98
Optimization result: [3, 4, 5, 6]
Optimizing hyperparameters for asset ABBV
Optimizing hyperparemeters for asset num 4 remaining 97
Optimization result: [3, 4, 5, 6]
Optimizing hyperparameters for asset ACN
Optimizing hyperparemeters for asset num 5 remaining 96
Optimization result: [4, 5, 6]
Optimizing hyperparameters for asset ATVI
Optimizing hyperparemeters for asset num 6 remaining 95
Optimization result: [5, 6]
Optimizing hyperparameters for asset ADM
Optimizing hyperparemeters for asset num 7 remaining 94
Optimization result: [3, 5]
Optimizing hyperparameters for asset ADBE
Optimizing hyperparemeters for asset num 8 remaining 93
Optimization result: [3, 4, 5]
Optimizing hyperparameters for asset AAP
Optimizing hyperparemeters for a

In [7]:
portfolios

[[[['AAL', 'AJG', 'BWA', 'CPB', 'CHD'],
   array([-7.01574537e-02, -8.26071795e-02,  1.04742637e-01,  7.58260625e-02,
           9.78511140e-10])],
  [['LNT', 'AEE', 'AEP', 'ATO', 'AVB', 'CNP'],
   array([ 3.47753368e-05,  7.69854603e-02,  6.84866241e-03, -1.53348381e-01,
           2.28614133e-08,  9.61160314e-02])],
  [['ALK', 'AXP', 'AIG', 'BK', 'BXP', 'COF', 'CCL'],
   array([-2.02586066e-08, -3.80045009e-09,  1.73278263e-01, -1.13619119e-01,
           7.15684821e-09,  6.37987711e-03, -4.00560973e-02])]],
 [[['AAL', 'ABC', 'BBWI', 'BK', 'BA', 'BMY', 'COF', 'CCL', 'CAT', 'SCHW'],
   array([-4.81029242e-09, -3.96207221e-06, -1.09368975e-06,  3.36247080e-04,
          -3.81123978e-04,  1.11718173e-01, -1.41765345e-01, -4.66634569e-04,
           7.86038509e-02,  5.69200872e-05])]],
 [[['AFL', 'ALK', 'AMCR', 'AXP', 'AIG', 'BKR', 'BAC', 'BA', 'BWA', 'COF'],
   array([-1.80948560e-03, -2.97726694e-04,  3.57142210e-07, -7.99972234e-02,
           1.47164647e-06, -6.94326427e-03, -2.99156

In [21]:
 #Evaluate results
reduce_earning = 0.9
port_test_12fold = list()
for i in range (12):
    X_test = XD[ XD.index >= test_start_12fold[i]]
    X_test_each_fold = X_test[ X_test.index < test_start_12fold[i + 1]]
    # port_test = investment.InvestmnetWithEarningFilter(X_test_each_fold, target_portfolio, earnings_by_tickers, pre_period_by_ticker, post_period_by_ticker, 
    #                                                    initial_capital=1000, reduce_earning=reduce_earning)
    port_test = investment.Investment(X_test_each_fold, target_portfolio, initial_capital=1000)
    print('\nProposed strategy for ' + str(i + 1) + ' fold')
    port_test_12fold.append(port_test)
    evaluation.Strategy_Evaluation(port_test)
    print('\n')


Proposed strategy for 1 fold
Percentage Profit: 0.547%
Profit Factor: 1.857
Percentage Drawdown: 0.19%
Recovery Factor: 2.862
Sharpe Ratio: 3.916
Sortino Ratio: 6.115



Proposed strategy for 2 fold
Percentage Profit: -0.256%
Profit Factor: 0.845
Percentage Drawdown: 0.618%
Recovery Factor: -0.413
Sharpe Ratio: -0.933
Sortino Ratio: -2.028



Proposed strategy for 3 fold
Percentage Profit: -1.671%
Profit Factor: 0.482
Percentage Drawdown: 2.479%
Recovery Factor: -0.668
Sharpe Ratio: -3.352
Sortino Ratio: -3.16



Proposed strategy for 4 fold
Percentage Profit: -0.238%
Profit Factor: 0.77
Percentage Drawdown: 0.553%
Recovery Factor: -0.43
Sharpe Ratio: -1.403
Sortino Ratio: -1.96



Proposed strategy for 5 fold
Percentage Profit: -0.151%
Profit Factor: 0.81
Percentage Drawdown: 0.393%
Recovery Factor: -0.384
Sharpe Ratio: -1.178
Sortino Ratio: -1.443



Proposed strategy for 6 fold
Percentage Profit: -0.178%
Profit Factor: 0.883
Percentage Drawdown: 0.978%
Recovery Factor: -0.182
Sharp

In [22]:
with open('./results/benchmark/nnpca_100_port_test_daily.txt', 'w') as file:
    for item in port_test_12fold:
        file.write(str(item)+"\n")
    file.close()

In [23]:
port_test

Date
2023-08-01    1000.000000
2023-08-02     998.807210
2023-08-03    1000.507272
2023-08-04     997.783584
2023-08-07     996.661803
2023-08-08     994.794263
2023-08-09     995.428973
2023-08-10     994.343699
2023-08-11     996.257694
2023-08-14     993.968105
2023-08-15     992.677443
2023-08-16     993.326465
2023-08-17     995.022025
2023-08-18     994.805199
2023-08-21     993.406179
2023-08-22     994.896633
2023-08-23     994.301098
2023-08-24     995.600637
2023-08-25     991.153024
2023-08-28     990.488118
2023-08-29     990.509695
2023-08-30     989.767859
Name: Portfolio, dtype: float64

In [14]:
## Earnings calendars
earnings_by_tickers = {}
earnings_file_path = '../data/earnings_data.txt'
with open(earnings_file_path, 'r') as file:
    for line in file:
        ticker = line.split(':')[0]
        dates = line.split(':')[1].strip().split(',')
        earnings_by_tickers[ticker] = dates
        
optimized_earnings_file_path = './data/earnings_period.txt'
pre_period_by_ticker = {}
post_period_by_ticker = {}
with open(optimized_earnings_file_path, 'r') as fr:
    for line in fr:
        parts = line.split(',')
        ticker = parts[0]
        pre = float(parts[1])
        post = float(parts[2])
        pre_period_by_ticker[ticker] = pre
        post_period_by_ticker[ticker] = post

In [15]:
 #Evaluate results
for reduce in range(1, 20):
    reduce_earning = reduce / 10
    port_test_12fold = list()
    print('reduce earning: ', reduce_earning)
    for i in range (12):
        X_test = XD[ XD.index >= test_start_12fold[i]]
        X_test_each_fold = X_test[ X_test.index < test_start_12fold[i + 1]]
        port_test = investment.InvestmnetWithEarningFilter(X_test_each_fold, target_portfolio, earnings_by_tickers, pre_period_by_ticker, post_period_by_ticker, 
                                                        initial_capital=1000, reduce_earning=reduce_earning)
        # port_test = investment.Investment(X_test_each_fold, target_portfolio, initial_capital=1000)
        print('\nProposed strategy with filter for ' + str(i + 1) + ' fold')
        port_test_12fold.append(port_test)
        evaluation.Strategy_Evaluation(port_test)
        print('\n')

    with open(f'../results/earning_filter/benchmark_with_earning_filter_{reduce_earning}_100_port_test.txt', 'w') as file:
        for item in port_test_12fold:
            file.write(str(item)+"\n")
        file.close()

reduce earning:  0.1


AttributeError: module 'C4I.investment' has no attribute 'InvestmnetWithEarningFilter'

In [None]:
#Evaluate benchmark
min_test_12fold = list()
for i in range (12):
    X_test = XD[ XD.index >= test_start_12fold[i]]
    X_test_each_fold = X_test[ X_test.index < test_start_12fold[i + 1]]
    X_each_fold = X[ X.index < test_start_12fold[i]]
    XD_each_fold = utils.DailyPrice2DailyReturn(X_each_fold)
    min_test = benchmarks.Minimal_Variance_Portfolio(XD_each_fold, X_test_each_fold)
    print('Minimal Variance Portfolio for ' + str(i + 1) + ' fold')
    evaluation.Strategy_Evaluation(min_test)
    min_test_12fold.append(min_test)
    evaluation.Strategy_Evaluation(min_test)
    print('\n')

In [None]:
file = open('./results/benchmark/min_100_test_model_and_statistics_last.txt','w')
for item in min_test_12fold:
    file.write(str(item)+"\n")
file.close()

In [None]:
mean_var_test_12fold = list()
for i in range (12):
    print('Start New')
    X_test = XD[ XD.index >= test_start_12fold[i]]
    X_test_each_fold = X_test[ X_test.index < test_start_12fold[i + 1]]
    X_each_fold = X[ X.index < test_start_12fold[i]]
    XD_each_fold = utils.DailyPrice2DailyReturn(X_each_fold)
    print('Start optimization')
    mean_var_test = benchmarks.Mean_Variance_Portfolio(XD_each_fold, X_test_each_fold)
    print('Mean-Variance Portfolio for ' + str(i + 1) + ' fold')
    mean_var_test_12fold.append(mean_var_test)
    print('\n')

In [None]:
file = open('./results/benchmark/mean_100_test_model_and_statistics_last.txt','w')
for item in mean_var_test_12fold:
    file.write(str(item)+"\n")
file.close()

In [None]:
proposed = pd.read_csv('./results/benchmark/benchmark_100_port_test.txt', header = None, sep = '\t')
i = 0
skip_rows = list()
for item in proposed[0]:
    if proposed[0][i] == "Name: Portfolio, dtype: float64" or proposed[0][i] == "Date":
        skip_rows.append(i)
    i = i + 1
proposed_deleted = pd.read_csv('proposed_model_and_statistics.txt', header = None, sep = '    ', skiprows = skip_rows)
proposed_deleted[0] = pd.to_datetime(proposed_deleted[0])
proposed_deleted.columns =['Date', 'Return_proposed']
proposed_deleted = proposed_deleted.set_index('Date')
proposed_12_fold_read = list()
for i in range(12):
    month = i + 9
    if month > 12:
        month = month - 12
        
    temp = proposed_deleted.loc[
         proposed_deleted.index.month == month
    ]
    series = temp.iloc[:,0]
    temp = pd.Series(series, index = temp.index) 
    proposed_12_fold_read.append(temp)

In [None]:
mean = pd.read_csv('./results/benchmark/mean_100_test_model_and_statistics_last.txt', header = None, sep = '\t')
i = 0
skip_rows = list()
for item in mean[0]:
    if mean[0][i] == "Name: Mean-Variance, dtype: float64" or mean[0][i] == "Date":
        skip_rows.append(i)
    i = i + 1
mean_deleted = pd.read_csv('mean_var_test_model_and_statistics.txt', header = None, sep = '    ', skiprows = skip_rows)
mean_deleted[0] = pd.to_datetime(mean_deleted[0])
mean_deleted.columns =['Date', 'Return_mean']
mean_deleted = mean_deleted.set_index('Date')
mean_12_fold_read = list()
for i in range(12):
    month = i + 9
    if month > 12:
        month = month - 12
        
    temp = mean_deleted.loc[
         mean_deleted.index.month == month
    ]
    series = temp.iloc[:,0]
    temp = pd.Series(series, index = temp.index) 
    mean_12_fold_read.append(temp)

In [None]:
min = pd.read_csv('./results/benchmark/min_100_test_model_and_statistics_last.txt', header = None, sep = '\t')
i = 0
skip_rows = list()
for item in min[0]:
    if min[0][i] == "Name: Minimal Variance, dtype: float64" or min[0][i] == "Date":
        skip_rows.append(i)
    i = i + 1
min_deleted = pd.read_csv('min_test_model_and_statistics.txt', header = None, sep = '    ', skiprows = skip_rows)
min_deleted[0] = pd.to_datetime(min_deleted[0])
min_deleted.columns =['Date', 'Return_min']
min_deleted = min_deleted.set_index('Date')
min_12_fold_read = list()
for i in range(12):
    month = i + 9
    if month > 12:
        month = month - 12
        
    temp = min_deleted.loc[
         min_deleted.index.month == month
    ]
    Series = temp.iloc[:,0]
    temp = pd.Series(Series, index = temp.index) 
    min_12_fold_read.append(temp)

In [None]:
#Plot the results
for i in range (12):
    proposed_12_fold_read[i].plot(figsize=(8,3))
    mean_12_fold_read[i].plot(figsize=(8,3))
    min_12_fold_read[i].plot(figsize=(8,3))
    plt.grid()
    plt.legend()
    plt.show()

In [None]:
for i in range (12):
    print("fold " + str(i + 1))
    print("\n Proposed method \n")
    evaluation.Strategy_Evaluation(proposed_12_fold_read[i])
    print("\n Mean Variance method \n")
    evaluation.Strategy_Evaluation(mean_12_fold_read[i])
    print("\n Min Variance method \n")
    evaluation.Strategy_Evaluation(min_12_fold_read[i])
    print("\n")
    print("\n")

Compare bench mark with earnings

In [None]:

 #Evaluate results
# all_proposed = []
# for reduce in range(1, 20):
#     reduce_earning = reduce / 10
#     port_test_12fold = list()

reduce_earning = 1.0
proposed = pd.read_csv(f'./results/earning_filter/benchmark_with_earning_filter_{reduce_earning}_100_port_test.txt', header = None, sep = '\t')
i = 0
skip_rows = list()
for item in proposed[0]:
    if proposed[0][i] == "Name: Portfolio, dtype: float64" or proposed[0][i] == "Date":
        skip_rows.append(i)
    i = i + 1
proposed_deleted = pd.read_csv('proposed_model_and_statistics.txt', header = None, sep = '    ', skiprows = skip_rows)
proposed_deleted[0] = pd.to_datetime(proposed_deleted[0])
proposed_deleted.columns =['Date', 'Return_proposed']
proposed_deleted = proposed_deleted.set_index('Date')
proposed_12_fold_read = list()
for i in range(12):
    month = i + 9
    if month > 12:
        month = month - 12
        
    temp = proposed_deleted.loc[
        proposed_deleted.index.month == month
    ]
    series = temp.iloc[:,0]
    temp = pd.Series(series, index = temp.index) 
    proposed_12_fold_read.append(temp)
# all_proposed.append(proposed_12_fold_read)

In [None]:
print(all_proposed)

In [None]:
#Plot the results
for i in range (12):
    proposed_12_fold_read[i].plot(figsize=(8,3))
    mean_12_fold_read[i].plot(figsize=(8,3))
    min_12_fold_read[i].plot(figsize=(8,3))
    plt.grid()
    plt.legend()
    plt.show()