In [1]:
import os
import pandas as pd
from frame import Frame
from testing import z_score_trading
from google.cloud import storage
from google.cloud import bigquery
from data_query import fetch_NASDAQ100_index
from data_query import fetch_NASDAQ100_all_components
from data_query import fetch_SP500_index
from data_query import fetch_SP500_all_components
from data_query import fetch_ftse100_index
from data_query import fetch_ftse100_all_components
from PCA_function import rolling_pca_weights
from preprocessing import preprocessing_X
from sklearn.decomposition import PCA
#
#-----Pulling data from Big Query
#
index_selected='ftse'
#
if index_selected=='nasdaq':
    target_df= fetch_NASDAQ100_index()
    underlying_df=fetch_NASDAQ100_all_components()

if index_selected=='sp500':
    target_df= fetch_SP500_index()
    underlying_df=fetch_SP500_all_components()

if index_selected=='ftse':
    target_df= fetch_ftse100_index()
    underlying_df=fetch_ftse100_all_components()
#
#-----pre-processing the components
processed_df=preprocessing_X(underlying_df)
#
#-----PCA function
#
# Define input variables
X_log=processed_df
n_stocks = 30               # number of stocks used for the replication
window = 30                 # period the trading strat goes
n_pcs = 3                   # number of eigenvectors
#
# Get weights
rep_pf = rolling_pca_weights(X_log, n_stocks, window, n_pcs)


pca_weights_df=rep_pf
#underlying_df=stock_price
#target_df=target_close_price
cal_days=60                 # number of days for the z score
trade_days=30               # maximum number of trading days
thresholds=[2,200,-2,-200]  # thresholds for trading signals
                            # [short minimum threshold, short maximum threshold, long minimum threshold, long maximum threshold]
exit_levels=[0.5,-0.5]      # thresholds for closing a trade
                            # [exit level long position, exit level short position]
exit_thresholds=[0,0]

In [2]:
import pandas as pd
import numpy as np

In [3]:
temp_df=pd.DataFrame(pca_weights_df.iloc[:,0])
temp_df.columns=['temp']
stock_aligned=pd.concat([temp_df,underlying_df], join='inner', axis=1)
column_to_delete=stock_aligned.columns[0]
stock_aligned.drop(column_to_delete, inplace=True, axis=1)

temp_df=pd.DataFrame(stock_aligned.iloc[:,0])
temp_df.columns=['temp']
weight_aligned=pd.concat([temp_df,pca_weights_df], join='inner', axis=1)
column_to_delete=weight_aligned.columns[0]
weight_aligned.drop(column_to_delete, inplace=True, axis=1)

temp_df=pd.DataFrame(stock_aligned.iloc[:,0])
temp_df.columns=['temp']
target_df.set_index("date",inplace=True)
target_aligned=pd.concat([temp_df,target_df], join='inner', axis=1)
column_to_delete=target_aligned.columns[0]
target_aligned.drop(column_to_delete, inplace=True, axis=1)
#

replication_aligned = pd.DataFrame(weight_aligned.to_numpy() * target_aligned.to_numpy())
weight_position = pd.DataFrame(replication_aligned.to_numpy()/stock_aligned.to_numpy())

#reset of all indices
weight_aligned=weight_aligned.reset_index()
target_aligned=target_aligned.reset_index()
stock_aligned=stock_aligned.reset_index()

test_weight__aligned=weight_aligned.drop('date', axis=1)
test_target__aligned=target_aligned.drop('date', axis=1)
test_stock_aligned=stock_aligned.drop('date', axis=1)

# Calculating replication portfolio for cal_days+trade_days days based on Date PCA

replications_df=pd.DataFrame(columns=range(cal_days+trade_days), dtype=float)
replications_df

for i,r in weight_position.iterrows():
    if i>cal_days:
        combined = r.to_numpy() * test_stock_aligned[i-cal_days-1:min(i+trade_days-1, len(weight_position))].to_numpy()
        replication_index=pd.DataFrame(combined.sum(axis=1)).reset_index(drop=True)
        replications_df=pd.concat([replications_df,replication_index.T], axis=0)

replications_df.index=weight_position.index[cal_days+1:]
replications_df.columns=[f'Calibration Day {i-cal_days}' if i < (cal_days) else f'Trading Day {i-cal_days+1}' for i in range(0, cal_days+trade_days,1)]
replications_df=replications_df.astype(float)

# #Calculating target for cal_days+trade_days days from Date
target_match_df=pd.DataFrame(columns=range(cal_days+trade_days,1))

for i,r in weight_position.reset_index().iterrows():
    if i > cal_days:
        target_match=pd.DataFrame(target_aligned.iloc[i-cal_days-1:min(i+trade_days-1, len(weight_position)),1].reset_index(drop=True))
        target_match_df=pd.concat([target_match_df,target_match.T], axis=0)

target_match_df.index=weight_position.index[cal_days+1:]
target_match_df.columns=[f'Calibration Day {i-cal_days}' if i < (cal_days) else f'Trading Day {i-cal_days+1}' for i in range(0, cal_days+trade_days,1)]
target_match_df=target_match_df.astype(float)

#log_return calculation of the target and replication
#
target_log_returns=np.log(target_match_df/target_match_df.shift(1, axis=1))
replications_log_returns=np.log(replications_df/replications_df.shift(1, axis=1))

#calculating difference in log returns over the last 60 days to today
#
spread_past_df=pd.DataFrame(target_log_returns.iloc[:,1:cal_days+1].values-replications_log_returns.iloc[:,1:cal_days+1].values, index=target_log_returns.index)
spread_mean=spread_past_df.mean(axis=1)
spread_vol=spread_past_df.std(axis=1)

#todays spread's z-score
#
z_scores_df=pd.DataFrame((spread_past_df.iloc[:,-1]-spread_mean)/spread_vol, columns=['z_score'])

#checking the trading signal against thresholds
#
pos_low_threshold=thresholds[0]
pos_high_threshold=thresholds[1]
neg_low_threshold=thresholds[3]
neg_high_threshold=thresholds[2]
#
for i, r in z_scores_df.iterrows():
    if r['z_score'] > pos_low_threshold:
        if r['z_score'] <pos_high_threshold:
            z_scores_df.loc[i, 'direction']=-1.0
        else:
            z_scores_df.loc[i,'direction']=0.0
    elif r['z_score'] > neg_low_threshold:
        if r['z_score']<neg_high_threshold:
            z_scores_df.loc[i, 'direction']=1.0
        else: z_scores_df.loc[i, 'direction']=0.0
    else: z_scores_df.loc[i, 'direction']=0.0
#
#spread from today to the end
#
spread_df=pd.DataFrame(target_log_returns.iloc[:,cal_days+1:].values-replications_log_returns.iloc[:,cal_days+1:].values, index=target_log_returns.index)
#
#
z_scores_df['target entry']=target_match_df['Trading Day 1']
z_scores_df['replication entry']=replications_df['Trading Day 1']
#
for day in np.arange(0,trade_days-1, 1):
    single_score_df=pd.DataFrame(((spread_df.iloc[:,day]-spread_mean)/spread_vol), columns=[f'Day {day+1}'])
    z_scores_df=pd.concat((z_scores_df,single_score_df), axis=1)
#
dynamic=True
#
for i, row in z_scores_df.iterrows():
    #test_date=pd.to_datetime(start_date).strftime('%Y-%m-%d')
    #
    #default setting to maximum trade days
    exit_day=trade_days-1

    #if the setting is dynamic looping through the z scores to identify when the position would have been closed
    if dynamic:
        if row['direction']==1:
            for day in range(2, trade_days-1):
                if row[f'Day {day}'] > exit_thresholds[0]:
                    exit_day=day
                    break
        if row['direction']==-1:
            for day in range(2, trade_days-1):
                if row[f'Day {day}'] < exit_thresholds[1]:
                    exit_day=day
                    break

    z_scores_df.loc[i, 'replication exit']=replications_df.loc[i, f'Trading Day {exit_day}']
    z_scores_df.loc[i, 'target exit']=target_match_df.loc[i, f'Trading Day {exit_day}']
    if z_scores_df.loc[i, 'direction']!=0:
        z_scores_df.loc[i, 'exit day']=exit_day
    else:
        z_scores_df.loc[i, 'exit day']=0

z_scores_df['target return']=np.log(z_scores_df['target exit']/z_scores_df['target entry'])
z_scores_df['replication return']=np.log(z_scores_df['replication exit']/z_scores_df['replication entry'])

z_scores_df['date']=weight_aligned['date']
z_scores_df=z_scores_df.set_index('date', drop=True)


In [None]:
z_scores_df

In [None]:
alt_return_df=z_scores_df['direction']

In [None]:
bt_res

In [4]:
bt_res=z_scores_df.copy()

In [5]:
alt_return_df=pd.DataFrame(bt_res['direction'])

In [6]:
alt_return_df=alt_return_df.join(bt_res[['target entry']])

In [7]:
alt_return_df['target entry']

date
2022-06-16    7273.399902
2022-06-17    7045.000000
2022-06-20    7016.299805
2022-06-21    7121.799805
2022-06-22    7152.100098
                 ...     
2025-03-04    8871.299805
2025-03-05    8759.000000
2025-03-06    8755.799805
2025-03-07    8682.799805
2025-03-10    8679.900391
Name: target entry, Length: 694, dtype: float64

In [8]:
alt_return_df['daily target return']=np.log(alt_return_df['target entry']/alt_return_df['target entry'].shift(1))

In [9]:
alt_return_df

Unnamed: 0_level_0,direction,target entry,daily target return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-06-16,0.0,7273.399902,
2022-06-17,0.0,7045.000000,-0.031906
2022-06-20,0.0,7016.299805,-0.004082
2022-06-21,0.0,7121.799805,0.014924
2022-06-22,0.0,7152.100098,0.004246
...,...,...,...
2025-03-04,1.0,8871.299805,0.006968
2025-03-05,-1.0,8759.000000,-0.012740
2025-03-06,0.0,8755.799805,-0.000365
2025-03-07,1.0,8682.799805,-0.008372


In [10]:
 alt_return_df['excess return']=alt_return_df['direction']*(z_scores_df['target return']-z_scores_df['replication return'])

In [11]:
alt_return_df

Unnamed: 0_level_0,direction,target entry,daily target return,excess return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-16,0.0,7273.399902,,-0.000000
2022-06-17,0.0,7045.000000,-0.031906,-0.000000
2022-06-20,0.0,7016.299805,-0.004082,-0.000000
2022-06-21,0.0,7121.799805,0.014924,0.000000
2022-06-22,0.0,7152.100098,0.004246,-0.000000
...,...,...,...,...
2025-03-04,1.0,8871.299805,0.006968,-0.006462
2025-03-05,-1.0,8759.000000,-0.012740,0.012017
2025-03-06,0.0,8755.799805,-0.000365,
2025-03-07,1.0,8682.799805,-0.008372,


In [17]:
alt_return_df['strategy']=alt_return_df['target entry']

In [18]:
import math

In [29]:
for i in range(1, len(alt_return_df)):
    print('i', i)
    if alt_return_df.iloc[i]['direction']==0:
        print(alt_return_df.iloc[i]['daily target return'])
        alt_return_df.loc[i]['strategy']=alt_return_df.iloc[i-1]['strategy']*math.exp(alt_return_df.loc[i]['daily target return'])
    else:
        alt_return_df.loc[i]['strategy']=alt_return_df.iloc[i-1]['strategy']*math.exp(alt_return_df.loc[i]['excess return'])
    break

i 1
-0.03190569877325256


KeyError: 1

In [31]:
alt_return_df

Unnamed: 0_level_0,direction,target entry,daily target return,excess return,strategy
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-06-16,0.0,7273.399902,,-0.000000,7273.399902
2022-06-17,0.0,7045.000000,-0.031906,-0.000000,7045.000000
2022-06-20,0.0,7016.299805,-0.004082,-0.000000,7016.299805
2022-06-21,0.0,7121.799805,0.014924,0.000000,7121.799805
2022-06-22,0.0,7152.100098,0.004246,-0.000000,7152.100098
...,...,...,...,...,...
2025-03-04,1.0,8871.299805,0.006968,-0.006462,8871.299805
2025-03-05,-1.0,8759.000000,-0.012740,0.012017,8759.000000
2025-03-06,0.0,8755.799805,-0.000365,,8755.799805
2025-03-07,1.0,8682.799805,-0.008372,,8682.799805


In [34]:
for i in range(1, len(alt_return_df)):
    print('i', i)

    if alt_return_df.iloc[i]['direction'] == 0:
        alt_return_df.loc[i, 'strategy'] = alt_return_df.loc[i-1, 'strategy'] * math.exp(alt_return_df.loc[i, 'daily target return'])
    else:
        alt_return_df.loc[i, 'strategy'] = alt_return_df.loc[i-1, 'strategy'] * math.exp(alt_return_df.loc[i, 'excess return'])


i 1
i 2
i 3
i 4
i 5
i 6
i 7
i 8
i 9
i 10
i 11
i 12
i 13
i 14
i 15
i 16
i 17
i 18
i 19
i 20
i 21
i 22
i 23
i 24
i 25
i 26
i 27
i 28
i 29
i 30
i 31
i 32
i 33
i 34
i 35
i 36
i 37
i 38
i 39
i 40
i 41
i 42
i 43
i 44
i 45
i 46
i 47
i 48
i 49
i 50
i 51
i 52
i 53
i 54
i 55
i 56
i 57
i 58
i 59
i 60
i 61
i 62
i 63
i 64
i 65
i 66
i 67
i 68
i 69
i 70
i 71
i 72
i 73
i 74
i 75
i 76
i 77
i 78
i 79
i 80
i 81
i 82
i 83
i 84
i 85
i 86
i 87
i 88
i 89
i 90
i 91
i 92
i 93
i 94
i 95
i 96
i 97
i 98
i 99
i 100
i 101
i 102
i 103
i 104
i 105
i 106
i 107
i 108
i 109
i 110
i 111
i 112
i 113
i 114
i 115
i 116
i 117
i 118
i 119
i 120
i 121
i 122
i 123
i 124
i 125
i 126
i 127
i 128
i 129
i 130
i 131
i 132
i 133
i 134
i 135
i 136
i 137
i 138
i 139
i 140
i 141
i 142
i 143
i 144
i 145
i 146
i 147
i 148
i 149
i 150
i 151
i 152
i 153
i 154
i 155
i 156
i 157
i 158
i 159
i 160
i 161
i 162
i 163
i 164
i 165
i 166
i 167
i 168
i 169
i 170
i 171
i 172
i 173
i 174
i 175
i 176
i 177
i 178
i 179
i 180
i 181
i 182
i 183
i 184
i 18

In [32]:
alt_return_df=alt_return_df.reset_index()

In [35]:
alt_return_df

Unnamed: 0,date,direction,target entry,daily target return,excess return,strategy
0,2022-06-16,0.0,7273.399902,,-0.000000,7273.399902
1,2022-06-17,0.0,7045.000000,-0.031906,-0.000000,7045.000000
2,2022-06-20,0.0,7016.299805,-0.004082,-0.000000,7016.299805
3,2022-06-21,0.0,7121.799805,0.014924,0.000000,7121.799805
4,2022-06-22,0.0,7152.100098,0.004246,-0.000000,7152.100098
...,...,...,...,...,...,...
689,2025-03-04,1.0,8871.299805,0.006968,-0.006462,8618.650883
690,2025-03-05,-1.0,8759.000000,-0.012740,0.012017,8722.849159
691,2025-03-06,0.0,8755.799805,-0.000365,,8719.662172
692,2025-03-07,1.0,8682.799805,-0.008372,,
