In [17]:
# Core data-handling libraries used throughout the notebook.
import pandas as pd
import numpy as np

# NOTE(review): the filter below silences every warning category for the rest
# of the session, which also hides pandas diagnostics raised by later cells —
# consider narrowing it to the specific categories that are known to be noise.
import warnings
warnings.filterwarnings('ignore')

## Import Data

In [18]:
# Load the prediction results; read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
data = pd.read_csv("prediction_result_3days_200622.csv")
data.columns = ['Ticker','Actual Price', 'Predicted Price', 'MSE', 'MAE', 'R2',
       'Market Share', 'marketcap']

# The last column is read as text containing ',' separators. Convert it by
# column name rather than with `data.iloc[:, -1] = ...`: on pandas >= 2.0 a
# whole-column .iloc assignment writes into the existing object-dtype column,
# so the values would stay dtype=object instead of becoming float64.
data['marketcap'] = data['marketcap'].str.replace(',', '', regex=False).astype(float)

# NOTE(review): 'Predicted Price' is displayed as a bracketed string such as
# "[163.5097]"; it is left untouched here — confirm whether later cells expect
# text or a numeric value.
data.head(2)

Unnamed: 0,Ticker,Actual Price,Predicted Price,MSE,MAE,R2,Market Share,marketcap
0,ABDN.L,175.403336,[163.5097],0.077741,0.278821,1,1899229952,333131268597.6992
1,BBOX.L,188.927795,[208.25749],0.279559,0.528733,1,1903740032,359669407279.8203


In [23]:
# Read the candidate-stock universe from sheet '20220526' of the workbook
# (presumably the 26 May 2022 snapshot — confirm against the data source).
universe_workbook = "target_stock_universe.xlsx"
target_universe = pd.read_excel(universe_workbook, sheet_name='20220526')

In [24]:
# Keep only the first space-separated token of each ticker and give it the
# ".L" suffix used by the tickers in `data`.
# Any ".L" already present is stripped first so that re-running this cell is
# idempotent (previously a second run produced e.g. "ABDN.L.L").
base_symbol = (
    target_universe['Ticker']
    .str.split(' ').str[0]
    .str.replace(r'\.L$', '', regex=True)
)
target_universe['Ticker'] = base_symbol + ".L"

In [25]:
# Plain Python list of the normalised tickers. Selecting the column by name
# (as the ticker clean-up does) instead of by position keeps this correct
# even if the sheet's column order changes.
target_universe_list = target_universe['Ticker'].to_list()

In [26]:
# Preview the first four rows of the universe after ticker normalisation.
target_universe.head(4)

Unnamed: 0,Ticker,Name,Weight,Shares,Price,Market Cap,SEDOL,ISIN,Index
0,ICP.L,Intermediate Capital Group PLC,0.20684,277.817407,1457.5,4234802176,BYT1DJ1,GB00BYT1DJ19,FTSE100
1,ABDN.L,abrdn plc,0.208414,2146.807357,190.05,4144469248,BF8Q6K6,GB00BF8Q6K64,FTSE100
2,SMDS.L,DS Smith PLC,0.194905,1274.820668,299.3,4118135808,0822011,GB0008220112,FTSE100
3,DPH.L,Dechra Pharmaceuticals PLC,0.186022,104.825618,3474.0,3765449216,0963318,GB0009633180,FTSE100


## List out stocks that get in and out of FTSE100

In total, 35 stocks are under consideration: the bottom 10 of the FTSE 100 and the top 25 of the FTSE 250. Two of the 35 (HBR LN Equity and HSV LN Equity) do not have enough price data and were therefore removed from consideration.

In [27]:
# Attach the universe metadata (including the 'Index' membership column) to
# each prediction row. Left join: every row of `data` is kept, and tickers
# missing from `target_universe` get NaN in the universe columns.
merged = pd.merge(data, target_universe, on='Ticker', how='left')

In [55]:
# Rank the universe by 'marketcap', largest first. After reset_index the
# 0..n-1 index doubles as each stock's rank within the universe.
merged = merged.sort_values(by='marketcap', ascending = False).reset_index(drop=True)

# Split the ranked table by current index membership.
ftse_100 = merged.loc[merged["Index"] == 'FTSE100']
ftse_250 = merged.loc[merged["Index"] == 'FTSE250']

# Rank of the best-placed current FTSE100 constituent in the universe.
# NOTE(review): raises IndexError when no row is tagged 'FTSE100'.
top_ftse_100_rank = ftse_100.index[0]

# list of stocks that is going into FTSE100: FTSE250 names ranked above every
# FTSE100 name in the universe
promoted = ftse_250.loc[ftse_250.index < top_ftse_100_rank, 'Ticker']
stocks_into_FTSE100 = promoted.to_list()

# number of stocks that is going into FTSE100
number_of_stocks = len(stocks_into_FTSE100)

# list of stocks that is going out from FTSE100: the same number of FTSE100
# names, taken from the bottom of the ranking
demoted = ftse_100['Ticker'].tail(number_of_stocks)
stocks_out_from_FTSE100 = demoted.to_list()

In [56]:
stocks_into_FTSE100 # tickers listed in descending 'marketcap' order

['TUI.L', 'UTG.L', 'CTEC.L']

In [57]:
stocks_out_from_FTSE100 # tickers listed in descending 'marketcap' order

['ABDN.L', 'IDS.L', 'ITV.L']

## import actual result

In [59]:
# Load the rebalancing summary file.
# NOTE(review): presumably the realised FTSE100 changes, for comparison with
# the predicted in/out lists — confirm; it is not used in the cells shown here.
actual_result = pd.read_csv('ftse100_rebalancing_summary.csv')

## Selecting High Conviction Stocks through Volatility Measurement

In [None]:
from pandas_datareader import data as pdr
import datetime as dt
import yfinance as yf

# `pdr_override` merely aliases pdr.get_data_yahoo to yf.download and is not
# available in newer yfinance releases. Keep the alias where it still exists
# (other cells may rely on it), but call yf.download directly below so this
# cell does not depend on it.
if hasattr(yf, "pdr_override"):
    yf.pdr_override()

stock_ticker = target_universe_list

# Five-year price history for every ticker in the universe.
start = dt.datetime(2017, 5, 25)
end = dt.datetime(2022, 5, 26)

# auto_adjust=False is passed explicitly so the result keeps a separate
# 'Adj Close' column, which the next cell selects; newer yfinance releases
# otherwise adjust prices in place and drop that column.
stock_data_raw = yf.download(stock_ticker, start = start, end = end, auto_adjust = False)

In [None]:
# Keep only the 'Adj Close' prices from the downloaded data.
stock_data = stock_data_raw['Adj Close']

In [None]:
# Turn the index into a regular column (later cells filter on
# stock_data['Date']). Built from `stock_data_raw` rather than from
# `stock_data` itself so that re-running this cell is idempotent; the previous
# `stock_data = stock_data.reset_index()` added a spurious 'index' column on
# every repeated run.
stock_data = stock_data_raw['Adj Close'].reset_index()

### To model volatility, we use an exponentially weighted moving average (EWMA), in which the weights decrease exponentially as we move back through time

In [None]:
# Window and decay factor for the long-horizon calculation.
start_date = '2017-05-25'
end_date = '2022-05-26'
lambda_value = 0.94  # weight multiplier applied per step back in time

# Work on an explicit copy: adding columns to a boolean-mask slice of
# `stock_data` is chained assignment and triggers SettingWithCopyWarning.
in_window = (stock_data['Date'] >= start_date) & (stock_data['Date'] <= end_date)
filtered_data = stock_data.loc[in_window].copy()

for stock in stock_ticker:
    daily_return = filtered_data[stock].pct_change()
    filtered_data['DailyReturn_' + stock] = daily_return
    # alpha = 1 - lambda makes the weights decay by exactly `lambda_value` per
    # observation. The previous span=int(1 / (1 - lambda_value)) evaluated to
    # span=16, i.e. alpha = 2/17 and an effective decay of about 0.88, not 0.94.
    # NOTE(review): this column is an exponentially weighted mean of returns;
    # the following cell takes its standard deviation as the volatility figure
    # — confirm that this is the intended volatility definition.
    filtered_data['EWMA_' + stock] = daily_return.ewm(alpha=1 - lambda_value).mean()

In [None]:
# One record per ticker: the standard deviation of its 'EWMA_<ticker>' column.
volatility_data = [
    {'Ticker': stock, 'Volatility': filtered_data['EWMA_' + stock].std()}
    for stock in stock_ticker
]

# Tabulate the records, highest value first, and display the table.
volatility_df = pd.DataFrame(volatility_data).sort_values(by='Volatility', ascending = False)
volatility_df

In [None]:
def add_ewma_returns(prices, tickers, start_date, end_date, lambda_value=0.94):
    """Return the rows of ``prices`` inside a date window with return columns added.

    Parameters
    ----------
    prices : DataFrame with a 'Date' column and one price column per ticker.
    tickers : iterable of column names in ``prices`` to process.
    start_date, end_date : inclusive bounds compared against ``prices['Date']``.
    lambda_value : weight multiplier applied per step back in time.

    Returns
    -------
    A new DataFrame (``prices`` is not modified) holding the rows in the window
    plus, per ticker, 'DailyReturn_<ticker>' (percentage change) and
    'EWMA_<ticker>' (exponentially weighted mean of those returns).
    """
    in_window = (prices['Date'] >= start_date) & (prices['Date'] <= end_date)
    # Explicit copy so the column assignments below never target a slice.
    window = prices.loc[in_window].copy()
    for ticker in tickers:
        daily_return = window[ticker].pct_change()
        window['DailyReturn_' + ticker] = daily_return
        # alpha = 1 - lambda gives weights that decay by exactly `lambda_value`
        # per observation; span=int(1 / (1 - lambda)) resolved to span=16
        # (alpha = 2/17, decay of about 0.88) for lambda = 0.94.
        window['EWMA_' + ticker] = daily_return.ewm(alpha=1 - lambda_value).mean()
    return window


# Same calculation as for the full history, restricted to the final week.
# NOTE(review): this window holds only a handful of rows, so each standard
# deviation below rests on very few observations — confirm this is intended.
start_date = '2022-05-19'
end_date = '2022-05-26'
lambda_value = 0.94

filtered_data = add_ewma_returns(stock_data, stock_ticker, start_date, end_date, lambda_value)

# One record per ticker: the standard deviation of its 'EWMA_<ticker>' column.
volatility_data = [
    {'Ticker': stock, 'Volatility': filtered_data['EWMA_' + stock].std()}
    for stock in stock_ticker
]

volatility_df = pd.DataFrame(volatility_data).sort_values(by='Volatility', ascending = False)
volatility_df