In [1]:
# Import important libraries

# Basic computation packages
import numpy as np
import pandas as pd
from datetime import datetime

# Plot packages
import matplotlib.pyplot as plt
import seaborn as sns

# Statistical package
from scipy import stats

# Format precision
np.set_printoptions(precision = 3)

In [2]:
# lstm price prediction
lstm_1 = pd.read_csv('../output/prediction_result_3days_20220531.csv', index_col = 0)
lstm_2 = pd.read_csv('../output/prediction_result_3days_20220825.csv', index_col = 0)
lstm_3 = pd.read_csv('../output/prediction_result_3days_20221124.csv', index_col = 0)
lstm_4 = pd.read_csv('../output/prediction_result_3days_20230223.csv', index_col = 0)
lstm_5 = pd.read_csv('../output/prediction_result_3days_20230525.csv', index_col = 0)
lstm_6 = pd.read_csv('../output/prediction_result_3days_20230822.csv', index_col = 0)


lstm = [lstm_1, lstm_2, lstm_3, lstm_4, lstm_5, lstm_6]
dates = ['20220524', '20220823', '20221122', '20230221', '20230523', '20230822']

In [17]:
def clean_data(df, date_reqd):

    df_copy = df.loc[:, ['Predicted Price']].copy()

    def get_threshold(sub_df):
        sub_df_copy = sub_df[sub_df['Market Cap\n']!= '--'].copy()
        sub_df_copy['Market Cap\n'] = sub_df_copy['Market Cap\n'].astype(float)
        sub_df_copy = sub_df_copy.sort_values(by = 'Market Cap\n')
        threshold = sub_df_copy.sort_values(by='Market Cap\n').iloc[9]['Market Cap\n']

        return threshold


    def clean_ref_data(sub_df, idx):
        sub_df = sub_df.loc[:,['Price', 'Market Cap\n', 'ISIN\n']]
        sub_df = sub_df[sub_df['Market Cap\n']!= '--']
        sub_df['Market Cap\n'] = sub_df['Market Cap\n'].astype(float)
        sub_df['Shares Outstanding'] = sub_df['Market Cap\n']/sub_df['Price']
        sub_df['Source'] = idx
        
        return sub_df


    ukx = pd.read_excel('../FTSE/UKX_pre-review.xlsx', sheet_name = date_reqd, index_col='Ticker')
    mcx = pd.read_excel('../FTSE/MCX_pre-review.xlsx', sheet_name = date_reqd, index_col='Ticker')
    threshold = get_threshold(ukx)

    ukx = clean_ref_data(ukx, 'FTSE100')
    mcx = clean_ref_data(mcx, 'FTSE250')

    ref = pd.concat([ukx,mcx])

    df_copy = df_copy.merge(ref[['ISIN\n', 'Shares Outstanding', 'Source']], how = 'left', left_index=True, right_index=True)
    df_copy['Market Cap'] = df_copy['Predicted Price'] * df_copy['Shares Outstanding']
    #df_copy['Threshold'] = threshold

    return df_copy

    

def identify_trade_pairs(prediction, invalid_tickers, target = 2):
    prediction = prediction[~prediction.index.isin(invalid_tickers)]
    ftse100 = prediction[prediction['Source'] == 'FTSE100'].sort_values(by = 'Market Cap')
    ftse250 = prediction[prediction['Source'] == 'FTSE250'].sort_values(by = 'Market Cap', ascending = False)
    target_ftse250 = ftse250.iloc[:target]
    target_ftse100 = ftse100.iloc[:target]
    return pd.concat([target_ftse250, target_ftse100])
    




In [23]:
invalid_tickers = ['CCL LN Equity', 'TUI LN Equity', 'INVP LN Equity', 'BBOX LN Equity', 'EZJ LN Equity']

all_target_stocks = []

for price_prediction, date in zip(lstm, dates):
    predict_df = clean_data(lstm_1, date)
    stocks_to_trade = identify_trade_pairs(predict_df, invalid_tickers, target = 2)
    datestring = datetime.strptime(date, '%Y%m%d')
    stocks_to_trade['Period'] = datestring.strftime('%Y-%m')
    all_target_stocks.append(stocks_to_trade)

df_target_trades = pd.concat(all_target_stocks)
df_target_trades

Unnamed: 0_level_0,Predicted Price,ISIN\n,Shares Outstanding,Source,Market Cap,Period
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CNA LN Equity,85.310745,GB00B033F229,59074520.0,FTSE250,5039691000.0,2022-05
UTG LN Equity,984.9353,GB0006928617,4000766.0,FTSE250,3940495000.0,2022-05
IDS LN Equity,276.65097,GB00BDVZYZ77,9561935.0,FTSE100,2645318000.0,2022-05
ITV LN Equity,66.964294,GB0033986497,40254090.0,FTSE100,2695587000.0,2022-05
WEIR LN Equity,1492.0137,GB0009465807,2596120.0,FTSE250,3873447000.0,2022-08
CTEC LN Equity,172.95068,GB00BD3VFW73,20417650.0,FTSE250,3531246000.0,2022-08
ABDN LN Equity,163.5097,GB00BF8Q6K64,21577420.0,FTSE100,3528118000.0,2022-08
HWDN LN Equity,637.03265,GB0005576813,5562366.0,FTSE100,3543409000.0,2022-08
WEIR LN Equity,1492.0137,GB0009465807,2596120.0,FTSE250,3873447000.0,2022-11
HIK LN Equity,1627.581,GB00B0LCW083,2202290.0,FTSE250,3584405000.0,2022-11


In [5]:
stocks_to_trade

Unnamed: 0_level_0,Predicted Price,ISIN\n,Shares Outstanding,Source,Market Cap,Period
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FRAS LN Equity,648.7346,GB00B1QH8P22,4571277.0,FTSE100,2965546000.0,2023-08-22
BEZ LN Equity,459.9702,GB00BYQ0JC66,6723118.0,FTSE100,3092434000.0,2023-08-22
