In [4]:
# Import WRDS library
import wrds
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os

In [67]:

def read_sql_script(fname):
    fd = open(fname, 'r')
    sqlFile = fd.read()
    fd.close()

    return sqlFile


# Get current path
this_path = os.path.abspath(os.curdir)
print(this_path)

# Define sql file names
# these will be used as a global variable
query_historical_prices = read_sql_script('/Users/abigail/Desktop/SMU/QF603/get_historical_prices.sql')
query_shares_outstanding = read_sql_script('/Users/abigail/Desktop/SMU/QF603/get_shares_outstanding.sql')
query_historical_one_price = read_sql_script('/Users/abigail/Desktop/SMU/QF603/get_historical_price_1d.sql')


# Establish live connection; requires user login (passwords will be masked)
db = wrds.Connection() # this will be used as a global variable


def get_historical_prices(isin, start_date, end_date):
    
    print(f'Extracting historical prices for {isin}...')

    df =\
    (
        db
        .raw_sql(
            query_historical_prices.format(isin, start_date, end_date), 
            date_cols = ['trade_date']
            )
    )

    if df.empty:
        print('Dataframe is empty. No results was returned!')
    
    print('--------------------------------------------------')

    return df



def get_historical_one_price(isin, date):
    
    print(f'Extracting historical prices for {isin}...')

    df =\
    (
        db
        .raw_sql(
            query_historical_one_price.format(isin, date), 
            date_cols = ['trade_date']
            )
    )

    if df.empty:
        print('Dataframe is empty. No results was returned!')
    
    print('--------------------------------------------------')

    return df

/Users/abigail/Desktop/SMU/QF603
Enter your WRDS username [abigail]:abigailcjh
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: y
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [17]:
#first iteration

ftse_rebal = pd.read_csv('ftse100_rebalancing_summary.csv')


ftse_rebal["post_date"] =\
    (
        ftse_rebal["Post Date"]
        .apply(lambda x: datetime.strptime(x,'%Y-%m-%d'))
    )

#pd.to_datetime(series, format=)


ftse_rebal["start_date"] =\
    (
        ftse_rebal["post_date"] - timedelta(days = 7)
    )
    
ftse_rebal["end_date"] =\
    (
        ftse_rebal["post_date"] + timedelta(days = 28)
    )

ftse_rebal["fmt_start_date"] =\
    (
        ftse_rebal["start_date"]
        .apply(lambda x: datetime.strftime(x,"%d/%m/%Y"))
    )
    
ftse_rebal["fmt_end_date"] =\
    (
        ftse_rebal["end_date"]
        .apply(lambda x: datetime.strftime(x,"%d/%m/%Y"))
    )


target_isins = ftse_rebal["ISIN\n"]
start_dates = ftse_rebal["fmt_start_date"]
end_dates = ftse_rebal["fmt_end_date"]   
    
    


historical_prices =\
    (
        pd.
        concat(
            map(
                get_historical_prices,
                target_isins,
                start_dates,
                end_dates
            )
        )
    )

(
        historical_prices
        .to_csv(
            f'/Users/abigail/Desktop/SMU/QF603/output/historical_prices_ftse_rebal_20230923.csv',
            index = False
            )
)

print('===================================================')
    

In [68]:
#second iteration

ftse_full_rebal = pd.read_csv('ftse_largemid_rebalancing_summary.csv')


ftse_full_rebal["post_date"] =\
    (
        ftse_full_rebal["Post Date"]
        .apply(lambda x: datetime.strptime(x,'%Y-%m-%d'))
    )

#pd.to_datetime(series, format=)



ftse_full_rebal["start_date"] =\
    (
        ftse_full_rebal["post_date"] - timedelta(days = 7)
    )
    
ftse_full_rebal["end_date"] =\
    (
        ftse_full_rebal["post_date"] + timedelta(days = 28)
    )

ftse_full_rebal["fmt_start_date"] =\
    (
        ftse_full_rebal["start_date"]
        .apply(lambda x: datetime.strftime(x,"%d/%m/%Y"))
    )
    
ftse_full_rebal["fmt_end_date"] =\
    (
        ftse_full_rebal["end_date"]
        .apply(lambda x: datetime.strftime(x,"%d/%m/%Y"))
    )




target_isins = ftse_full_rebal["ISIN\n"]
start_dates = ftse_full_rebal["fmt_start_date"]
end_dates = ftse_full_rebal["fmt_end_date"]   



historical_prices =\
    (
        pd.
        concat(
            map(
                get_historical_prices,
                target_isins,
                start_dates,
                end_dates
            )
        )
    )

(
        historical_prices
        .to_csv(
            f'/Users/abigail/Desktop/SMU/QF603/output/historical_prices_ftse_full_rebal_20230923.csv',
            index = False
            )
)

print('===================================================')
    

Extracting historical prices for GB0030927254...
--------------------------------------------------
Extracting historical prices for GB00BF4HYT85...
--------------------------------------------------
Extracting historical prices for GB00BMMV6B79...
Dataframe is empty. No results was returned!
--------------------------------------------------
Extracting historical prices for GB00B033F229...
--------------------------------------------------
Extracting historical prices for GB00BDVZYZ77...
--------------------------------------------------
Extracting historical prices for GB0033986497...
--------------------------------------------------
Extracting historical prices for GG00BJL5FH87...
--------------------------------------------------
Extracting historical prices for GB0005800072...
--------------------------------------------------
Extracting historical prices for GB00BDFBVT43...
--------------------------------------------------
Extracting historical prices for GB00BY2Z0H74...
------

  pd.


In [191]:
ftse_rebal_data = historical_prices.reset_index().copy()

rm_isin = ["GB00BMMV6B79", "GB00BDD85M81", "GB0006886666", "GB0007990962", "GB00B8HX8Z88", "GB00BMMV6B79", "GB0005758098"]
lst_index = []
for i in range(len(rm_isin)):
    index = ftse_full_rebal.index[(ftse_full_rebal["ISIN\n"] == rm_isin[i])][0]
    lst_index.append(index)
    
rm_ftse_full_rebal = ftse_full_rebal.drop(lst_index)
rm_ftse_full_rebal = rm_ftse_full_rebal.reset_index()

lst_isin = list(rm_ftse_full_rebal["ISIN\n"])
lst_date = list(rm_ftse_full_rebal["post_date"])

In [194]:
def get_price(df, isin, date):
    pre_five_pd = []
    pre_three_pd = []
    pre_one_pd = []
    post_three_pd = []
    post_five_pd = []
    post_ten_pd = []
    post_twenty_pd = []
    merge_lst = []
    
    for i in range(len(isin)):
        #print(i)
        index = df.index[(df["trade_date"] == date[i]) & (df["isin_code"] == isin[i])][0]
        
        pre_five_pd.append(df["close"].iloc[index - 5])
        pre_three_pd.append(df["close"].iloc[index - 3])
        pre_one_pd.append(df["close"].iloc[index - 1])
        post_three_pd.append(df["close"].iloc[index + 3])
        post_five_pd.append(df["close"].iloc[index + 5])
        post_ten_pd.append(df["close"].iloc[index + 10])
        post_twenty_pd.append(df["close"].iloc[index + 20])
    merge_lst.append(pre_five_pd)
    merge_lst.append(pre_three_pd)
    merge_lst.append(pre_one_pd)
    merge_lst.append(post_three_pd)
    merge_lst.append(post_five_pd)
    merge_lst.append(post_ten_pd)
    merge_lst.append(post_twenty_pd)
    
    return merge_lst

In [200]:
ftse_tidy_data = pd.DataFrame()

ftse_tidy_data["Name"] = rm_ftse_full_rebal["Name"]
ftse_tidy_data["SEDOL"] = rm_ftse_full_rebal["SEDOL\n"]
ftse_tidy_data["ISIN"] = rm_ftse_full_rebal["ISIN\n"]
ftse_tidy_data["FTSE100"] = rm_ftse_full_rebal["FTSE100"]
ftse_tidy_data["FTSE250"] = rm_ftse_full_rebal["FTSE250"]
ftse_tidy_data["post_date"] = rm_ftse_full_rebal["post_date"]

lst_ftse_price = get_price(ftse_rebal_data, lst_isin, lst_date)

In [201]:
ftse_tidy_data["pre_five_pd"] = lst_ftse_price[0]
ftse_tidy_data["pre_three_pd"] = lst_ftse_price[1]
ftse_tidy_data["pre_one_pd"] = lst_ftse_price[2]
ftse_tidy_data["post_three_pd"] = lst_ftse_price[3]
ftse_tidy_data["post_five_pd"] = lst_ftse_price[4]
ftse_tidy_data["post_ten_pd"] = lst_ftse_price[5]
ftse_tidy_data["post_twenty_pd"] = lst_ftse_price[6]


In [203]:
ftse_tidy_data["diff_five_three"] = ftse_tidy_data["post_three_pd"] - ftse_tidy_data["pre_five_pd"]
ftse_tidy_data["diff_five_five"] = ftse_tidy_data["post_five_pd"] - ftse_tidy_data["pre_five_pd"]
ftse_tidy_data["diff_five_ten"] = ftse_tidy_data["post_ten_pd"] - ftse_tidy_data["pre_five_pd"]
ftse_tidy_data["diff_five_twenty"] = ftse_tidy_data["post_twenty_pd"] - ftse_tidy_data["pre_five_pd"]

ftse_tidy_data["diff_three_three"] = ftse_tidy_data["post_three_pd"] - ftse_tidy_data["pre_three_pd"]
ftse_tidy_data["diff_three_five"] = ftse_tidy_data["post_five_pd"] - ftse_tidy_data["pre_three_pd"]
ftse_tidy_data["diff_three_ten"] = ftse_tidy_data["post_ten_pd"] - ftse_tidy_data["pre_three_pd"]
ftse_tidy_data["diff_three_twenty"] = ftse_tidy_data["post_twenty_pd"] - ftse_tidy_data["pre_three_pd"]


ftse_tidy_data["diff_one_three"] = ftse_tidy_data["post_three_pd"] - ftse_tidy_data["pre_one_pd"]
ftse_tidy_data["diff_one_five"] = ftse_tidy_data["post_five_pd"] - ftse_tidy_data["pre_one_pd"]
ftse_tidy_data["diff_one_ten"] = ftse_tidy_data["post_ten_pd"] - ftse_tidy_data["pre_one_pd"]
ftse_tidy_data["diff_one_twenty"] = ftse_tidy_data["post_twenty_pd"] - ftse_tidy_data["pre_one_pd"]



In [204]:
ftse_tidy_data.describe()

Unnamed: 0,FTSE100,FTSE250,post_date,pre_five_pd,pre_three_pd,pre_one_pd,post_three_pd,post_five_pd,post_ten_pd,post_twenty_pd,...,diff_five_ten,diff_five_twenty,diff_three_three,diff_three_five,diff_three_ten,diff_three_twenty,diff_one_three,diff_one_five,diff_one_ten,diff_one_twenty
count,20.0,66.0,66,66.0,66.0,66.0,66.0,66.0,66.0,66.0,...,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0
mean,0.1,0.060606,2022-11-23 02:32:43.636363520,4.93621,5.115649,5.020407,4.922321,4.868374,5.062907,4.894292,...,0.126697,-0.041918,-0.193329,-0.247276,-0.052742,-0.221358,-0.098087,-0.152034,0.0425,-0.126116
min,-1.0,-1.0,2022-06-20 00:00:00,0.2502,0.2514,0.254,0.2826,0.2792,0.275,0.2782,...,-11.94,-26.02,-2.899995,-3.5,-3.0,-26.639996,-3.5,-4.100005,-3.600005,-25.760008
25%,-1.0,-1.0,2022-09-20 00:00:00,1.054,1.14625,1.09475,1.07475,1.0535,1.1415,1.056,...,-0.26125,-0.64275,-0.26225,-0.2505,-0.210001,-0.329,-0.1695,-0.171499,-0.1215,-0.2555
50%,1.0,1.0,2022-09-20 00:00:00,2.0625,2.1025,2.0475,2.0225,1.96,2.0745,2.235,...,-0.03625,-0.0139,-0.04375,-0.0715,-0.05975,-0.085,-0.022,-0.0379,-0.0115,-0.0865
75%,1.0,1.0,2023-03-20 00:00:00,6.172498,5.772498,5.635998,5.487999,5.464999,6.526249,5.427498,...,0.117497,0.96575,-0.00825,-0.002,0.011875,0.094,0.01775,0.024275,0.051875,0.1107
max,1.0,1.0,2023-06-19 00:00:00,38.699996,38.5,39.100005,35.600005,35.0,35.5,35.0,...,16.042005,27.364996,1.029998,0.648001,11.971,27.329996,0.68,0.620999,11.991,27.329996
std,1.020836,1.005811,,6.724902,6.931511,6.849729,6.509971,6.459906,6.471283,6.486611,...,4.069055,6.926537,0.602846,0.682176,1.662924,6.000309,0.492299,0.605325,1.608523,5.899358


In [205]:
ftse_tidy_data.to_csv(
            f'/Users/abigail/Desktop/SMU/QF603/output/ftse_price_data.csv',
            index = False
            )