In [None]:
# Import libraries
import pandas as pd 
import numpy as np

## Import and Sort FX Data

In [None]:
# Load the FX table keyed by 'datadate', then parse that index into
# timestamps so later date comparisons are chronological, not lexical.
fx = (
    pd.read_csv('fxrates.csv', index_col='datadate')
    .pipe(lambda rates: rates.set_axis(pd.to_datetime(rates.index), axis=0))
)

## Import and Sort Fundamentals Data

In [None]:
# Load the global quarterly fundamentals, using the 'datadate' column as index
fund = pd.read_csv(
    'Global_Fund.csv',
    index_col='datadate',
)

In [None]:
# Derive additional fundamentals and ratios from the raw columns.
# Every ratio is a plain element-wise division, so a zero denominator yields
# inf/NaN rather than raising.

# Calculate other fundamentals
fund['niq'] = fund['piq'] - fund['txtq'] # Net income
fund['ebitda'] = fund['oiadpq'] + fund['dpq'] # EBITDA
# The '*y' cash-flow columns are spread evenly over four quarters.
# NOTE(review): confirm the '*y' columns are full-year (not year-to-date) figures.
fund['fcfq'] = (fund['oancfy'] - fund['capxy']) / 4 # Free cash flow
fund['capxq'] = fund['capxy'] / 4 # Capital expenditure
fund['oancfq'] = fund['oancfy'] / 4 # Operating cash flow
fund['fincfq'] = fund['fincfy'] / 4 # Financing cash flow
fund['ivncfq'] = fund['ivncfy'] / 4 # Investing cash flow

# Return ratios
fund['roa'] = fund['niq'] / fund['atq'] # Return on assets
fund['roe'] = fund['niq'] / fund['teqq'] # Return on equity
fund['roi'] = (fund['niq'] + fund['xintq']) / (fund['atq'] - fund['lctq']) # Return on investment

# Profit ratios
fund['gpm'] = fund['gpq'] / fund['revtq'] # Gross profit margin
fund['opm'] = fund['oiadpq'] / fund['revtq'] # Operating profit margin
fund['npm'] = fund['niq'] / fund['revtq'] # Net profit margin

# Liquidity ratios
fund['cr'] = fund['actq'] / fund['lctq'] # Current ratio
fund['qr'] = (fund['actq'] - fund['invtq']) / fund['lctq'] # Quick ratio

# Solvency ratios
fund['de'] = fund['ltq'] / fund['teqq'] # Debt to equity ratio
fund['icr'] = fund['oiadpq'] / fund['xintq'] # Interest coverage ratio

# Efficiency ratios
fund['atr'] = fund['revtq'] / fund['atq'] # Asset turnover ratio
fund['itr'] = fund['cogsq'] / fund['invtq'] # Inventory turnover ratio

# Operational gearing proxy
fund['og'] = (fund['dpq'] + fund['xintq']) / (fund['cogsq'] + fund['dpq'] + fund['xintq'])

# Growth metrics: row-over-row change computed within each company.
# The frame stacks many companies, so an ungrouped pct_change() would compare
# a company's first row with the previous company's last row.
# Rows whose 'conm' is missing get NaN growth.
# NOTE(review): assumes each company's rows are already in chronological order — confirm.
fund['rg'] = fund.groupby('conm')['revtq'].pct_change() # Revenue growth
fund['eag'] = fund.groupby('conm')['niq'].pct_change() # Earnings growth
fund['ag'] = fund.groupby('conm')['atq'].pct_change() # Asset growth
fund['eqg'] = fund.groupby('conm')['teqq'].pct_change() # Equity growth

# Quality of earnings metrics
fund['acr'] = (fund['niq'] - fund['oancfq']) / fund['atq'] # Accruals ratio
fund['cftnir'] = fund['oancfq'] / fund['niq'] # Cashflow to net income ratio

In [None]:
# Columns carried forward: identifiers, raw fundamentals and the derived metrics
em_columns = [
    'conm', 'sedol', 'fic', 'curcdq',
    'atq', 'apq', 'ltq', 'niq', 'oiadpq', 'revtq',
    'chq', 'dpq', 'gpq', 'invtq', 'rectq', 'req',
    'saleq', 'teqq', 'xintq', 'ebitda', 'fcfq', 'capxq',
    'fincfq', 'ivncfq', 'oancfq', 'lctq', 'actq', 'cogsq',
    'roa', 'roe', 'roi', 'gpm', 'opm', 'npm', 'cr', 'qr', 'de',
    'icr', 'atr', 'itr', 'og', 'rg', 'eag', 'ag', 'eqg', 'acr', 'cftnir',
]

# Three-letter codes (matched against the 'fic' column) that define the EM universe
EM_countries = [
    'BRA', 'CHL', 'CHN', 'COL', 'CZE', 'EGY', 'GRC', 'HUN',
    'IND', 'IDN', 'KOR', 'KWT', 'MYS', 'MEX', 'PER', 'PHL',
    'POL', 'QAT', 'SAU', 'ZAF', 'TWN', 'THA', 'TUR', 'ARE',
]

# Keep the selected columns for rows whose 'fic' is in the EM list
fund = fund.loc[fund['fic'].isin(EM_countries), em_columns]

# Persist the EM subset; the index is written as the first CSV column
fund.to_csv('EM_fund.csv', index=True)

In [None]:
# Load fundamentals data for EM.
# The file name matches the one used when the EM subset was saved
# ('EM_fund.csv'); a different capitalisation fails on case-sensitive filesystems.
fund = pd.read_csv('EM_fund.csv', index_col='datadate')

# Make index into datetime type
fund.index = pd.to_datetime(fund.index)

# Sort the index so rows are in chronological order
fund = fund.sort_index()

### Convert currency for fundamental data

In [None]:
# Ensure both indexes are datetime so they compare chronologically
fund.index = pd.to_datetime(fund.index)
fx.index = pd.to_datetime(fx.index)

# Get unique lists of financial data dates and forex dates
fund_dates = fund.index.unique().tolist()
fx_dates = fx.index.unique().tolist()

# Sorted, NaT-free calendars. A binary search over the sorted FX calendar
# replaces scanning every FX date (twice) for every fundamentals date.
fx_calendar = fx.index.unique().dropna().sort_values()
fund_calendar = fund.index.unique().dropna()

# Position of the last FX date <= each fundamentals date; -1 means none exists
last_fx_pos = fx_calendar.searchsorted(fund_calendar, side='right') - 1

# Find the forex date closest (but not after) to each financial data date.
# Dates with no earlier FX date are left out, so they map to a missing value below.
date_mapping = {
    fund_date: fx_calendar[pos]
    for fund_date, pos in zip(fund_calendar, last_fx_pos)
    if pos >= 0
}

# Add a column to the financial data for the mapped forex dates
fund['fx_date'] = fund.index.map(date_mapping)

In [None]:
# Create a key to join fx and fund: "<date><currency code>"
fx['key'] = fx.index.astype(str) + fx['curd']
fund['key'] = fund['fx_date'].astype(str) + fund['curcdq']

# Merge the forex rates with the financial data (left join keeps every fund row)
merged_fund = pd.merge(fund, fx, on='key', how='left')

# Set exchange rate to 1 where currency is USD.
# This must happen BEFORE the missing-rate filter: otherwise USD rows that have
# no entry in the FX table are dropped and this assignment never takes effect.
merged_fund.loc[merged_fund['curcdq'] == 'USD', 'exratd_toUSD'] = 1

# Drop rows where exchange rate isn't available
merged_fund = merged_fund[merged_fund['exratd_toUSD'].notna()]

In [None]:
# Define the columns that need to be converted to USD
financial_columns = ['atq', 'apq', 'ltq', 'niq', 'oiadpq', 'revtq', 'chq', 'dpq', 'gpq', 'invtq', 'rectq', 'req',
                     'saleq', 'teqq', 'xintq', 'ebitda', 'fcfq', 'capxq', 'fincfq', 'ivncfq', 'oancfq', 'lctq', 'actq', 'cogsq']

# Work on an independent copy so the assignments below do not write into a filtered view
merged_fund = merged_fund.copy()

# Convert the financial columns to USD using the exchange rate
for col in financial_columns:
    merged_fund[col] = merged_fund[col] * merged_fund['exratd_toUSD']

# Recover each surviving row's original report date.
# The merge gave the rows fresh 0..n-1 labels in the same order as `fund`, and
# filtering kept those labels, so each label is the row's position in `fund`.
# Taking the first len(merged_fund) dates instead would shift dates onto the
# wrong rows as soon as any row had been dropped.
# NOTE(review): assumes the FX join was one-to-one (unique fx keys) — confirm.
original_dates = fund.index[merged_fund.index.to_numpy()]

# Drop unwanted columns
merged_fund = merged_fund[['conm', 'sedol', 'fic', 'curcdq', 'atq', 'apq', 'ltq', 'niq', 'oiadpq', 'revtq',
                           'chq', 'dpq', 'gpq', 'invtq', 'rectq', 'req', 'saleq', 'teqq', 'xintq', 'ebitda', 'fcfq', 'capxq',
                           'fincfq', 'ivncfq', 'oancfq', 'lctq', 'actq', 'cogsq', 'roa', 'roe', 'roi', 'gpm', 'opm', 'npm', 'cr', 'qr', 'de',
                           'icr', 'atr', 'itr', 'og', 'rg', 'eag', 'ag', 'eqg', 'acr', 'cftnir']].copy()

# Re-attach the report dates (positional assignment, one per surviving row)
merged_fund['datadate'] = original_dates

# Set index
merged_fund.set_index('datadate', inplace=True)

# Replace dataframe
fund = merged_fund

### Standardise end-of-quarter dates

In [None]:
# Hand-maintained quarter-end calendar, 2000-Q1 through 2024-Q2 (one year per row).
# NOTE(review): most entries look like the last weekday of the quarter, while the
# entries from 2023-09-30 onward are calendar month-ends — confirm this is intended.
standard_quarter_ends = pd.to_datetime([
    '2000-03-31', '2000-06-30', '2000-09-29', '2000-12-29',
    '2001-03-30', '2001-06-29', '2001-09-28', '2001-12-31',
    '2002-03-29', '2002-06-28', '2002-09-30', '2002-12-31',
    '2003-03-31', '2003-06-30', '2003-09-30', '2003-12-31',
    '2004-03-31', '2004-06-30', '2004-09-30', '2004-12-31',
    '2005-03-31', '2005-06-30', '2005-09-30', '2005-12-30',
    '2006-03-31', '2006-06-30', '2006-09-29', '2006-12-29',
    '2007-03-30', '2007-06-29', '2007-09-28', '2007-12-31',
    '2008-03-31', '2008-06-30', '2008-09-30', '2008-12-31',
    '2009-03-31', '2009-06-30', '2009-09-30', '2009-12-31',
    '2010-03-31', '2010-06-30', '2010-09-30', '2010-12-31',
    '2011-03-31', '2011-06-30', '2011-09-30', '2011-12-30',
    '2012-03-30', '2012-06-29', '2012-09-28', '2012-12-31',
    '2013-03-29', '2013-06-28', '2013-09-30', '2013-12-31',
    '2014-03-31', '2014-06-30', '2014-09-30', '2014-12-31',
    '2015-03-31', '2015-06-30', '2015-09-30', '2015-12-31',
    '2016-03-31', '2016-06-30', '2016-09-30', '2016-12-30',
    '2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29',
    '2018-03-30', '2018-06-29', '2018-09-28', '2018-12-31',
    '2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
    '2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31',
    '2021-03-31', '2021-06-30', '2021-09-30', '2021-12-31',
    '2022-03-31', '2022-06-30', '2022-09-30', '2022-12-30',
    '2023-03-31', '2023-06-30', '2023-09-30', '2023-12-31',
    '2024-03-31', '2024-06-30',
])

In [None]:
# Function to map to the nearest standard quarter end
def map_to_nearest_quarter(date, quarter_ends=None):
    """Return the quarter-end date closest to ``date``.

    Parameters
    ----------
    date : timestamp-like
        The date to standardise.
    quarter_ends : DatetimeIndex or sequence of timestamps, optional
        Calendar to snap onto. Defaults to the module-level
        ``standard_quarter_ends``.

    Returns
    -------
    ``date`` itself when it is already in the calendar, otherwise the calendar
    entry with the smallest absolute distance (the earlier entry wins a tie).
    ``NaT`` is returned for a missing ``date``.
    """
    if quarter_ends is None:
        quarter_ends = standard_quarter_ends

    # Comparisons against NaT are always False, so min() would silently return
    # the first calendar entry; report the date as missing instead.
    if pd.isna(date):
        return pd.NaT

    if date in quarter_ends:
        return date

    return min(quarter_ends, key=lambda quarter_end: abs(quarter_end - date))

# Snap every row's date onto the standard quarter-end calendar
standardised_dates = fund.index.to_series().apply(map_to_nearest_quarter)
fund['date'] = standardised_dates

# Promote the standardised dates to the index (in place, replacing the raw dates)
fund.set_index('date', inplace=True)

In [None]:
# Keep observations dated on or after the start date, in chronological order
start_date = '2005-03-31'
fund = fund.loc[fund.index >= start_date].sort_index()

In [None]:
# Persist the EM fundamentals with standardised dates (index written as a column)
fund.to_csv(path_or_buf='Stand_EM_fund.csv', index=True)

In [None]:
# Distinct SEDOL identifiers present in the fundamentals (first occurrence kept)
sedols = fund.loc[:, 'sedol'].drop_duplicates(keep='first')

# Write them without the index column
sedols.to_csv(path_or_buf='sedols.csv', index=False)

## Import and Sort Price Data

In [None]:
# Number of CSV rows parsed per chunk
chunk_size = 100000

# Explicit column types so pandas does not have to infer them chunk by chunk
dtype = dict(
    datadate=str,
    conm=str,
    sedol=str,
    curcdd=str,
    prccd=float,
    prchd=float,
    prcld=float,
    cshtrd=float,
    cshoc=float,
)

# Read the daily price file piece by piece, collecting every chunk
chunks = [
    chunk
    for chunk in pd.read_csv('EM_daily_price.csv', chunksize=chunk_size, dtype=dtype, index_col='datadate')
]

# Stitch the chunks back into one frame
df = pd.concat(chunks)

# Parse the 'datadate' index into timestamps
df.index = pd.to_datetime(df.index)

In [None]:
# Keep only the identifier, price, volume, currency and share-count columns
price = df.loc[:, ['conm', 'sedol', 'prccd', 'prchd', 'prcld', 'cshtrd', 'curcdd', 'cshoc']]

In [None]:
# Quarter-end calendar used to sample the daily prices, 2000-Q1 through 2024-Q2
# (one year per row).
quarter_end_dates = pd.to_datetime([
    '2000-03-31', '2000-06-30', '2000-09-29', '2000-12-29',
    '2001-03-30', '2001-06-29', '2001-09-28', '2001-12-31',
    '2002-03-29', '2002-06-28', '2002-09-30', '2002-12-31',
    '2003-03-31', '2003-06-30', '2003-09-30', '2003-12-31',
    '2004-03-31', '2004-06-30', '2004-09-30', '2004-12-31',
    '2005-03-31', '2005-06-30', '2005-09-30', '2005-12-30',
    '2006-03-31', '2006-06-30', '2006-09-29', '2006-12-29',
    '2007-03-30', '2007-06-29', '2007-09-28', '2007-12-31',
    '2008-03-31', '2008-06-30', '2008-09-30', '2008-12-31',
    '2009-03-31', '2009-06-30', '2009-09-30', '2009-12-31',
    '2010-03-31', '2010-06-30', '2010-09-30', '2010-12-31',
    '2011-03-31', '2011-06-30', '2011-09-30', '2011-12-30',
    '2012-03-30', '2012-06-29', '2012-09-28', '2012-12-31',
    '2013-03-29', '2013-06-28', '2013-09-30', '2013-12-31',
    '2014-03-31', '2014-06-30', '2014-09-30', '2014-12-31',
    '2015-03-31', '2015-06-30', '2015-09-30', '2015-12-31',
    '2016-03-31', '2016-06-30', '2016-09-30', '2016-12-30',
    '2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29',
    '2018-03-30', '2018-06-29', '2018-09-28', '2018-12-31',
    '2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
    '2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31',
    '2021-03-31', '2021-06-30', '2021-09-30', '2021-12-31',
    '2022-03-31', '2022-06-30', '2022-09-30', '2022-12-30',
    '2023-03-31', '2023-06-30', '2023-09-30', '2023-12-31',
    '2024-03-31', '2024-06-30',
])

In [None]:
# Weekday calendar spanning the price history, as a set for fast membership tests
earliest_price_date = price.index.min()
business_days = pd.bdate_range(earliest_price_date, price.index.max())
business_day_set = set(business_days)

# Map each quarter-end to itself when it is a business day in range, otherwise
# to the closest earlier business day. Quarter-ends with no business day at or
# before them inside the price history get no entry.
# NOTE(review): the walk-back has only a lower bound, so a quarter-end later than
# the last price date is mapped to the last available business day — confirm intended.
adjusted_dates = {}
one_day = pd.Timedelta(days=1)
for date in quarter_end_dates:
    adjusted_date = date
    # Step back one calendar day at a time until a business day is found
    # or the start of the price history has been passed
    while adjusted_date not in business_day_set and adjusted_date >= earliest_price_date:
        adjusted_date -= one_day
    if adjusted_date in business_day_set:
        adjusted_dates[date] = adjusted_date

# Keep only the price rows that fall on one of the adjusted dates
adjusted_dates_values = list(adjusted_dates.values())
filtered_price = price.loc[price.index.isin(adjusted_dates_values)].copy()

# Relabel each kept row with the quarter-end its date stands in for
date_mapping = dict(zip(adjusted_dates.values(), adjusted_dates.keys()))
filtered_price['original_date'] = filtered_price.index.map(date_mapping)
filtered_price.set_index('original_date', inplace=True)

In [None]:
# Continue with an independent copy of the quarter-end price rows
price = filtered_price.copy(deep=True)

In [None]:
# Label the index 'datadate'
price.index = price.index.rename('datadate')

In [None]:
# Fix the column set and order used from here on
price = price.loc[:, ['conm', 'sedol', 'prccd', 'prchd', 'prcld', 'cshtrd', 'curcdd', 'cshoc']]

### Convert Currency for Price Data

In [None]:
# Ensure both indexes are datetime so they compare chronologically
fx.index = pd.to_datetime(fx.index)
price.index = pd.to_datetime(price.index)

# Get unique lists of price data dates and forex dates
price_dates = price.index.unique().tolist()
fx_dates = fx.index.unique().tolist()

# Sorted, NaT-free calendars. A binary search over the sorted FX calendar
# replaces scanning every FX date (twice) for every price date.
fx_calendar = fx.index.unique().dropna().sort_values()
price_calendar = price.index.unique().dropna()

# Position of the last FX date <= each price date; -1 means none exists
last_fx_pos = fx_calendar.searchsorted(price_calendar, side='right') - 1

# Find the forex date closest (but not after) to each price data date.
# Dates with no earlier FX date are left out, so they map to a missing value below.
date_mapping = {
    price_date: fx_calendar[pos]
    for price_date, pos in zip(price_calendar, last_fx_pos)
    if pos >= 0
}

# Add a column to the price data for the mapped forex dates
price['fx_date'] = price.index.map(date_mapping)

# Build the "<date><currency code>" join keys on both sides
fx['key'] = fx.index.astype(str) + fx['curd']
price['key'] = price['fx_date'].astype(str) + price['curcdd']

In [None]:
# Merge the forex rates with the price data (left join keeps every price row)
merged_price = pd.merge(price, fx, on='key', how='left')

# Set exchange rate to 1 where currency is USD.
# This must happen BEFORE the missing-rate filter: otherwise USD rows that have
# no entry in the FX table are dropped and this assignment never takes effect.
merged_price.loc[merged_price['curcdd'] == 'USD', 'exratd_toUSD'] = 1

# Drop rows where exchange rate isn't available
merged_price = merged_price[merged_price['exratd_toUSD'].notna()]

In [None]:
# Price fields quoted in local currency
price_columns = ['prccd', 'prchd', 'prcld']

# Scale all three by each row's exchange rate in one row-wise multiplication
merged_price[price_columns] = merged_price[price_columns].mul(merged_price['exratd_toUSD'], axis=0)

In [None]:
# Recover each surviving row's quarter-end date.
# The merge gave the rows fresh 0..n-1 labels in the same order as `price`, and
# filtering kept those labels, so each label is the row's position in `price`.
# Indexing by 'fx_date' instead would label a row with the FX lookup date, which
# differs from the quarter-end whenever the FX table has no entry on that exact
# day, so the row would no longer line up with the fundamentals dates.
# NOTE(review): assumes the FX join was one-to-one (unique fx keys) — confirm.
quarter_end_index = price.index[merged_price.index.to_numpy()]

# Drop unwanted columns
merged_price = merged_price[['conm', 'sedol', 'curcdd', 'prccd', 'prchd', 'prcld', 'cshtrd', 'cshoc']].copy()

# Set index to the quarter-end dates and name it
merged_price.index = quarter_end_index.rename('datadate')

In [None]:
# Row-over-row change in the USD close ('prccd'), computed within each company name
close_by_company = merged_price.groupby('conm')['prccd']
merged_price['return'] = close_by_company.pct_change()

In [None]:
# Persist the quarterly USD prices and returns (index written as a column)
merged_price.to_csv(path_or_buf='Quarterly_EM_price.csv', index=True)

## Merge the Datasets

In [None]:
# Reload the two saved intermediate datasets: quarterly prices, then fundamentals
ohlcv_df, fund_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Quarterly_EM_price.csv', 'Stand_EM_fund.csv')
)

In [None]:
# Parse the date column of each frame ('datadate' for prices, 'date' for fundamentals)
ohlcv_df = ohlcv_df.assign(datadate=pd.to_datetime(ohlcv_df['datadate']))
fund_df = fund_df.assign(date=pd.to_datetime(fund_df['date']))

In [None]:
# Build the composite "<date>_<company name>_<sedol>" key on both frames and
# use it as their index
for frame, date_col in ((ohlcv_df, 'datadate'), (fund_df, 'date')):
    frame['merge_key'] = frame[date_col].astype(str) + '_' + frame['conm'] + '_' + frame['sedol']
    frame.set_index('merge_key', inplace=True)

# Inner join on the key: only rows present in both datasets survive.
# Columns present on both sides get the default '_x' / '_y' suffixes.
merged_df = ohlcv_df.merge(fund_df, left_index=True, right_index=True, how='inner')

# Discard the composite key and fall back to a plain 0..n-1 index
merged_df = merged_df.reset_index(drop=True)

In [None]:
# Index the merged frame by the price-side 'datadate' column
merged_df = merged_df.set_index('datadate')

In [None]:
# Columns retained in the final dataset. 'conm_x' / 'sedol_x' are the
# price-side copies of the identifiers produced by the merge.
columns_to_keep = [
    'conm_x', 'sedol_x', 'curcdq', 'prccd', 'return',
    'atq', 'apq', 'ltq', 'niq', 'oiadpq', 'revtq', 'dpq', 'gpq', 'invtq',
    'rectq', 'saleq', 'teqq', 'xintq', 'ebitda', 'fcfq', 'fincfq', 'ivncfq',
    'oancfq', 'lctq', 'actq', 'cogsq',
    'roa', 'roe', 'roi', 'gpm', 'opm', 'npm', 'cr', 'qr', 'de', 'icr',
    'atr', 'itr', 'og', 'rg', 'eag', 'ag', 'eqg', 'acr', 'cftnir',
    'prchd', 'prcld', 'cshtrd', 'cshoc',
]

data = merged_df.loc[:, columns_to_keep]

In [None]:
# Variables that must all be present for a row to be kept
key_vars = [
    'conm_x', 'sedol_x', 'curcdq',
    'actq', 'apq', 'atq', 'cogsq', 'dpq', 'gpq', 'invtq', 'lctq', 'ltq',
    'oiadpq', 'niq', 'rectq', 'revtq', 'saleq', 'teqq', 'xintq',
    'fincfq', 'ivncfq', 'oancfq',
    'prchd', 'prcld', 'prccd', 'cshtrd', 'cshoc', 'return',
]

# Discard any row with a missing value in at least one key variable
filtered_data = data.dropna(axis=0, how='any', subset=key_vars)

In [None]:
# Write the final merged dataset, keeping the date index as a column
filtered_data.to_csv(path_or_buf='Data.csv', index=True)