In [94]:
import requests
import pandas as pd
from api_keys import eodhd_api_key as API_KEY

### Data Downloads

#### Get exchanges list

In [9]:
# Download the list of exchanges known to EODHD.
# should return all data with one request
url = f"https://eodhd.com/api/exchanges-list/?api_token={API_KEY}&fmt=json"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail loudly on HTTP errors (invalid token, rate limit, ...) instead of
# writing an error payload to disk as if it were exchange data.
response.raise_for_status()
exchanges_data = response.json()
exchanges_df = pd.DataFrame(exchanges_data)
# save
exchanges_df.to_csv("eodhd_exchanges.csv", index=False, sep=";")

#### Get names and tickers (raw)

##### US Stocks, ETFs, etc.

In [24]:
# https://eodhd.com/financial-apis/exchanges-api-list-of-tickers-and-trading-hours#Get_List_of_Tickers_Exchange_Symbols

# Download the symbol list of the "US" exchange code twice: once without the
# `delisted` flag (active tickers) and once with `delisted=1` (inactive tickers).
exchange_code = "US"
# get active tickers (still listed as of a month ago)
url = f"https://eodhd.com/api/exchange-symbol-list/{exchange_code}?api_token={API_KEY}&fmt=json"
response = requests.get(url, timeout=60)  # timeout: do not hang on a stalled connection
response.raise_for_status()  # surface HTTP errors instead of parsing an error body
active_tickers = response.json()
# get inactive tickers
url = f"https://eodhd.com/api/exchange-symbol-list/{exchange_code}?delisted=1&api_token={API_KEY}&fmt=json"
response = requests.get(url, timeout=60)
response.raise_for_status()
inactive_tickers = response.json()

active_tickers_df = pd.DataFrame(active_tickers)
inactive_tickers_df = pd.DataFrame(inactive_tickers)

print(f"Retrieved {len(active_tickers_df)} active tickers and {len(inactive_tickers_df)} inactive tickers")

In [32]:
# Tag every row with its listing status, then stack both frames and persist them.
delisted_colname = "delisted_as_of_may_2024"
for tickers_frame, is_delisted in (
    (active_tickers_df, False),
    (inactive_tickers_df, True),
):
    # the flag is written onto the existing frames, so they carry it afterwards too
    tickers_frame[delisted_colname] = is_delisted

# active rows first, inactive rows appended below
all_tickers_df = pd.concat(objs=[active_tickers_df, inactive_tickers_df])

# write the combined table (semicolon-separated, without the index)
all_tickers_df.to_csv(
    "raw/nat/eodhd_US_names_and_tickers.csv",
    sep=";",
    index=False,
)

##### Crypto

In [96]:
# Download the symbol list of the "CC" exchange code, once with `delisted=0`
# (active tickers) and once with `delisted=1` (inactive tickers).
exchange_code = "CC"
# active 
url = f'https://eodhd.com/api/exchange-symbol-list/{exchange_code}?delisted=0&api_token={API_KEY}&fmt=json'
response = requests.get(url, timeout=60)  # timeout: do not hang on a stalled connection
response.raise_for_status()  # surface HTTP errors instead of parsing an error body
active_tickers = response.json()
# inactive
url = f'https://eodhd.com/api/exchange-symbol-list/{exchange_code}?delisted=1&api_token={API_KEY}&fmt=json'
response = requests.get(url, timeout=60)
response.raise_for_status()
inactive_tickers = response.json()

active_tickers_df = pd.DataFrame(active_tickers)
inactive_tickers_df = pd.DataFrame(inactive_tickers)

In [107]:
# Mark listing status on both crypto frames, combine them and store the result.
delisted_colname = "delisted_as_of_may_2024"
status_by_frame = [
    (active_tickers_df, False),
    (inactive_tickers_df, True),
]
for tickers_frame, is_delisted in status_by_frame:
    # the column is added to the existing frame objects themselves
    tickers_frame[delisted_colname] = is_delisted

# active rows first, then the inactive ones
all_tickers_df = pd.concat(objs=[active_tickers_df, inactive_tickers_df])

# persist the full table (semicolon-separated, index omitted)
all_tickers_df.to_csv(
    "raw/nat/eodhd_cryptos_names_and_tickers.csv",
    sep=";",
    index=False,
)

##### Commodities


In [None]:
# Manually created Excel sheet (a mix of futures, ETFs and equities selected to represent commodities)

### Data Processing and Cleaning

The full list of EODHD US tickers includes many that are not relevant to us. We create and save filtered versions of the dataset, with stocks and ETFs stored separately.

##### US stocks dataset

- create indicator column showing S&P 500 membership (May 2024)
- filter for "Common Stock" in Type column (to exclude preferred shares etc.)
- filter for "NYSE" and "NASDAQ" in Exchange column (only major exchanges, exclude OTC, Pink Sheets etc.)

##### US ETFs dataset

- filter for "ETF" in Type column
- filter for "NYSE", "NASDAQ" and "NYSE ARCA" in Exchange column (many ETFs are traded on NYSE ARCA)

In [68]:
# load full dataset
# pandas' default NA parsing would convert literal field values such as "NA",
# "NULL" or "None" into missing values, so a ticker whose code is one of those
# strings would be dropped below. Only treat empty fields as missing.
df = pd.read_csv(
    "raw/nat/eodhd_US_names_and_tickers.csv",
    sep=";",
    keep_default_na=False,
    na_values=[""],
)
# remove rows with one of the following columns missing: Name, Code, Type, Exchange (should not be many)
n_before = len(df)
df = df.dropna(subset=["Name", "Code", "Type", "Exchange"])
print(f"Removed {n_before - len(df)} rows with missing Name, Code, Type or Exchange")

Removed 384 rows with missing Name, Code, Type or Exchange


In [69]:
# load sp500 constituents list
sp500_tickers = pd.read_excel("raw/nat/SP500_constituents_may_2024.xlsx")["Symbol"]
# Replace the literal "." with "-" (presumably to match the EODHD code notation,
# e.g. "BRK.B" -> "BRK-B" - TODO confirm). regex=False is required: with a regex
# replacement (the default before pandas 2.0) "." matches ANY character and
# every symbol would be turned into a run of dashes.
sp500_tickers = sp500_tickers.str.replace(".", "-", regex=False)
# add indicator column
df["in_sp500_as_of_may_2024"] = df["Code"].isin(sp500_tickers)

# note: we leave both listed and delisted stocks included here
stocks_df = df[(df["Type"] == "Common Stock") & 
                (df["Exchange"].isin(["NYSE", "NASDAQ"]))]
print(f"Retained {len(stocks_df)} stocks")

# sort: S&P 500 members first, and within each group listed before delisted
stocks_df = stocks_df.sort_values(by=["in_sp500_as_of_may_2024", "delisted_as_of_may_2024"], ascending=[False, True])
# save
stocks_df.to_csv("names_and_tickers/eodhd_stocks.csv", index=False, sep=";")

Retained 20518 stocks


In [70]:
# Keep rows typed "ETF" that trade on one of the selected exchanges.
# Both listed and delisted ETFs are retained.
etf_exchanges = ["NYSE", "NASDAQ", "NYSE ARCA"]
is_etf = df["Type"] == "ETF"
on_selected_exchange = df["Exchange"].isin(etf_exchanges)
etfs_df = df[is_etf & on_selected_exchange]
print(f"Retained {len(etfs_df)} etfs")

# listed ETFs (False) come before delisted ones (True)
etfs_df = etfs_df.sort_values(
    by=["delisted_as_of_may_2024"],
    ascending=[True],
)
# write the filtered table to disk
etfs_df.to_csv(
    "names_and_tickers/eodhd_etfs.csv",
    sep=";",
    index=False,
)

Retained 4057 etfs


##### Crypto dataset

- create "Code_clean" column with standalone ticker name (e.g. "BTC" for "BTC-USD")
- create indicator column showing membership of top 200 market cap list (coinmarketcap.com snapshot, Dec 2022)

In [54]:
# Read the raw EODHD crypto listing and the top-200 spreadsheet.
import pandas as pd
import re
crypto_df = pd.read_csv("raw/nat/eodhd_cryptos_names_and_tickers.csv", sep=";")
top200_df = pd.read_excel("raw/nat/top200_cryptos_dec_2022.xlsx")

# Derive the bare ticker by stripping a trailing "-USD" (skipped if the column already exists).
if "Code_clean" not in crypto_df.columns:
    usd_suffix = re.compile(r"-USD$")
    crypto_df["Code_clean"] = crypto_df["Code"].map(lambda code: usd_suffix.sub("", code))

# Flag tickers whose cleaned code appears in the top-200 "Symbol" column
# (skipped if the column already exists).
top200_colname = "in_top200_as_of_dec_2022"
if top200_colname not in crypto_df.columns:
    crypto_df[top200_colname] = crypto_df["Code_clean"].isin(top200_df["Symbol"])
    n_matched = crypto_df[top200_colname].sum()
    print(f"Matched {n_matched} tickers to the top200 list")

# top-200 members first; within each group, listed before delisted
crypto_df = crypto_df.sort_values(
    by=[top200_colname, "delisted_as_of_may_2024"],
    ascending=[False, True],
)
# write the processed table to disk
crypto_df.to_csv(
    "names_and_tickers/eodhd_cryptos.csv",
    sep=";",
    index=False,
)

Matched 195 tickers to the top200 list


##### Commodities dataset

- load from excel file and save to csv

In [2]:
# Read the "commodities" sheet of the raw workbook.
commodities_df = pd.read_excel(
    "raw/nat/commodities_names_and_tickers.xlsx",
    sheet_name="commodities",
)

# Re-export it as a semicolon-separated CSV without the index.
commodities_df.to_csv(
    "names_and_tickers/yahoo_eodhd_commodities.csv",
    sep=";",
    index=False,
)