# Prices 

Fetch price data from the specified source. The expected fields are OHLCV (open, high, low, close, volume).

## Sources

Current data source: YahooFinance

Candidate future data source: IEXCloud


## Problems

Symbols are not always easy to determine. For older or smaller ETF products, price data for certain symbols is not available at all.
This is a major problem with the current data source.

A switch from YahooFinance to IEXCloud or another paid data provider is under consideration.


In [1]:
# General
import pandas as pd
from datetime import datetime
import random
import time
from dateutil.relativedelta import relativedelta

# File download
import yfinance as yf 
from tqdm.notebook import tqdm, trange

# Custom modules
from Tools import S3

# Hidden configurations
from mySecrets import config_file, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_BUCKET


In [9]:
# Connect to AWS S3 storage.
# `S3` is the project's own helper imported from `Tools`; the credentials come
# from the `mySecrets` module ("hidden configurations") so they are not spelled
# out in the notebook. The `s3` object is reused by the upload cells below.
s3 = S3()
s3.connect(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

Connected.


In [3]:
# Build the (ISIN -> YahooFinance listing) mapping from the config workbook:
#   1. read the "ETF_Overview" sheet (header=1: labels sit in the second row),
#   2. keep only the ISIN and the YahooFinance symbol / exchange / currency,
#   3. drop every row that has no symbol or no currency.
config = (
    pd.read_excel(config_file, sheet_name="ETF_Overview", header=1)
    .loc[:, ['Security_ISIN', 'Sym_YahooFin', 'Ex_YahooFin', 'CCY_YahooFin']]
    .dropna(subset=['Sym_YahooFin', 'CCY_YahooFin'])
)

# Preview the first rows
config.head()

Unnamed: 0,Security_ISIN,Sym_YahooFin,Ex_YahooFin,CCY_YahooFin
4,LU0836512888,B92C.MU,MUN,EUR
12,IE00B8KGV557,EUNZ.DE,ETR,EUR
13,IE00B86MWN23,EUN0.DE,ETR,EUR
14,IE00B8FHGS14,IQQ0.DE,ETR,EUR
15,IE00B6SPMN59,IBCK.DE,ETR,EUR


In [4]:
# Define range for price data: a fixed start date up to the moment this cell runs.
# Both values are handed to yf.download() as `start` / `end` in the download cell.
date_start = '2014-01-01'
date_end = datetime.today()
# Alternative (disabled): rolling window covering only the last 12 months.
# date_start = date_end + relativedelta(months = -12)

In [5]:
# Batch download input: all YahooFinance symbols from the config table,
# concatenated into one space-separated string.
symbols = ' '.join(config['Sym_YahooFin'])

# Peek at the beginning of the string as a sanity check
symbols[:50]

'B92C.MU EUNZ.DE EUN0.DE IQQ0.DE IBCK.DE ISDE.L ISU'

In [6]:
# Download daily price data for every symbol in one batch request.
# group_by='ticker' makes the ticker the outer level of the result's columns.
data = yf.download(
    tickers=symbols,
    interval="1d",
    start=date_start,
    end=date_end,
    group_by='ticker',
)


[*********************100%***********************]  1040 of 1040 completed

43 Failed downloads:
- IE00BNC1G707.SG: No data found for this date range, symbol may be delisted
- IE00BNC1G699.SG: No data found for this date range, symbol may be delisted
- LU1834983550.SG: No data found for this date range, symbol may be delisted
- IE00BGV5VN51.SG: No data found for this date range, symbol may be delisted
- FR0010342592.SG: No data found for this date range, symbol may be delisted
- LU1407890620.SG: No data found for this date range, symbol may be delisted
- ESTE.F: No data found for this date range, symbol may be delisted
- LU2082997516.SG: No data found for this date range, symbol may be delisted
- GPAB.L: No data found for this date range, symbol may be delisted
- EB3M.F: No data found for this date range, symbol may be delisted
- 0XCK.IL: No data found for this date range, symbol may be delisted
- LU0908501488.SG: No data found for this date range, symbol may be delisted
- XMBR.L: No d

In [7]:
# Reshape the wide download result into a long table with one row per
# (Date, Symbol):
#   - stack(level=0) moves the outer column level (the ticker, because the
#     download was grouped by ticker) into the row index,
#   - the two index levels are named 'Date' and 'Symbol',
#   - both levels are turned into regular columns (first Symbol, then Date)
#     so that they are kept when the data is stored in files.
df = (
    data
    .stack(level=0)
    .rename_axis(['Date', 'Symbol'])
    .reset_index(level=1)
    .reset_index()
)

# Fix the column order and keep only the relevant columns, for now
df = df[[
    'Symbol',
    'Date',
    'Adj Close',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
]]

# One group per symbol; each group later becomes its own file
# PL 10.01.2022 -> group by Symbols
grouped = df.groupby(df['Symbol'])

In [10]:
# Upload the price data to S3 storage, one file per symbol

# Date stamp of this run; it names the target folder
date = datetime.now().strftime('%Y_%m_%d')
print(date)

# All files of one run share the same dated folder
target_dir = 'data/price_raw/' + date + "/"

# Iterating the GroupBy yields (symbol, rows of that symbol) pairs directly,
# so no separate key lookup via get_group() is needed.
# NOTE(review): the original comment says the files are stored as tsv; the
# actual format is decided inside s3.uploadFile - confirm there.
for symbol, prices in grouped:
    s3.uploadFile(
        df=prices,
        name=symbol,
        dir=target_dir,
        # Take the bucket from the hidden configuration instead of a
        # hard-coded name, consistent with the Apple example upload below.
        # NOTE(review): assumes AWS_BUCKET names the bucket previously
        # hard-coded here - confirm in mySecrets.
        bucket=AWS_BUCKET,
    )

2022_04_15


## Example prices - compare across exchanges

Select a single stock and compare its prices across different exchanges.

Selection: **Apple Inc.**

For the analysis, have a look at:  ***_B012_analyzeData_Prices.ipynb***

The following query helps to find the symbols:
https://query2.finance.yahoo.com/v1/finance/search?q=Apple&quotesCount=20&newsCount=0

In [4]:
# Apple Inc. listings on YahooFinance, one entry per exchange:
# (symbol, trading currency, exchange name)
apple_listings = [
    # --- North America ---
    ('AAPL', 'USD', 'NasdaqGS'),          # USA
    ('AAPL.NE', 'CAD', 'NEO'),            # Canada
    ('AAPL.MX', 'MXN', 'Mexico'),         # Mexico

    # --- Latin America ---
    ('AAPL.BA', 'ARS', 'Buenos Aires'),   # Argentina
    ('AAPLD.BA', 'USD', 'Buenos Aires'),  # Argentina
    ('AAPL.SN', 'USD', 'Santiago'),       # Chile
    ('AAPLLCL.SN', 'CLP', 'Santiago'),    # Chile
    ('AAPL34.SA', 'BRL', 'Sao Paulo'),    # Brazil

    # --- European exchanges ---
    # Germany
    ('APC.DE', 'EUR', 'Xetra'),
    ('APC.F', 'EUR', 'Frankfurt'),
    ('APC.DU', 'EUR', 'Dusseldorf'),
    ('APC.BE', 'EUR', 'Berlin'),
    ('APC.HM', 'EUR', 'Hamburg'),
    ('APC.HA', 'EUR', 'Hanover'),
    ('APC.MU', 'EUR', 'Munich'),
    ('APC.SG', 'EUR', 'Stuttgart'),
    ('AAPL.VI', 'EUR', 'Vienna'),         # Austria
    ('AAPL.MI', 'EUR', 'Milan'),          # Italy
    ('0R2V.IL', 'USD', 'IOB'),            # UK
]

# Tabular form: one row per listing
df_sym_sel = pd.DataFrame(apple_listings, columns=['Symbol', 'CCY', 'Exchange'])
df_sym_sel.head()

Unnamed: 0,Symbol,CCY,Exchange
0,AAPL,USD,NasdaqGS
1,AAPL.NE,CAD,NEO
2,AAPL.MX,MXN,Mexico
3,AAPL.BA,ARS,Buenos Aires
4,AAPLD.BA,USD,Buenos Aires


In [8]:
# Price window for the exchange comparison: from 2021-01-01 up to the moment
# this cell runs
date_start = '2021-01-01'
date_end = datetime.today()

# All selected Apple symbols as one space-separated string
symbols = ' '.join(df_sym_sel['Symbol'])

# Daily prices for every listing in a single batch request; group_by='ticker'
# makes the ticker the outer level of the result's columns.
data = yf.download(
    tickers=symbols,
    interval="1d",
    start=date_start,
    end=date_end,
    group_by='ticker',
)

[*********************100%***********************]  19 of 19 completed

1 Failed download:
- AAPLLCL.SN: No data found, symbol may be delisted


In [9]:
# Turn the wide download result into a long table, one row per (Date, Symbol):
# the outer column level (the ticker, because the download was grouped by
# ticker) is stacked into the row index, the index levels are named, and both
# are converted to regular columns (first Symbol, then Date) so that they end
# up in the stored files.
df = (
    data
    .stack(level=0)
    .rename_axis(['Date', 'Symbol'])
    .reset_index(level=1)
    .reset_index()
)

# Fix the column order and keep only the relevant columns, for now
df = df[[
    'Symbol',
    'Date',
    'Adj Close',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
]]

# One group per symbol; each group later becomes its own file
# PL 10.01.2022 -> group by Symbols
grouped = df.groupby(df['Symbol'])

In [10]:
# Upload the price data to S3 storage, one file per symbol

# Date stamp of this run; it names the target folder
date = datetime.now().strftime('%Y_%m_%d')

# Walk over the (symbol, rows of that symbol) pairs of the grouping and hand
# each one to the S3 helper.
# NOTE(review): the original comment says the files are stored as tsv; the
# actual format is decided inside s3.uploadFile - confirm there.
for symbol, prices in grouped:
    s3.uploadFile(
        df=prices,
        name=symbol,
        dir='data/price_raw/' + date + "/",
        bucket=AWS_BUCKET,
    )