In [441]:
import investpy
import pandas as pd

## Fetching and formatting the data
Below, the *investpy* library is used to look up each ETF by its ISIN and download its historical price data.

In [520]:
def get_recent_data_from_isins(isins, from_date='01/01/2010', to_date='11/08/2020', countries=None):
    """Download the price history of several ETFs and combine it into one frame.

    For every ISIN the first ``investpy.search_quotes`` hit is used and its
    historical data is retrieved. Three columns are added to each history:
    ``Name`` (name of the search hit), ``ISIN`` and ``30-day-pct-change``
    (relative change of ``Close`` versus the row 30 positions earlier, with
    the first rows, where no earlier value exists, set to 0).

    Parameters
    ----------
    isins : iterable of str
        ISINs to look up.
    from_date, to_date : str
        Date range passed to ``retrieve_historical_data``, as ``dd/mm/yyyy``.
    countries : list of str, optional
        Countries passed to ``investpy.search_quotes``; defaults to
        ``['germany']``.

    Returns
    -------
    pandas.DataFrame
        All histories stacked and sorted by their index. Empty when ``isins``
        is empty.
    """
    if countries is None:
        countries = ['germany']

    # Collect the per-ETF frames and concatenate once; concatenating inside
    # the loop copies all previously collected rows on every iteration.
    histories = []
    for isin in isins:
        best_match = investpy.search_quotes(text=isin, countries=countries)[0]
        history = best_match.retrieve_historical_data(from_date=from_date, to_date=to_date)
        history['Name'] = best_match.name
        history['ISIN'] = isin
        # Forward-fill at most one missing close explicitly, then compute the
        # change without implicit filling. This matches the previous
        # pct_change(limit=1) behaviour without using the deprecated keyword.
        close_filled = history['Close'].ffill(limit=1)
        history['30-day-pct-change'] = (
            close_filled.pct_change(periods=30, fill_method=None).fillna(0)
        )
        histories.append(history)

    if not histories:
        # pd.concat refuses an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(histories).sort_index()

In [521]:
# ISINs of the ETFs compared throughout this notebook.
isins = ['LU1681043599', 'IE00B86MWN23', 'LU0533033667', 'LU1650490474']

# One combined frame holding the price history of every ISIN above.
etf_dataset = get_recent_data_from_isins(isins=isins)

## What do the results look like?
Here we can see that our data has loaded successfully, so now we can start analyzing!

In [522]:
# Last expression of the cell, so the notebook renders the combined frame as a table.
etf_dataset

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Name,ISIN,30-day-pct-change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-09-30,63.420,63.42,63.120,63.120,40,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.000000
2010-10-01,62.760,62.76,62.760,62.760,0,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.000000
2010-10-04,62.200,62.20,62.200,62.200,0,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.000000
2010-10-06,62.510,62.51,62.510,62.510,0,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.000000
2010-10-07,62.450,62.45,62.450,62.450,0,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.000000
...,...,...,...,...,...,...,...,...
2020-08-10,195.670,196.18,195.670,196.120,380,Lyxor UCITS EuroMTS All-Maturity Investment Gr...,LU1650490474,0.014169
2020-08-11,44.785,45.15,44.785,44.865,10828,iShares Edge MSCI Europe Min Volatility UCITS,IE00B86MWN23,0.017924
2020-08-11,302.300,303.19,301.630,302.440,3534,Amundi Index Solutions - Amundi MSCI World UCI...,LU1681043599,0.040063
2020-08-11,370.600,371.15,364.850,367.550,4684,Lyxor MSCI World Information Technology TR UCI...,LU0533033667,0.038717


## Visualizing the results
Using *Bokeh*, we can create interactive charts that show how the ETF prices have developed over the selected time period.

In [523]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, NumeralTickFormatter

In [524]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [525]:
# Color generator for linechart, apparently not built into Bokeh
from bokeh.palettes import Category10
import itertools

def color_gen():
    """Yield the entries of ``Category10[10]`` in order, starting over when exhausted."""
    palette = Category10[10]
    for shade in itertools.cycle(palette):
        yield shade


# Module-level generator instance; each next(color) hands out the next palette entry.
color = color_gen()

## What is the overall development of the stock price?
Below you will find line charts depicting the overall stock price development over time. In the future, this could be replaced by a candlestick visualization for bonus points. However, a line chart slims down the visual a bit so we can better compare the stocks.

In [450]:
# Closing price over time, one line per ISIN.
p2 = figure(
    plot_width=800,
    plot_height=400,
    x_axis_type='datetime',
    title='Closing price per ETF',
    x_axis_label='Date',
    y_axis_label='Close',
)

# Tie each ISIN to a fixed palette position instead of drawing from a shared
# generator. The colour then depends only on the ISIN's position in `isins`,
# so it is the same in every chart and does not shift when the cell is re-run.
for isin, line_color in zip(isins, itertools.cycle(Category10[10])):
    p2.line(
        'Date',
        'Close',
        source=etf_dataset[etf_dataset['ISIN'] == isin],
        legend_group='ISIN',
        color=line_color,
        line_width=2,
    )

p2.legend.location = 'bottom_left'

In [451]:
# Render the closing-price line chart built in the previous cell.
show(p2)

## What was the monthly percentage change?
We can also calculate, for each day, the percentage change in the closing price relative to the close 30 trading days earlier. This gives us an idea of the volatility of the stock.

In [526]:
# 30-row percentage change of the closing price over time, one line per ISIN.
p = figure(
    plot_width=800,
    plot_height=400,
    x_axis_type='datetime',
    title='30-day percentage change per ETF',
    x_axis_label='Date',
    y_axis_label='30-day change',
)

# Tie each ISIN to a fixed palette position instead of drawing from a shared
# generator. The colour then depends only on the ISIN's position in `isins`,
# so it is the same in every chart and does not shift when the cell is re-run.
for isin, line_color in zip(isins, itertools.cycle(Category10[10])):
    p.line(
        'Date',
        '30-day-pct-change',
        source=etf_dataset[etf_dataset['ISIN'] == isin],
        legend_group='ISIN',
        color=line_color,
        line_width=2,
    )

p.legend.location = 'bottom_left'
# The column holds fractions (0.04 == 4 %); show the axis ticks as percentages.
p.yaxis.formatter = NumeralTickFormatter(format='0 %')

In [527]:
# Render the 30-day percentage-change chart built in the previous cell.
show(p)

## That's a lot of data!
Looking at the data at daily granularity might be a bit too much. Let's resample the dataset to yearly averages to get an easier visual to work with; most ETF decisions are made over a longer-term span anyway, so this should be more suitable.

In [530]:
# Yearly mean of every numeric column, computed separately for each ISIN.
# Only numeric columns are averaged: text columns such as 'Name' have no mean.
# NOTE(review): newer pandas releases prefer the alias 'YE' over 'A' - confirm
# against the installed version before switching.
numeric_columns = etf_dataset.select_dtypes(include='number').columns.tolist()
yearly_aggregate = (
    etf_dataset
    .groupby('ISIN')[numeric_columns]
    .resample('A')
    .mean()
    .reset_index()
)

# Year-over-year changes must be computed within each ISIN. The rows of all
# ETFs are stacked one after another, so a plain pct_change()/diff() on the
# whole column would compare an ETF's first year with the previous ETF's last
# year. The first year of every ISIN has no predecessor and is set to 0.
close_by_isin = yearly_aggregate.groupby('ISIN')['Close']
previous_close = close_by_isin.shift(1)
yearly_aggregate = yearly_aggregate.assign(**{
    'pct-change': (yearly_aggregate['Close'] / previous_close - 1).fillna(0),
    'value-change': close_by_isin.diff(periods=1).fillna(0),
})

yearly_aggregate = yearly_aggregate.sort_values('Date')
# Keep only year-end rows dated after 2011-01-01.
yearly_aggregate = yearly_aggregate[yearly_aggregate['Date'] > '2011-01-01']

In [531]:
# Year-over-year percentage change of the yearly mean close, one line per ISIN.
p4 = figure(
    plot_width=800,
    plot_height=400,
    x_axis_type='datetime',
    title='Yearly percentage change per ETF',
    x_axis_label='Date',
    y_axis_label='Yearly change',
)

# Tie each ISIN to a fixed palette position instead of drawing from a shared
# generator. The colour then depends only on the ISIN's position in `isins`,
# so it is the same in every chart and does not shift when the cell is re-run.
for isin, line_color in zip(isins, itertools.cycle(Category10[10])):
    p4.line(
        'Date',
        'pct-change',
        source=yearly_aggregate[yearly_aggregate['ISIN'] == isin],
        legend_group='ISIN',
        color=line_color,
        line_width=2,
    )

# The column holds fractions (0.04 == 4 %); show the axis ticks as percentages.
p4.yaxis.formatter = NumeralTickFormatter(format='0 %')
p4.legend.location = 'bottom_left'
show(p4)

## What type of ETFs are we looking at?
Below you will find an example of an API call, keyed by ISIN, that returns various information about an ETF, including its allocation across sectors (it could also include countries/markets).

In [465]:
import httpx

# Fields copied from each product record returned by the API.
product_fields = ('name', 'totalFee', 'distributionType', 'baseCurrency', 'sectors')

descriptions = []

# One client for all requests so the connection can be reused.
with httpx.Client() as client:
    for isin in isins:
        response = client.get(f'https://api.etf-data.com/product/{isin}')
        # Fail here with the HTTP status instead of a confusing KeyError or
        # JSON decoding error further down when the API answers with an error.
        response.raise_for_status()
        r = response.json()
        descriptions.append({field: r[field] for field in product_fields})

# One row per ISIN, in the same order as `isins`.
descriptions_df = pd.DataFrame(descriptions)


Unnamed: 0,name,totalFee,distributionType,baseCurrency,sectors
0,Amundi MSCI World UCITS ETF - EUR (C),0.38,ACCUMULATING,EUR,"[{'sector': 'INFORMATION_TECHNOLOGY', 'percent..."
1,iShares Edge MSCI Europe Minimum Volatility UC...,0.25,ACCUMULATING,EUR,"[{'sector': 'CONSUMER_STAPLES', 'percentage': ..."
2,Lyxor MSCI World Information Technology TR UCI...,0.3,ACCUMULATING,EUR,"[{'sector': 'INFORMATION_TECHNOLOGY', 'percent..."
3,Lyxor Euro Government Bond (DR) UCITS ETF - Acc,0.17,ACCUMULATING,EUR,"[{'sector': 'GOVERNMENT', 'percentage': 100.0}]"


In [514]:
from bokeh.palettes import Category20
from bokeh.transform import cumsum
from math import pi

# Draw one pie chart per ETF showing how its holdings are split across sectors,
# followed by the underlying numbers.
for row in descriptions_df.itertuples():
    name = row.name
    # Map sector name -> percentage for this ETF.
    sectors = {entry['sector']: entry['percentage'] for entry in row.sectors}

    data = pd.Series(sectors).reset_index(name='value').rename(columns={'index': 'sector'})
    # Each wedge's angle is its share of the total, expressed in radians.
    data['angle'] = data['value'] / data['value'].sum() * 2 * pi
    # Always index into the 20-colour palette. Category20 only has entries for
    # 3 to 20 colours, so looking it up by the sector count fails outside that
    # range, and a single fallback colour makes two-sector pies unreadable.
    # Colours wrap around if there are more than 20 sectors.
    palette = Category20[20]
    data['color'] = [palette[position % len(palette)] for position in range(len(data))]

    p = figure(plot_height=450, plot_width=800, title=f'Sectors: {name}', toolbar_location=None,
        tools="hover", tooltips="@sector: @value")

    # cumsum('angle') stacks the wedges: each one starts where the previous ended.
    p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_group='sector', source=data)
    p.legend.location = "top_left"
    p.legend.label_text_font_size = "6pt"
    show(p)
    print(data[['sector', 'value']])

                    sector  value
0   INFORMATION_TECHNOLOGY  21.95
1              HEALTH_CARE  13.42
2   CONSUMER_DISCRETIONARY  12.05
3               FINANCIALS  11.93
4              INDUSTRIALS  10.34
5   COMMUNICATION_SERVICES   9.21
6         CONSUMER_STAPLES   8.10
7                MATERIALS   4.43
8                UTILITIES   3.40
9              REAL_ESTATE   2.79
10                  ENERGY   2.38


                    sector  value
0         CONSUMER_STAPLES  19.49
1              HEALTH_CARE  15.85
2              INDUSTRIALS  13.01
3               FINANCIALS  12.86
4                UTILITIES  10.01
5   COMMUNICATION_SERVICES   8.53
6                MATERIALS   7.24
7   CONSUMER_DISCRETIONARY   5.04
8              REAL_ESTATE   4.26
9   INFORMATION_TECHNOLOGY   2.35
10                  ENERGY   0.69


                   sector  value
0  INFORMATION_TECHNOLOGY  99.74
1  CONSUMER_DISCRETIONARY   0.14
2             INDUSTRIALS   0.12


       sector  value
0  GOVERNMENT  100.0
