In [146]:
import os
from glob import glob

In [160]:
# Collect every saved HTML page in the html-sites/ directory.
# glob() gives no ordering guarantee (it depends on the file system), so the
# result is sorted to make html_sites[0] refer to the same file on every run.
html_sites = sorted(glob("html-sites/*.html"))

In [161]:
# Display the list of HTML file paths matched by the glob pattern.
html_sites

['html-sites/Amazon 906866.html', 'html-sites/Microsoft 870747.html']

In [162]:
# Work with the first matched file only, and print its path for reference.
html_site = html_sites[0]
print(html_site)

html-sites/Amazon 906866.html


In [163]:
from bs4 import BeautifulSoup

# Parse the selected page with Python's built-in HTML parser.
# The file is opened in binary mode so that BeautifulSoup detects the
# document's encoding itself instead of relying on the platform's default
# text encoding, which can garble non-ASCII text (e.g. German umlauts).
with open(html_site, 'rb') as fp:
    soup = BeautifulSoup(fp, 'html.parser')



In [164]:
# Display the parsed document's <title> element.
soup.title

<title>Amazon 906866</title>

In [165]:
# First element in the document carrying the 'headline-content' CSS class.
headline_content = soup.find(attrs={"class": "headline-content"})


In [166]:
# CSS class strings of the headline elements to look up. Each string is also
# used as the key under which the element's text is stored in basic_info.
headline_classes = [
    "headline instrument-name", "instrument-type-name",
    "instrument-wkn", "instrument-isin",
]

# Starts empty; populated with one entry per headline class.
basic_info = {}

In [167]:
# For each headline class, find the matching element inside headline_content
# and store its text under that class string.
for headline in headline_classes:
    basic_info[headline] = headline_content.find(attrs={"class": headline}).get_text()

In [168]:
# Display the extracted headline fields.
basic_info


{'headline instrument-name': 'Amazon',
 'instrument-type-name': 'Aktie',
 'instrument-wkn': '906866',
 'instrument-isin': 'US0231351067'}

In [169]:
# Starts empty; populated from the rows of the facts table.
facts = {}

In [170]:
# First <div> carrying the 'sh-facts-list' CSS class.
facts_table = soup.find('div', class_='sh-facts-list')

In [171]:
# Each <tr> is unpacked into exactly two <td> texts: the first becomes the
# key and the second the value. A row with any other number of <td> cells
# raises ValueError on unpacking.
rows = facts_table.find_all('tr')
for row in rows:
    key, value = (cell.get_text() for cell in row.find_all('td'))
    facts[key] = value

In [172]:
# Display the key/value pairs collected from the facts table.
facts

{'Branche': 'Einzelhandel',
 'Land': 'USA',
 'Mitarbeiter': '1.608.000',
 'Börsenwert': '58,1 Mrd. EUR (Groß)',
 'Sparplanfähig': 'Ja'}

In [175]:
# Text of the first <div> with CSS class 'description', wrapped in a
# single-entry dict.
description = {
    'description': soup.find('div', class_='description').get_text(),
}


In [176]:
# Display the extracted description entry.
description

{'description': 'Amazon.com ist ein international führender \nOnline-Versandhändler. Bei Amazon finden Käufer neue und gebrauchte \nWaren aus beinahe allen Bereichen der Konsumgüterindustrie. Bücher, \nMusik, DVDs, Elektronikprodukte, Fotos, Digitale Downloads, Software, \nPC- und  Videospiele werden ebenso angeboten wie Geräte für Küche, Haus \nund Garten, Spielwaren, Sport- und Freizeitartikel, Schuhe und Schmuck, \n...'}

In [177]:
# First <div> carrying the 'sh-share-trends' CSS class.
share_trends = soup.find('div', class_='sh-share-trends')

In [178]:

# Text of every <a class="sh-button-default"> link inside share_trends.
trends = [
    link.get_text()
    for link in share_trends.find_all('a', class_='sh-button-default')
]



In [179]:
# Display the collected trend labels.
trends

['Big Data',
 'Bücher',
 'China',
 'Cloud Computing',
 'Consumer-Electronics',
 'Data Science',
 'Deep Learning',
 'E-Commerce China',
 'Haushaltsroboter',
 'Home Office',
 'Internet-Titans',
 'IT-Giganten',
 'Machine Learning',
 'Musik',
 'Nasdaq 100',
 'Online-Handel - Portale',
 'Online-Musik',
 'Post/ Paketdienste',
 'Quantencomputer',
 'Robotik',
 'S&P 500',
 'Technikgetriebene Trends',
 'Technologie-Portfolios',
 'Video-Portale']

In [180]:
# Text of every <a class="sh-button-default"> link inside the first
# <div class="sh-share-affiliated-indexes">.
share_affiliated_indexes = [
    link.get_text()
    for link in soup.find(
        'div', class_='sh-share-affiliated-indexes'
    ).find_all('a', class_='sh-button-default')
]

In [181]:
# Display the collected affiliated index names.
share_affiliated_indexes

['NASDAQ 100', 'S&P 500']