In [1]:
from pathlib import Path

import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup

## Web Scraping from Wikipedia - Companies in the DAX as of 22 September 2025

In [2]:
# Download the German Wikipedia article on the DAX and parse it into `soup`.
url = "https://de.wikipedia.org/wiki/DAX"
# Browser-like User-Agent sent with the request.
# NOTE(review): presumably needed because the default client agent is rejected — confirm.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
# requests waits indefinitely unless a timeout is given; bound it so a stalled
# connection cannot hang the notebook.
response = requests.get(url, headers=headers, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()
print(response)
soup = BeautifulSoup(response.content, "html.parser")

<Response [200]>


In [3]:
# Collect company name, ticker and industry from the table cells of the page.
# NOTE(review): the slice bounds and the row width below are tied to the page
# layout at scrape time — re-check them whenever the article changes.
FIRST_CELL = 21      # index of the first <td> that is read
LAST_CELL = 340      # exclusive end of the slice
CELLS_PER_ROW = 8    # stride between two consecutive rows

cells = soup.select("tbody tr td")[FIRST_CELL:LAST_CELL]

# Within each group of CELLS_PER_ROW cells, positions 0, 1 and 2 are read as
# company, ticker and industry. All three are stripped: stray whitespace in a
# ticker would produce an invalid symbol once a suffix is appended to it.
companies = [cell.get_text().strip() for cell in cells[0::CELLS_PER_ROW]]
tickers = [cell.get_text().strip() for cell in cells[1::CELLS_PER_ROW]]
industries = [cell.get_text().strip() for cell in cells[2::CELLS_PER_ROW]]

In [4]:
# Assemble the three scraped lists into one table, one row per company.
# Column order: company, ticker, industry.
DAX = pd.DataFrame(
    dict(
        company=companies,
        ticker=tickers,
        industry=industries,
    )
)

In [5]:
# Build the symbols used for the Yahoo Finance lookup: the scraped ticker plus
# a listing suffix. Every company defaults to ".DE"; companies listed in
# `exception` (keyed by company name) use the symbol given there instead.

exception = {
    "Airbus": "AIR.PA"   # only non-DE listing
}

# Vectorised lookup instead of row-wise apply: map() yields the override where
# the company name is a key of `exception` and NaN everywhere else, and
# fillna() then falls back to ticker + ".DE" for those rows.
DAX["ticker_yahoo"] = (
    DAX["company"].map(exception).fillna(DAX["ticker"] + ".DE")
)

In [6]:
# Display the assembled table (columns: company, ticker, industry, ticker_yahoo).
DAX

Unnamed: 0,company,ticker,industry,ticker_yahoo
0,Adidas,ADS,Sportartikel,ADS.DE
1,Airbus,AIR,"Luftfahrt, Raumfahrt, Rüstung",AIR.PA
2,Allianz,ALV,Versicherungen,ALV.DE
3,BASF,BAS,Chemie,BAS.DE
4,Bayer,BAYN,"Chemie, Pharma",BAYN.DE
5,Beiersdorf,BEI,Konsumgüter (Produktion),BEI.DE
6,BMW,BMW,Automobil (Produktion),BMW.DE
7,Brenntag,BNR,Chemie (Handel),BNR.DE
8,Commerzbank,CBK,Banken,CBK.DE
9,Continental,CON,Automobil (Zulieferer),CON.DE


## Yahoo! Finance - Import close prices of DAX companies for the last 2 years with the yfinance library

In [7]:
# Download daily prices for every Yahoo symbol and keep the closing prices.
tickers_yahoo = DAX["ticker_yahoo"].tolist()

# Pull data for 2 years.
# auto_adjust is passed explicitly so the meaning of 'Close' does not depend on
# the default of the installed yfinance version.
# NOTE(review): True is assumed to match the run shown in the saved output — confirm.
data = yf.download(
    tickers_yahoo,
    start="2023-10-01",
    end="2025-09-30",
    interval="1d",
    group_by="ticker",
    auto_adjust=True,
)

# Extract 'Close' prices into a single DataFrame, one column per symbol,
# in the same order as `tickers_yahoo`.
close_prices = pd.DataFrame({t: data[t]["Close"] for t in tickers_yahoo})

# A column with no values at all would make a later row-wise dropna() remove
# every row, so fail loudly here and name the affected symbols.
empty_tickers = [t for t in tickers_yahoo if close_prices[t].isna().all()]
if empty_tickers:
    raise ValueError(f"No close prices returned for: {empty_tickers}")

close_prices.head()

  data = yf.download(tickers_yahoo, start="2023-10-01", end="2025-09-30", interval="1d", group_by='ticker')
[*********************100%***********************]  40 of 40 completed


Unnamed: 0_level_0,ADS.DE,AIR.PA,ALV.DE,BAS.DE,BAYN.DE,BEI.DE,BMW.DE,BNR.DE,CBK.DE,CON.DE,...,RWE.DE,SAP.DE,G24.DE,SIE.DE,ENR.DE,SHL.DE,SY1.DE,VOW3.DE,VNA.DE,ZAL.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-10-02,162.601059,120.697922,202.67897,37.115082,44.471085,120.034698,85.925385,68.103165,9.99159,62.552277,...,31.951027,120.238358,62.604763,127.729805,12.135,46.397182,87.446121,94.590225,21.191998,21.610001
2023-10-03,158.768173,119.505936,201.086288,36.999901,43.494728,119.099625,85.148071,67.336906,9.725528,61.324299,...,30.721416,119.6903,62.254692,126.456696,11.735,46.078667,87.17231,92.941399,20.504789,20.459999
2023-10-04,160.151184,118.756149,199.903168,37.079643,43.876347,118.262978,85.237411,66.34639,9.644759,60.877766,...,30.787123,119.396698,63.129879,126.87471,11.605,46.735001,88.893456,93.340584,20.439783,20.190001
2023-10-05,154.263565,118.736916,200.858765,36.63221,43.484818,119.247269,84.245659,66.533272,9.692269,60.542862,...,31.012396,119.533714,63.052082,125.6586,11.575,46.078667,89.304192,92.420723,20.384066,20.459999
2023-10-06,157.306152,119.467484,204.135132,37.053066,43.688019,120.625282,85.514389,67.430359,9.849057,61.677814,...,30.721416,120.923416,64.141197,127.93882,11.455,45.789108,87.367889,92.663704,20.560509,21.780001


In [8]:
# Number of missing closing prices per symbol.
close_prices.isna().sum()

ADS.DE     2
AIR.PA     0
ALV.DE     2
BAS.DE     2
BAYN.DE    2
BEI.DE     2
BMW.DE     2
BNR.DE     2
CBK.DE     2
CON.DE     2
DTG.DE     2
DBK.DE     2
DB1.DE     2
DHL.DE     4
DTE.DE     2
EOAN.DE    2
FRE.DE     2
FME.DE     2
G1A.DE     2
HNR1.DE    2
HEI.DE     2
HEN3.DE    2
IFX.DE     2
MBG.DE     2
MRK.DE     2
MTX.DE     2
MUV2.DE    2
PAH3.DE    2
QIA.DE     2
RHM.DE     2
RWE.DE     2
SAP.DE     2
G24.DE     2
SIE.DE     2
ENR.DE     2
SHL.DE     2
SY1.DE     2
VOW3.DE    2
VNA.DE     2
ZAL.DE     2
dtype: int64

In [9]:
# Keep only the dates on which every symbol has a closing price.
# Reassign instead of mutating with inplace=True, so the cell reads as a plain
# input -> output step.
close_prices = close_prices.dropna()

In [10]:
# Day-over-day percentage change of each closing price, expressed in percent.
# fill_method=None means gaps are not filled before differencing; rows that
# contain a NaN (such as the first row, which has no predecessor) are dropped.
returns = (
    close_prices
    .pct_change(fill_method=None)
    .mul(100)
    .dropna()
)

In [11]:
# Preview the first five rows of the percentage returns.
returns.head(5)

Unnamed: 0_level_0,ADS.DE,AIR.PA,ALV.DE,BAS.DE,BAYN.DE,BEI.DE,BMW.DE,BNR.DE,CBK.DE,CON.DE,...,RWE.DE,SAP.DE,G24.DE,SIE.DE,ENR.DE,SHL.DE,SY1.DE,VOW3.DE,VNA.DE,ZAL.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-10-03,-2.357233,-0.987578,-0.785815,-0.310335,-2.195486,-0.779003,-0.904637,-1.125143,-2.662867,-1.963122,...,-3.848422,-0.455809,-0.559176,-0.996721,-3.296255,-0.686498,-0.31312,-1.743126,-3.242772,-5.321617
2023-10-04,0.871088,-0.627405,-0.588365,0.215521,0.87739,-0.702477,0.104923,-1.470986,-0.83048,-0.728151,...,0.213878,-0.245301,1.405817,0.330559,-1.107798,1.424377,1.974418,0.429502,-0.31703,-1.319641
2023-10-05,-3.676288,-0.016196,0.47803,-1.206682,-0.892347,0.83229,-1.163518,0.281676,0.492601,-0.550125,...,0.731712,0.114757,-0.123233,-0.958513,-0.258507,-1.404373,0.462054,-0.985489,-0.272593,1.337288
2023-10-06,1.97233,0.615283,1.63118,1.14887,0.467293,1.155593,1.505989,1.348329,1.617659,1.874627,...,-0.938268,1.162602,1.727326,1.814615,-1.036716,-0.6284,-2.16821,0.262907,0.865593,6.451621
2023-10-10,6.669178,1.062122,0.42354,1.135811,1.55416,-0.489602,1.891129,0.1663,0.578873,0.060321,...,4.70516,0.485594,-0.454817,1.604042,4.626798,1.918208,2.798292,1.254922,2.34869,3.994485


## Export data

In [None]:
# Write the constituents table to disk. to_csv does not create missing
# directories, so make sure the target folder exists first.
Path("data").mkdir(parents=True, exist_ok=True)
DAX.to_csv("data/DAX.csv")

In [None]:
# Write the closing prices to disk. to_csv does not create missing
# directories, so make sure the target folder exists first.
Path("data").mkdir(parents=True, exist_ok=True)
close_prices.to_csv("data/close_prices.csv")

In [None]:
# Write the percentage returns to disk. to_csv does not create missing
# directories, so make sure the target folder exists first.
Path("data").mkdir(parents=True, exist_ok=True)
returns.to_csv("data/returns.csv")